1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /* AFS vlserver list management.
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
5*4882a593Smuzhiyun * Written by David Howells (dhowells@redhat.com)
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/kernel.h>
9*4882a593Smuzhiyun #include <linux/slab.h>
10*4882a593Smuzhiyun #include "internal.h"
11*4882a593Smuzhiyun
/*
 * Allocate and initialise a VL server record.
 *
 * @name/@name_len: server name to copy into the record (need not be
 *                  NUL-terminated; exactly @name_len bytes are copied).
 * @port:           port number to record for the server.
 *
 * Returns the new record with a usage count of 1, or NULL if the allocation
 * failed.
 */
struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
					unsigned short port)
{
	struct afs_vlserver *vls;

	/* Room for the name plus one byte; kzalloc() leaves that byte zero. */
	vls = kzalloc(struct_size(vls, name, name_len + 1), GFP_KERNEL);
	if (!vls)
		return NULL;

	atomic_set(&vls->usage, 1);
	rwlock_init(&vls->lock);
	init_waitqueue_head(&vls->probe_wq);
	spin_lock_init(&vls->probe_lock);
	vls->rtt = UINT_MAX;
	vls->name_len = name_len;
	vls->port = port;
	memcpy(vls->name, name, name_len);
	return vls;
}
31*4882a593Smuzhiyun
afs_vlserver_rcu(struct rcu_head * rcu)32*4882a593Smuzhiyun static void afs_vlserver_rcu(struct rcu_head *rcu)
33*4882a593Smuzhiyun {
34*4882a593Smuzhiyun struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
35*4882a593Smuzhiyun
36*4882a593Smuzhiyun afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
37*4882a593Smuzhiyun kfree_rcu(vlserver, rcu);
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun
afs_put_vlserver(struct afs_net * net,struct afs_vlserver * vlserver)40*4882a593Smuzhiyun void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
41*4882a593Smuzhiyun {
42*4882a593Smuzhiyun if (vlserver) {
43*4882a593Smuzhiyun unsigned int u = atomic_dec_return(&vlserver->usage);
44*4882a593Smuzhiyun //_debug("VL PUT %p{%u}", vlserver, u);
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun if (u == 0)
47*4882a593Smuzhiyun call_rcu(&vlserver->rcu, afs_vlserver_rcu);
48*4882a593Smuzhiyun }
49*4882a593Smuzhiyun }
50*4882a593Smuzhiyun
afs_alloc_vlserver_list(unsigned int nr_servers)51*4882a593Smuzhiyun struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
52*4882a593Smuzhiyun {
53*4882a593Smuzhiyun struct afs_vlserver_list *vllist;
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
56*4882a593Smuzhiyun if (vllist) {
57*4882a593Smuzhiyun atomic_set(&vllist->usage, 1);
58*4882a593Smuzhiyun rwlock_init(&vllist->lock);
59*4882a593Smuzhiyun }
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun return vllist;
62*4882a593Smuzhiyun }
63*4882a593Smuzhiyun
afs_put_vlserverlist(struct afs_net * net,struct afs_vlserver_list * vllist)64*4882a593Smuzhiyun void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun if (vllist) {
67*4882a593Smuzhiyun unsigned int u = atomic_dec_return(&vllist->usage);
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun //_debug("VLLS PUT %p{%u}", vllist, u);
70*4882a593Smuzhiyun if (u == 0) {
71*4882a593Smuzhiyun int i;
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun for (i = 0; i < vllist->nr_servers; i++) {
74*4882a593Smuzhiyun afs_put_vlserver(net, vllist->servers[i].server);
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun kfree_rcu(vllist, rcu);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun }
80*4882a593Smuzhiyun
afs_extract_le16(const u8 ** _b)81*4882a593Smuzhiyun static u16 afs_extract_le16(const u8 **_b)
82*4882a593Smuzhiyun {
83*4882a593Smuzhiyun u16 val;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun val = (u16)*(*_b)++ << 0;
86*4882a593Smuzhiyun val |= (u16)*(*_b)++ << 8;
87*4882a593Smuzhiyun return val;
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun /*
91*4882a593Smuzhiyun * Build a VL server address list from a DNS queried server list.
92*4882a593Smuzhiyun */
afs_extract_vl_addrs(const u8 ** _b,const u8 * end,u8 nr_addrs,u16 port)93*4882a593Smuzhiyun static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
94*4882a593Smuzhiyun u8 nr_addrs, u16 port)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun struct afs_addr_list *alist;
97*4882a593Smuzhiyun const u8 *b = *_b;
98*4882a593Smuzhiyun int ret = -EINVAL;
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
101*4882a593Smuzhiyun if (!alist)
102*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
103*4882a593Smuzhiyun if (nr_addrs == 0)
104*4882a593Smuzhiyun return alist;
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
107*4882a593Smuzhiyun struct dns_server_list_v1_address hdr;
108*4882a593Smuzhiyun __be32 x[4];
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun hdr.address_type = *b++;
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun switch (hdr.address_type) {
113*4882a593Smuzhiyun case DNS_ADDRESS_IS_IPV4:
114*4882a593Smuzhiyun if (end - b < 4) {
115*4882a593Smuzhiyun _leave(" = -EINVAL [short inet]");
116*4882a593Smuzhiyun goto error;
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun memcpy(x, b, 4);
119*4882a593Smuzhiyun afs_merge_fs_addr4(alist, x[0], port);
120*4882a593Smuzhiyun b += 4;
121*4882a593Smuzhiyun break;
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun case DNS_ADDRESS_IS_IPV6:
124*4882a593Smuzhiyun if (end - b < 16) {
125*4882a593Smuzhiyun _leave(" = -EINVAL [short inet6]");
126*4882a593Smuzhiyun goto error;
127*4882a593Smuzhiyun }
128*4882a593Smuzhiyun memcpy(x, b, 16);
129*4882a593Smuzhiyun afs_merge_fs_addr6(alist, x, port);
130*4882a593Smuzhiyun b += 16;
131*4882a593Smuzhiyun break;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun default:
134*4882a593Smuzhiyun _leave(" = -EADDRNOTAVAIL [unknown af %u]",
135*4882a593Smuzhiyun hdr.address_type);
136*4882a593Smuzhiyun ret = -EADDRNOTAVAIL;
137*4882a593Smuzhiyun goto error;
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun /* Start with IPv6 if available. */
142*4882a593Smuzhiyun if (alist->nr_ipv4 < alist->nr_addrs)
143*4882a593Smuzhiyun alist->preferred = alist->nr_ipv4;
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun *_b = b;
146*4882a593Smuzhiyun return alist;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun error:
149*4882a593Smuzhiyun *_b = b;
150*4882a593Smuzhiyun afs_put_addrlist(alist);
151*4882a593Smuzhiyun return ERR_PTR(ret);
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun /*
155*4882a593Smuzhiyun * Build a VL server list from a DNS queried server list.
156*4882a593Smuzhiyun */
afs_extract_vlserver_list(struct afs_cell * cell,const void * buffer,size_t buffer_size)157*4882a593Smuzhiyun struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
158*4882a593Smuzhiyun const void *buffer,
159*4882a593Smuzhiyun size_t buffer_size)
160*4882a593Smuzhiyun {
161*4882a593Smuzhiyun const struct dns_server_list_v1_header *hdr = buffer;
162*4882a593Smuzhiyun struct dns_server_list_v1_server bs;
163*4882a593Smuzhiyun struct afs_vlserver_list *vllist, *previous;
164*4882a593Smuzhiyun struct afs_addr_list *addrs;
165*4882a593Smuzhiyun struct afs_vlserver *server;
166*4882a593Smuzhiyun const u8 *b = buffer, *end = buffer + buffer_size;
167*4882a593Smuzhiyun int ret = -ENOMEM, nr_servers, i, j;
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun _enter("");
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun /* Check that it's a server list, v1 */
172*4882a593Smuzhiyun if (end - b < sizeof(*hdr) ||
173*4882a593Smuzhiyun hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
174*4882a593Smuzhiyun hdr->hdr.version != 1) {
175*4882a593Smuzhiyun pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
176*4882a593Smuzhiyun hdr->hdr.content, hdr->hdr.version, end - b);
177*4882a593Smuzhiyun ret = -EDESTADDRREQ;
178*4882a593Smuzhiyun goto dump;
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun nr_servers = hdr->nr_servers;
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun vllist = afs_alloc_vlserver_list(nr_servers);
184*4882a593Smuzhiyun if (!vllist)
185*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun vllist->source = (hdr->source < NR__dns_record_source) ?
188*4882a593Smuzhiyun hdr->source : NR__dns_record_source;
189*4882a593Smuzhiyun vllist->status = (hdr->status < NR__dns_lookup_status) ?
190*4882a593Smuzhiyun hdr->status : NR__dns_lookup_status;
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun read_lock(&cell->vl_servers_lock);
193*4882a593Smuzhiyun previous = afs_get_vlserverlist(
194*4882a593Smuzhiyun rcu_dereference_protected(cell->vl_servers,
195*4882a593Smuzhiyun lockdep_is_held(&cell->vl_servers_lock)));
196*4882a593Smuzhiyun read_unlock(&cell->vl_servers_lock);
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun b += sizeof(*hdr);
199*4882a593Smuzhiyun while (end - b >= sizeof(bs)) {
200*4882a593Smuzhiyun bs.name_len = afs_extract_le16(&b);
201*4882a593Smuzhiyun bs.priority = afs_extract_le16(&b);
202*4882a593Smuzhiyun bs.weight = afs_extract_le16(&b);
203*4882a593Smuzhiyun bs.port = afs_extract_le16(&b);
204*4882a593Smuzhiyun bs.source = *b++;
205*4882a593Smuzhiyun bs.status = *b++;
206*4882a593Smuzhiyun bs.protocol = *b++;
207*4882a593Smuzhiyun bs.nr_addrs = *b++;
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun _debug("extract %u %u %u %u %u %u %*.*s",
210*4882a593Smuzhiyun bs.name_len, bs.priority, bs.weight,
211*4882a593Smuzhiyun bs.port, bs.protocol, bs.nr_addrs,
212*4882a593Smuzhiyun bs.name_len, bs.name_len, b);
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun if (end - b < bs.name_len)
215*4882a593Smuzhiyun break;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun ret = -EPROTONOSUPPORT;
218*4882a593Smuzhiyun if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
219*4882a593Smuzhiyun bs.protocol = DNS_SERVER_PROTOCOL_UDP;
220*4882a593Smuzhiyun } else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
221*4882a593Smuzhiyun _leave(" = [proto %u]", bs.protocol);
222*4882a593Smuzhiyun goto error;
223*4882a593Smuzhiyun }
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun if (bs.port == 0)
226*4882a593Smuzhiyun bs.port = AFS_VL_PORT;
227*4882a593Smuzhiyun if (bs.source > NR__dns_record_source)
228*4882a593Smuzhiyun bs.source = NR__dns_record_source;
229*4882a593Smuzhiyun if (bs.status > NR__dns_lookup_status)
230*4882a593Smuzhiyun bs.status = NR__dns_lookup_status;
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun /* See if we can update an old server record */
233*4882a593Smuzhiyun server = NULL;
234*4882a593Smuzhiyun for (i = 0; i < previous->nr_servers; i++) {
235*4882a593Smuzhiyun struct afs_vlserver *p = previous->servers[i].server;
236*4882a593Smuzhiyun
237*4882a593Smuzhiyun if (p->name_len == bs.name_len &&
238*4882a593Smuzhiyun p->port == bs.port &&
239*4882a593Smuzhiyun strncasecmp(b, p->name, bs.name_len) == 0) {
240*4882a593Smuzhiyun server = afs_get_vlserver(p);
241*4882a593Smuzhiyun break;
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun }
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun if (!server) {
246*4882a593Smuzhiyun ret = -ENOMEM;
247*4882a593Smuzhiyun server = afs_alloc_vlserver(b, bs.name_len, bs.port);
248*4882a593Smuzhiyun if (!server)
249*4882a593Smuzhiyun goto error;
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun
252*4882a593Smuzhiyun b += bs.name_len;
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun /* Extract the addresses - note that we can't skip this as we
255*4882a593Smuzhiyun * have to advance the payload pointer.
256*4882a593Smuzhiyun */
257*4882a593Smuzhiyun addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
258*4882a593Smuzhiyun if (IS_ERR(addrs)) {
259*4882a593Smuzhiyun ret = PTR_ERR(addrs);
260*4882a593Smuzhiyun goto error_2;
261*4882a593Smuzhiyun }
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun if (vllist->nr_servers >= nr_servers) {
264*4882a593Smuzhiyun _debug("skip %u >= %u", vllist->nr_servers, nr_servers);
265*4882a593Smuzhiyun afs_put_addrlist(addrs);
266*4882a593Smuzhiyun afs_put_vlserver(cell->net, server);
267*4882a593Smuzhiyun continue;
268*4882a593Smuzhiyun }
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun addrs->source = bs.source;
271*4882a593Smuzhiyun addrs->status = bs.status;
272*4882a593Smuzhiyun
273*4882a593Smuzhiyun if (addrs->nr_addrs == 0) {
274*4882a593Smuzhiyun afs_put_addrlist(addrs);
275*4882a593Smuzhiyun if (!rcu_access_pointer(server->addresses)) {
276*4882a593Smuzhiyun afs_put_vlserver(cell->net, server);
277*4882a593Smuzhiyun continue;
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun } else {
280*4882a593Smuzhiyun struct afs_addr_list *old = addrs;
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun write_lock(&server->lock);
283*4882a593Smuzhiyun old = rcu_replace_pointer(server->addresses, old,
284*4882a593Smuzhiyun lockdep_is_held(&server->lock));
285*4882a593Smuzhiyun write_unlock(&server->lock);
286*4882a593Smuzhiyun afs_put_addrlist(old);
287*4882a593Smuzhiyun }
288*4882a593Smuzhiyun
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun /* TODO: Might want to check for duplicates */
291*4882a593Smuzhiyun
292*4882a593Smuzhiyun /* Insertion-sort by priority and weight */
293*4882a593Smuzhiyun for (j = 0; j < vllist->nr_servers; j++) {
294*4882a593Smuzhiyun if (bs.priority < vllist->servers[j].priority)
295*4882a593Smuzhiyun break; /* Lower preferable */
296*4882a593Smuzhiyun if (bs.priority == vllist->servers[j].priority &&
297*4882a593Smuzhiyun bs.weight > vllist->servers[j].weight)
298*4882a593Smuzhiyun break; /* Higher preferable */
299*4882a593Smuzhiyun }
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun if (j < vllist->nr_servers) {
302*4882a593Smuzhiyun memmove(vllist->servers + j + 1,
303*4882a593Smuzhiyun vllist->servers + j,
304*4882a593Smuzhiyun (vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
307*4882a593Smuzhiyun clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun vllist->servers[j].priority = bs.priority;
310*4882a593Smuzhiyun vllist->servers[j].weight = bs.weight;
311*4882a593Smuzhiyun vllist->servers[j].server = server;
312*4882a593Smuzhiyun vllist->nr_servers++;
313*4882a593Smuzhiyun }
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun if (b != end) {
316*4882a593Smuzhiyun _debug("parse error %zd", b - end);
317*4882a593Smuzhiyun goto error;
318*4882a593Smuzhiyun }
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun afs_put_vlserverlist(cell->net, previous);
321*4882a593Smuzhiyun _leave(" = ok [%u]", vllist->nr_servers);
322*4882a593Smuzhiyun return vllist;
323*4882a593Smuzhiyun
324*4882a593Smuzhiyun error_2:
325*4882a593Smuzhiyun afs_put_vlserver(cell->net, server);
326*4882a593Smuzhiyun error:
327*4882a593Smuzhiyun afs_put_vlserverlist(cell->net, vllist);
328*4882a593Smuzhiyun afs_put_vlserverlist(cell->net, previous);
329*4882a593Smuzhiyun dump:
330*4882a593Smuzhiyun if (ret != -ENOMEM) {
331*4882a593Smuzhiyun printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
332*4882a593Smuzhiyun print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun return ERR_PTR(ret);
335*4882a593Smuzhiyun }
336