1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /******************************************************************************
3*4882a593Smuzhiyun *******************************************************************************
4*4882a593Smuzhiyun **
5*4882a593Smuzhiyun ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6*4882a593Smuzhiyun ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
7*4882a593Smuzhiyun **
8*4882a593Smuzhiyun **
9*4882a593Smuzhiyun *******************************************************************************
10*4882a593Smuzhiyun ******************************************************************************/
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun #include "dlm_internal.h"
13*4882a593Smuzhiyun #include "lockspace.h"
14*4882a593Smuzhiyun #include "member.h"
15*4882a593Smuzhiyun #include "lowcomms.h"
16*4882a593Smuzhiyun #include "rcom.h"
17*4882a593Smuzhiyun #include "config.h"
18*4882a593Smuzhiyun #include "memory.h"
19*4882a593Smuzhiyun #include "recover.h"
20*4882a593Smuzhiyun #include "util.h"
21*4882a593Smuzhiyun #include "lock.h"
22*4882a593Smuzhiyun #include "dir.h"
23*4882a593Smuzhiyun
/*
 * We use the upper 16 bits of the hash value to select the directory node.
 * Low bits are used for distribution of rsb's among hash buckets on each node.
 *
 * To give the exact range wanted (0 to ls_total_weight-1), we apply a modulus
 * of ls_total_weight to the hash value.  This value in the desired range is
 * used as an offset into ls_node_array, the list of nodeid's, to give the
 * particular nodeid.
 */
32*4882a593Smuzhiyun
/*
 * Map a resource hash to a directory nodeid.
 *
 * When the lockspace reports exactly one node, dlm_our_nodeid() is returned.
 * Otherwise the upper 16 bits of @hash, reduced modulo ls_total_weight, are
 * used as an index into ls_node_array.
 */
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
	uint32_t slot;

	if (ls->ls_num_nodes == 1)
		return dlm_our_nodeid();

	slot = (hash >> 16) % ls->ls_total_weight;
	return ls->ls_node_array[slot];
}
44*4882a593Smuzhiyun
dlm_dir_nodeid(struct dlm_rsb * r)45*4882a593Smuzhiyun int dlm_dir_nodeid(struct dlm_rsb *r)
46*4882a593Smuzhiyun {
47*4882a593Smuzhiyun return r->res_dir_nodeid;
48*4882a593Smuzhiyun }
49*4882a593Smuzhiyun
dlm_recover_dir_nodeid(struct dlm_ls * ls)50*4882a593Smuzhiyun void dlm_recover_dir_nodeid(struct dlm_ls *ls)
51*4882a593Smuzhiyun {
52*4882a593Smuzhiyun struct dlm_rsb *r;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun down_read(&ls->ls_root_sem);
55*4882a593Smuzhiyun list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
56*4882a593Smuzhiyun r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
57*4882a593Smuzhiyun }
58*4882a593Smuzhiyun up_read(&ls->ls_root_sem);
59*4882a593Smuzhiyun }
60*4882a593Smuzhiyun
dlm_recover_directory(struct dlm_ls * ls)61*4882a593Smuzhiyun int dlm_recover_directory(struct dlm_ls *ls)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun struct dlm_member *memb;
64*4882a593Smuzhiyun char *b, *last_name = NULL;
65*4882a593Smuzhiyun int error = -ENOMEM, last_len, nodeid, result;
66*4882a593Smuzhiyun uint16_t namelen;
67*4882a593Smuzhiyun unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun log_rinfo(ls, "dlm_recover_directory");
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun if (dlm_no_directory(ls))
72*4882a593Smuzhiyun goto out_status;
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
75*4882a593Smuzhiyun if (!last_name)
76*4882a593Smuzhiyun goto out;
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun list_for_each_entry(memb, &ls->ls_nodes, list) {
79*4882a593Smuzhiyun if (memb->nodeid == dlm_our_nodeid())
80*4882a593Smuzhiyun continue;
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun memset(last_name, 0, DLM_RESNAME_MAXLEN);
83*4882a593Smuzhiyun last_len = 0;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun for (;;) {
86*4882a593Smuzhiyun int left;
87*4882a593Smuzhiyun error = dlm_recovery_stopped(ls);
88*4882a593Smuzhiyun if (error)
89*4882a593Smuzhiyun goto out_free;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun error = dlm_rcom_names(ls, memb->nodeid,
92*4882a593Smuzhiyun last_name, last_len);
93*4882a593Smuzhiyun if (error)
94*4882a593Smuzhiyun goto out_free;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun cond_resched();
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun /*
99*4882a593Smuzhiyun * pick namelen/name pairs out of received buffer
100*4882a593Smuzhiyun */
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun b = ls->ls_recover_buf->rc_buf;
103*4882a593Smuzhiyun left = ls->ls_recover_buf->rc_header.h_length;
104*4882a593Smuzhiyun left -= sizeof(struct dlm_rcom);
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun for (;;) {
107*4882a593Smuzhiyun __be16 v;
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun error = -EINVAL;
110*4882a593Smuzhiyun if (left < sizeof(__be16))
111*4882a593Smuzhiyun goto out_free;
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun memcpy(&v, b, sizeof(__be16));
114*4882a593Smuzhiyun namelen = be16_to_cpu(v);
115*4882a593Smuzhiyun b += sizeof(__be16);
116*4882a593Smuzhiyun left -= sizeof(__be16);
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun /* namelen of 0xFFFFF marks end of names for
119*4882a593Smuzhiyun this node; namelen of 0 marks end of the
120*4882a593Smuzhiyun buffer */
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun if (namelen == 0xFFFF)
123*4882a593Smuzhiyun goto done;
124*4882a593Smuzhiyun if (!namelen)
125*4882a593Smuzhiyun break;
126*4882a593Smuzhiyun
127*4882a593Smuzhiyun if (namelen > left)
128*4882a593Smuzhiyun goto out_free;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun if (namelen > DLM_RESNAME_MAXLEN)
131*4882a593Smuzhiyun goto out_free;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun error = dlm_master_lookup(ls, memb->nodeid,
134*4882a593Smuzhiyun b, namelen,
135*4882a593Smuzhiyun DLM_LU_RECOVER_DIR,
136*4882a593Smuzhiyun &nodeid, &result);
137*4882a593Smuzhiyun if (error) {
138*4882a593Smuzhiyun log_error(ls, "recover_dir lookup %d",
139*4882a593Smuzhiyun error);
140*4882a593Smuzhiyun goto out_free;
141*4882a593Smuzhiyun }
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun /* The name was found in rsbtbl, but the
144*4882a593Smuzhiyun * master nodeid is different from
145*4882a593Smuzhiyun * memb->nodeid which says it is the master.
146*4882a593Smuzhiyun * This should not happen. */
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun if (result == DLM_LU_MATCH &&
149*4882a593Smuzhiyun nodeid != memb->nodeid) {
150*4882a593Smuzhiyun count_bad++;
151*4882a593Smuzhiyun log_error(ls, "recover_dir lookup %d "
152*4882a593Smuzhiyun "nodeid %d memb %d bad %u",
153*4882a593Smuzhiyun result, nodeid, memb->nodeid,
154*4882a593Smuzhiyun count_bad);
155*4882a593Smuzhiyun print_hex_dump_bytes("dlm_recover_dir ",
156*4882a593Smuzhiyun DUMP_PREFIX_NONE,
157*4882a593Smuzhiyun b, namelen);
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun /* The name was found in rsbtbl, and the
161*4882a593Smuzhiyun * master nodeid matches memb->nodeid. */
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun if (result == DLM_LU_MATCH &&
164*4882a593Smuzhiyun nodeid == memb->nodeid) {
165*4882a593Smuzhiyun count_match++;
166*4882a593Smuzhiyun }
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun /* The name was not found in rsbtbl and was
169*4882a593Smuzhiyun * added with memb->nodeid as the master. */
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun if (result == DLM_LU_ADD) {
172*4882a593Smuzhiyun count_add++;
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun last_len = namelen;
176*4882a593Smuzhiyun memcpy(last_name, b, namelen);
177*4882a593Smuzhiyun b += namelen;
178*4882a593Smuzhiyun left -= namelen;
179*4882a593Smuzhiyun count++;
180*4882a593Smuzhiyun }
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun done:
183*4882a593Smuzhiyun ;
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun out_status:
187*4882a593Smuzhiyun error = 0;
188*4882a593Smuzhiyun dlm_set_recover_status(ls, DLM_RS_DIR);
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun log_rinfo(ls, "dlm_recover_directory %u in %u new",
191*4882a593Smuzhiyun count, count_add);
192*4882a593Smuzhiyun out_free:
193*4882a593Smuzhiyun kfree(last_name);
194*4882a593Smuzhiyun out:
195*4882a593Smuzhiyun return error;
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun
find_rsb_root(struct dlm_ls * ls,char * name,int len)198*4882a593Smuzhiyun static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun struct dlm_rsb *r;
201*4882a593Smuzhiyun uint32_t hash, bucket;
202*4882a593Smuzhiyun int rv;
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun hash = jhash(name, len, 0);
205*4882a593Smuzhiyun bucket = hash & (ls->ls_rsbtbl_size - 1);
206*4882a593Smuzhiyun
207*4882a593Smuzhiyun spin_lock(&ls->ls_rsbtbl[bucket].lock);
208*4882a593Smuzhiyun rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
209*4882a593Smuzhiyun if (rv)
210*4882a593Smuzhiyun rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
211*4882a593Smuzhiyun name, len, &r);
212*4882a593Smuzhiyun spin_unlock(&ls->ls_rsbtbl[bucket].lock);
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun if (!rv)
215*4882a593Smuzhiyun return r;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun down_read(&ls->ls_root_sem);
218*4882a593Smuzhiyun list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
219*4882a593Smuzhiyun if (len == r->res_length && !memcmp(name, r->res_name, len)) {
220*4882a593Smuzhiyun up_read(&ls->ls_root_sem);
221*4882a593Smuzhiyun log_debug(ls, "find_rsb_root revert to root_list %s",
222*4882a593Smuzhiyun r->res_name);
223*4882a593Smuzhiyun return r;
224*4882a593Smuzhiyun }
225*4882a593Smuzhiyun }
226*4882a593Smuzhiyun up_read(&ls->ls_root_sem);
227*4882a593Smuzhiyun return NULL;
228*4882a593Smuzhiyun }
229*4882a593Smuzhiyun
230*4882a593Smuzhiyun /* Find the rsb where we left off (or start again), then send rsb names
231*4882a593Smuzhiyun for rsb's we're master of and whose directory node matches the requesting
232*4882a593Smuzhiyun node. inbuf is the rsb name last sent, inlen is the name's length */
233*4882a593Smuzhiyun
dlm_copy_master_names(struct dlm_ls * ls,char * inbuf,int inlen,char * outbuf,int outlen,int nodeid)234*4882a593Smuzhiyun void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
235*4882a593Smuzhiyun char *outbuf, int outlen, int nodeid)
236*4882a593Smuzhiyun {
237*4882a593Smuzhiyun struct list_head *list;
238*4882a593Smuzhiyun struct dlm_rsb *r;
239*4882a593Smuzhiyun int offset = 0, dir_nodeid;
240*4882a593Smuzhiyun __be16 be_namelen;
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun down_read(&ls->ls_root_sem);
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun if (inlen > 1) {
245*4882a593Smuzhiyun r = find_rsb_root(ls, inbuf, inlen);
246*4882a593Smuzhiyun if (!r) {
247*4882a593Smuzhiyun inbuf[inlen - 1] = '\0';
248*4882a593Smuzhiyun log_error(ls, "copy_master_names from %d start %d %s",
249*4882a593Smuzhiyun nodeid, inlen, inbuf);
250*4882a593Smuzhiyun goto out;
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun list = r->res_root_list.next;
253*4882a593Smuzhiyun } else {
254*4882a593Smuzhiyun list = ls->ls_root_list.next;
255*4882a593Smuzhiyun }
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun for (offset = 0; list != &ls->ls_root_list; list = list->next) {
258*4882a593Smuzhiyun r = list_entry(list, struct dlm_rsb, res_root_list);
259*4882a593Smuzhiyun if (r->res_nodeid)
260*4882a593Smuzhiyun continue;
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun dir_nodeid = dlm_dir_nodeid(r);
263*4882a593Smuzhiyun if (dir_nodeid != nodeid)
264*4882a593Smuzhiyun continue;
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun /*
267*4882a593Smuzhiyun * The block ends when we can't fit the following in the
268*4882a593Smuzhiyun * remaining buffer space:
269*4882a593Smuzhiyun * namelen (uint16_t) +
270*4882a593Smuzhiyun * name (r->res_length) +
271*4882a593Smuzhiyun * end-of-block record 0x0000 (uint16_t)
272*4882a593Smuzhiyun */
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
275*4882a593Smuzhiyun /* Write end-of-block record */
276*4882a593Smuzhiyun be_namelen = cpu_to_be16(0);
277*4882a593Smuzhiyun memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
278*4882a593Smuzhiyun offset += sizeof(__be16);
279*4882a593Smuzhiyun ls->ls_recover_dir_sent_msg++;
280*4882a593Smuzhiyun goto out;
281*4882a593Smuzhiyun }
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun be_namelen = cpu_to_be16(r->res_length);
284*4882a593Smuzhiyun memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
285*4882a593Smuzhiyun offset += sizeof(__be16);
286*4882a593Smuzhiyun memcpy(outbuf + offset, r->res_name, r->res_length);
287*4882a593Smuzhiyun offset += r->res_length;
288*4882a593Smuzhiyun ls->ls_recover_dir_sent_res++;
289*4882a593Smuzhiyun }
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun /*
292*4882a593Smuzhiyun * If we've reached the end of the list (and there's room) write a
293*4882a593Smuzhiyun * terminating record.
294*4882a593Smuzhiyun */
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun if ((list == &ls->ls_root_list) &&
297*4882a593Smuzhiyun (offset + sizeof(uint16_t) <= outlen)) {
298*4882a593Smuzhiyun be_namelen = cpu_to_be16(0xFFFF);
299*4882a593Smuzhiyun memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
300*4882a593Smuzhiyun offset += sizeof(__be16);
301*4882a593Smuzhiyun ls->ls_recover_dir_sent_msg++;
302*4882a593Smuzhiyun }
303*4882a593Smuzhiyun out:
304*4882a593Smuzhiyun up_read(&ls->ls_root_sem);
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
307