xref: /OK3568_Linux_fs/kernel/fs/dlm/dir.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /******************************************************************************
3*4882a593Smuzhiyun *******************************************************************************
4*4882a593Smuzhiyun **
5*4882a593Smuzhiyun **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
6*4882a593Smuzhiyun **  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
7*4882a593Smuzhiyun **
8*4882a593Smuzhiyun **
9*4882a593Smuzhiyun *******************************************************************************
10*4882a593Smuzhiyun ******************************************************************************/
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include "dlm_internal.h"
13*4882a593Smuzhiyun #include "lockspace.h"
14*4882a593Smuzhiyun #include "member.h"
15*4882a593Smuzhiyun #include "lowcomms.h"
16*4882a593Smuzhiyun #include "rcom.h"
17*4882a593Smuzhiyun #include "config.h"
18*4882a593Smuzhiyun #include "memory.h"
19*4882a593Smuzhiyun #include "recover.h"
20*4882a593Smuzhiyun #include "util.h"
21*4882a593Smuzhiyun #include "lock.h"
22*4882a593Smuzhiyun #include "dir.h"
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun /*
25*4882a593Smuzhiyun  * We use the upper 16 bits of the hash value to select the directory node.
26*4882a593Smuzhiyun  * Low bits are used for distribution of rsb's among hash buckets on each node.
27*4882a593Smuzhiyun  *
28*4882a593Smuzhiyun  * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
29*4882a593Smuzhiyun  * num_nodes to the hash value.  This value in the desired range is used as an
30*4882a593Smuzhiyun  * offset into the sorted list of nodeid's to give the particular nodeid.
31*4882a593Smuzhiyun  */
32*4882a593Smuzhiyun 
dlm_hash2nodeid(struct dlm_ls * ls,uint32_t hash)33*4882a593Smuzhiyun int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
34*4882a593Smuzhiyun {
35*4882a593Smuzhiyun 	uint32_t node;
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun 	if (ls->ls_num_nodes == 1)
38*4882a593Smuzhiyun 		return dlm_our_nodeid();
39*4882a593Smuzhiyun 	else {
40*4882a593Smuzhiyun 		node = (hash >> 16) % ls->ls_total_weight;
41*4882a593Smuzhiyun 		return ls->ls_node_array[node];
42*4882a593Smuzhiyun 	}
43*4882a593Smuzhiyun }
44*4882a593Smuzhiyun 
dlm_dir_nodeid(struct dlm_rsb * r)45*4882a593Smuzhiyun int dlm_dir_nodeid(struct dlm_rsb *r)
46*4882a593Smuzhiyun {
47*4882a593Smuzhiyun 	return r->res_dir_nodeid;
48*4882a593Smuzhiyun }
49*4882a593Smuzhiyun 
dlm_recover_dir_nodeid(struct dlm_ls * ls)50*4882a593Smuzhiyun void dlm_recover_dir_nodeid(struct dlm_ls *ls)
51*4882a593Smuzhiyun {
52*4882a593Smuzhiyun 	struct dlm_rsb *r;
53*4882a593Smuzhiyun 
54*4882a593Smuzhiyun 	down_read(&ls->ls_root_sem);
55*4882a593Smuzhiyun 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
56*4882a593Smuzhiyun 		r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
57*4882a593Smuzhiyun 	}
58*4882a593Smuzhiyun 	up_read(&ls->ls_root_sem);
59*4882a593Smuzhiyun }
60*4882a593Smuzhiyun 
dlm_recover_directory(struct dlm_ls * ls)61*4882a593Smuzhiyun int dlm_recover_directory(struct dlm_ls *ls)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun 	struct dlm_member *memb;
64*4882a593Smuzhiyun 	char *b, *last_name = NULL;
65*4882a593Smuzhiyun 	int error = -ENOMEM, last_len, nodeid, result;
66*4882a593Smuzhiyun 	uint16_t namelen;
67*4882a593Smuzhiyun 	unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	log_rinfo(ls, "dlm_recover_directory");
70*4882a593Smuzhiyun 
71*4882a593Smuzhiyun 	if (dlm_no_directory(ls))
72*4882a593Smuzhiyun 		goto out_status;
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun 	last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
75*4882a593Smuzhiyun 	if (!last_name)
76*4882a593Smuzhiyun 		goto out;
77*4882a593Smuzhiyun 
78*4882a593Smuzhiyun 	list_for_each_entry(memb, &ls->ls_nodes, list) {
79*4882a593Smuzhiyun 		if (memb->nodeid == dlm_our_nodeid())
80*4882a593Smuzhiyun 			continue;
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 		memset(last_name, 0, DLM_RESNAME_MAXLEN);
83*4882a593Smuzhiyun 		last_len = 0;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 		for (;;) {
86*4882a593Smuzhiyun 			int left;
87*4882a593Smuzhiyun 			error = dlm_recovery_stopped(ls);
88*4882a593Smuzhiyun 			if (error)
89*4882a593Smuzhiyun 				goto out_free;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 			error = dlm_rcom_names(ls, memb->nodeid,
92*4882a593Smuzhiyun 					       last_name, last_len);
93*4882a593Smuzhiyun 			if (error)
94*4882a593Smuzhiyun 				goto out_free;
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 			cond_resched();
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 			/*
99*4882a593Smuzhiyun 			 * pick namelen/name pairs out of received buffer
100*4882a593Smuzhiyun 			 */
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 			b = ls->ls_recover_buf->rc_buf;
103*4882a593Smuzhiyun 			left = ls->ls_recover_buf->rc_header.h_length;
104*4882a593Smuzhiyun 			left -= sizeof(struct dlm_rcom);
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 			for (;;) {
107*4882a593Smuzhiyun 				__be16 v;
108*4882a593Smuzhiyun 
109*4882a593Smuzhiyun 				error = -EINVAL;
110*4882a593Smuzhiyun 				if (left < sizeof(__be16))
111*4882a593Smuzhiyun 					goto out_free;
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 				memcpy(&v, b, sizeof(__be16));
114*4882a593Smuzhiyun 				namelen = be16_to_cpu(v);
115*4882a593Smuzhiyun 				b += sizeof(__be16);
116*4882a593Smuzhiyun 				left -= sizeof(__be16);
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun 				/* namelen of 0xFFFFF marks end of names for
119*4882a593Smuzhiyun 				   this node; namelen of 0 marks end of the
120*4882a593Smuzhiyun 				   buffer */
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun 				if (namelen == 0xFFFF)
123*4882a593Smuzhiyun 					goto done;
124*4882a593Smuzhiyun 				if (!namelen)
125*4882a593Smuzhiyun 					break;
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 				if (namelen > left)
128*4882a593Smuzhiyun 					goto out_free;
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 				if (namelen > DLM_RESNAME_MAXLEN)
131*4882a593Smuzhiyun 					goto out_free;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 				error = dlm_master_lookup(ls, memb->nodeid,
134*4882a593Smuzhiyun 							  b, namelen,
135*4882a593Smuzhiyun 							  DLM_LU_RECOVER_DIR,
136*4882a593Smuzhiyun 							  &nodeid, &result);
137*4882a593Smuzhiyun 				if (error) {
138*4882a593Smuzhiyun 					log_error(ls, "recover_dir lookup %d",
139*4882a593Smuzhiyun 						  error);
140*4882a593Smuzhiyun 					goto out_free;
141*4882a593Smuzhiyun 				}
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 				/* The name was found in rsbtbl, but the
144*4882a593Smuzhiyun 				 * master nodeid is different from
145*4882a593Smuzhiyun 				 * memb->nodeid which says it is the master.
146*4882a593Smuzhiyun 				 * This should not happen. */
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 				if (result == DLM_LU_MATCH &&
149*4882a593Smuzhiyun 				    nodeid != memb->nodeid) {
150*4882a593Smuzhiyun 					count_bad++;
151*4882a593Smuzhiyun 					log_error(ls, "recover_dir lookup %d "
152*4882a593Smuzhiyun 						  "nodeid %d memb %d bad %u",
153*4882a593Smuzhiyun 						  result, nodeid, memb->nodeid,
154*4882a593Smuzhiyun 						  count_bad);
155*4882a593Smuzhiyun 					print_hex_dump_bytes("dlm_recover_dir ",
156*4882a593Smuzhiyun 							     DUMP_PREFIX_NONE,
157*4882a593Smuzhiyun 							     b, namelen);
158*4882a593Smuzhiyun 				}
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun 				/* The name was found in rsbtbl, and the
161*4882a593Smuzhiyun 				 * master nodeid matches memb->nodeid. */
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 				if (result == DLM_LU_MATCH &&
164*4882a593Smuzhiyun 				    nodeid == memb->nodeid) {
165*4882a593Smuzhiyun 					count_match++;
166*4882a593Smuzhiyun 				}
167*4882a593Smuzhiyun 
168*4882a593Smuzhiyun 				/* The name was not found in rsbtbl and was
169*4882a593Smuzhiyun 				 * added with memb->nodeid as the master. */
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 				if (result == DLM_LU_ADD) {
172*4882a593Smuzhiyun 					count_add++;
173*4882a593Smuzhiyun 				}
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 				last_len = namelen;
176*4882a593Smuzhiyun 				memcpy(last_name, b, namelen);
177*4882a593Smuzhiyun 				b += namelen;
178*4882a593Smuzhiyun 				left -= namelen;
179*4882a593Smuzhiyun 				count++;
180*4882a593Smuzhiyun 			}
181*4882a593Smuzhiyun 		}
182*4882a593Smuzhiyun 	 done:
183*4882a593Smuzhiyun 		;
184*4882a593Smuzhiyun 	}
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun  out_status:
187*4882a593Smuzhiyun 	error = 0;
188*4882a593Smuzhiyun 	dlm_set_recover_status(ls, DLM_RS_DIR);
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	log_rinfo(ls, "dlm_recover_directory %u in %u new",
191*4882a593Smuzhiyun 		  count, count_add);
192*4882a593Smuzhiyun  out_free:
193*4882a593Smuzhiyun 	kfree(last_name);
194*4882a593Smuzhiyun  out:
195*4882a593Smuzhiyun 	return error;
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun 
find_rsb_root(struct dlm_ls * ls,char * name,int len)198*4882a593Smuzhiyun static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun 	struct dlm_rsb *r;
201*4882a593Smuzhiyun 	uint32_t hash, bucket;
202*4882a593Smuzhiyun 	int rv;
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	hash = jhash(name, len, 0);
205*4882a593Smuzhiyun 	bucket = hash & (ls->ls_rsbtbl_size - 1);
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[bucket].lock);
208*4882a593Smuzhiyun 	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
209*4882a593Smuzhiyun 	if (rv)
210*4882a593Smuzhiyun 		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
211*4882a593Smuzhiyun 					 name, len, &r);
212*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	if (!rv)
215*4882a593Smuzhiyun 		return r;
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 	down_read(&ls->ls_root_sem);
218*4882a593Smuzhiyun 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
219*4882a593Smuzhiyun 		if (len == r->res_length && !memcmp(name, r->res_name, len)) {
220*4882a593Smuzhiyun 			up_read(&ls->ls_root_sem);
221*4882a593Smuzhiyun 			log_debug(ls, "find_rsb_root revert to root_list %s",
222*4882a593Smuzhiyun 				  r->res_name);
223*4882a593Smuzhiyun 			return r;
224*4882a593Smuzhiyun 		}
225*4882a593Smuzhiyun 	}
226*4882a593Smuzhiyun 	up_read(&ls->ls_root_sem);
227*4882a593Smuzhiyun 	return NULL;
228*4882a593Smuzhiyun }
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun /* Find the rsb where we left off (or start again), then send rsb names
231*4882a593Smuzhiyun    for rsb's we're master of and whose directory node matches the requesting
232*4882a593Smuzhiyun    node.  inbuf is the rsb name last sent, inlen is the name's length */
233*4882a593Smuzhiyun 
dlm_copy_master_names(struct dlm_ls * ls,char * inbuf,int inlen,char * outbuf,int outlen,int nodeid)234*4882a593Smuzhiyun void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
235*4882a593Smuzhiyun  			   char *outbuf, int outlen, int nodeid)
236*4882a593Smuzhiyun {
237*4882a593Smuzhiyun 	struct list_head *list;
238*4882a593Smuzhiyun 	struct dlm_rsb *r;
239*4882a593Smuzhiyun 	int offset = 0, dir_nodeid;
240*4882a593Smuzhiyun 	__be16 be_namelen;
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 	down_read(&ls->ls_root_sem);
243*4882a593Smuzhiyun 
244*4882a593Smuzhiyun 	if (inlen > 1) {
245*4882a593Smuzhiyun 		r = find_rsb_root(ls, inbuf, inlen);
246*4882a593Smuzhiyun 		if (!r) {
247*4882a593Smuzhiyun 			inbuf[inlen - 1] = '\0';
248*4882a593Smuzhiyun 			log_error(ls, "copy_master_names from %d start %d %s",
249*4882a593Smuzhiyun 				  nodeid, inlen, inbuf);
250*4882a593Smuzhiyun 			goto out;
251*4882a593Smuzhiyun 		}
252*4882a593Smuzhiyun 		list = r->res_root_list.next;
253*4882a593Smuzhiyun 	} else {
254*4882a593Smuzhiyun 		list = ls->ls_root_list.next;
255*4882a593Smuzhiyun 	}
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	for (offset = 0; list != &ls->ls_root_list; list = list->next) {
258*4882a593Smuzhiyun 		r = list_entry(list, struct dlm_rsb, res_root_list);
259*4882a593Smuzhiyun 		if (r->res_nodeid)
260*4882a593Smuzhiyun 			continue;
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 		dir_nodeid = dlm_dir_nodeid(r);
263*4882a593Smuzhiyun 		if (dir_nodeid != nodeid)
264*4882a593Smuzhiyun 			continue;
265*4882a593Smuzhiyun 
266*4882a593Smuzhiyun 		/*
267*4882a593Smuzhiyun 		 * The block ends when we can't fit the following in the
268*4882a593Smuzhiyun 		 * remaining buffer space:
269*4882a593Smuzhiyun 		 * namelen (uint16_t) +
270*4882a593Smuzhiyun 		 * name (r->res_length) +
271*4882a593Smuzhiyun 		 * end-of-block record 0x0000 (uint16_t)
272*4882a593Smuzhiyun 		 */
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun 		if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
275*4882a593Smuzhiyun 			/* Write end-of-block record */
276*4882a593Smuzhiyun 			be_namelen = cpu_to_be16(0);
277*4882a593Smuzhiyun 			memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
278*4882a593Smuzhiyun 			offset += sizeof(__be16);
279*4882a593Smuzhiyun 			ls->ls_recover_dir_sent_msg++;
280*4882a593Smuzhiyun 			goto out;
281*4882a593Smuzhiyun 		}
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 		be_namelen = cpu_to_be16(r->res_length);
284*4882a593Smuzhiyun 		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
285*4882a593Smuzhiyun 		offset += sizeof(__be16);
286*4882a593Smuzhiyun 		memcpy(outbuf + offset, r->res_name, r->res_length);
287*4882a593Smuzhiyun 		offset += r->res_length;
288*4882a593Smuzhiyun 		ls->ls_recover_dir_sent_res++;
289*4882a593Smuzhiyun 	}
290*4882a593Smuzhiyun 
291*4882a593Smuzhiyun 	/*
292*4882a593Smuzhiyun 	 * If we've reached the end of the list (and there's room) write a
293*4882a593Smuzhiyun 	 * terminating record.
294*4882a593Smuzhiyun 	 */
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 	if ((list == &ls->ls_root_list) &&
297*4882a593Smuzhiyun 	    (offset + sizeof(uint16_t) <= outlen)) {
298*4882a593Smuzhiyun 		be_namelen = cpu_to_be16(0xFFFF);
299*4882a593Smuzhiyun 		memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
300*4882a593Smuzhiyun 		offset += sizeof(__be16);
301*4882a593Smuzhiyun 		ls->ls_recover_dir_sent_msg++;
302*4882a593Smuzhiyun 	}
303*4882a593Smuzhiyun  out:
304*4882a593Smuzhiyun 	up_read(&ls->ls_root_sem);
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun 
307