// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) 2005-2010 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/* Central locking logic has four stages:

   dlm_lock()
   dlm_unlock()

   request_lock(ls, lkb)
   convert_lock(ls, lkb)
   unlock_lock(ls, lkb)
   cancel_lock(ls, lkb)

   _request_lock(r, lkb)
   _convert_lock(r, lkb)
   _unlock_lock(r, lkb)
   _cancel_lock(r, lkb)

   do_request(r, lkb)
   do_convert(r, lkb)
   do_unlock(r, lkb)
   do_cancel(r, lkb)

   Stage 1 (lock, unlock) is mainly about checking input args and
   splitting into one of the four main operations:

       dlm_lock          = request_lock
       dlm_lock+CONVERT  = convert_lock
       dlm_unlock        = unlock_lock
       dlm_unlock+CANCEL = cancel_lock

   Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is
   provided to the next stage.

   Stage 3, _xxxx_lock(), determines if the operation is local or remote.
   When remote, it calls send_xxxx(), when local it calls do_xxxx().

   Stage 4, do_xxxx(), is the guts of the operation.  It manipulates the
   given rsb and lkb and queues callbacks.

   For remote operations, send_xxxx() results in the corresponding do_xxxx()
   function being executed on the remote node.  The connecting send/receive
   calls on local (L) and remote (R) nodes:

   L: send_xxxx()              ->  R: receive_xxxx()
                                   R: do_xxxx()
   L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
*/
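
/* Illustrative only: a minimal sketch of how a caller might enter stage 1
 * above through the public API in <linux/dlm.h>.  The lockspace handle,
 * the resource name "example" and the ast callbacks are hypothetical, and
 * error handling is elided:
 *
 *	static void my_ast(void *astarg) { }
 *	static void my_bast(void *astarg, int mode) { }
 *
 *	struct dlm_lksb lksb;
 *	int error;
 *
 *	error = dlm_lock(lockspace, DLM_LOCK_EX, &lksb, 0,
 *			 "example", 7, 0, my_ast, &lksb, my_bast);
 *	// later, releasing it dispatches to unlock_lock():
 *	error = dlm_unlock(lockspace, lksb.sb_lkid, 0, &lksb, &lksb);
 */
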
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include "dlm_internal.h"
#include <linux/dlm_device.h>
#include "memory.h"
#include "lowcomms.h"
#include "requestqueue.h"
#include "util.h"
#include "dir.h"
#include "member.h"
#include "lockspace.h"
#include "ast.h"
#include "lock.h"
#include "rcom.h"
#include "recover.h"
#include "lvb_table.h"
#include "user.h"
#include "config.h"

static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
				    struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void del_timeout(struct dlm_lkb *lkb);
static void toss_rsb(struct kref *kref);

/*
 * Lock compatibility matrix - thanks Steve
 * UN = Unlocked state. Not really a state, used as a flag
 * PD = Padding. Used to make the matrix a nice power of two in size
 * Other states are the same as the VMS DLM.
 * Usage: matrix[grmode+1][rqmode+1]  (although m[rq+1][gr+1] is the same)
 */

static const int __dlm_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
        {1, 1, 1, 1, 1, 1, 1, 0},       /* UN */
        {1, 1, 1, 1, 1, 1, 1, 0},       /* NL */
        {1, 1, 1, 1, 1, 1, 0, 0},       /* CR */
        {1, 1, 1, 1, 0, 0, 0, 0},       /* CW */
        {1, 1, 1, 0, 1, 0, 0, 0},       /* PR */
        {1, 1, 1, 0, 0, 0, 0, 0},       /* PW */
        {1, 1, 0, 0, 0, 0, 0, 0},       /* EX */
        {0, 0, 0, 0, 0, 0, 0, 0}        /* PD */
};

/*
 * This defines the direction of transfer of LVB data.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 * 1 = LVB is returned to the caller
 * 0 = LVB is written to the resource
 * -1 = nothing happens to the LVB
 */

const int dlm_lvb_operations[8][8] = {
        /* UN   NL  CR  CW  PR  PW  EX  PD*/
        {  -1,  1,  1,  1,  1,  1,  1, -1 }, /* UN */
        {  -1,  1,  1,  1,  1,  1,  1,  0 }, /* NL */
        {  -1, -1,  1,  1,  1,  1,  1,  0 }, /* CR */
        {  -1, -1, -1,  1,  1,  1,  1,  0 }, /* CW */
        {  -1, -1, -1, -1,  1,  1,  1,  0 }, /* PR */
        {  -1,  0,  0,  0,  0,  0,  1,  0 }, /* PW */
        {  -1,  0,  0,  0,  0,  0,  0,  0 }, /* EX */
        {  -1,  0,  0,  0,  0,  0,  0,  0 }  /* PD */
};
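
/* Worked example of the table above (illustrative): converting a lock up
 * from NL (row) to EX (column) gives 1, so the current LVB is returned to
 * the caller; converting down from EX to NL gives 0, so the caller's LVB
 * is written back to the resource; the UN column is all -1, so those
 * transitions leave the LVB untouched.
 */
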

#define modes_compat(gr, rq) \
	__dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]

int dlm_modes_compat(int mode1, int mode2)
{
	return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
}
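
/* Illustrative checks against the compatibility matrix, assuming the
 * standard mode constants from <linux/dlm.h> (DLM_LOCK_NL=0 ... EX=5):
 *
 *	dlm_modes_compat(DLM_LOCK_PR, DLM_LOCK_PR);  // 1: shared readers coexist
 *	dlm_modes_compat(DLM_LOCK_PR, DLM_LOCK_EX);  // 0: EX excludes PR
 *	dlm_modes_compat(DLM_LOCK_NL, DLM_LOCK_EX);  // 1: NL is compatible with all
 */
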

/*
 * Compatibility matrix for conversions with QUECVT set.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 */

static const int __quecvt_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
        {0, 0, 0, 0, 0, 0, 0, 0},       /* UN */
        {0, 0, 1, 1, 1, 1, 1, 0},       /* NL */
        {0, 0, 0, 1, 1, 1, 1, 0},       /* CR */
        {0, 0, 0, 0, 1, 1, 1, 0},       /* CW */
        {0, 0, 0, 1, 0, 1, 1, 0},       /* PR */
        {0, 0, 0, 0, 0, 0, 1, 0},       /* PW */
        {0, 0, 0, 0, 0, 0, 0, 0},       /* EX */
        {0, 0, 0, 0, 0, 0, 0, 0}        /* PD */
};
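
/* Reading the QUECVT table (an illustrative note, not from the original
 * comments): a 1 marks genuinely upward conversions, e.g. NL -> EX
 * (row NL, column EX).  The argument-validation code elsewhere in this
 * file is expected to reject DLM_LKF_QUECVT requests whose entry is 0,
 * such as a same-mode PR -> PR conversion or any down-conversion.
 */
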

void dlm_print_lkb(struct dlm_lkb *lkb)
{
	printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x "
	       "sts %d rq %d gr %d wait_type %d wait_nodeid %d seq %llu\n",
	       lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
	       lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
	       lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_wait_nodeid,
	       (unsigned long long)lkb->lkb_recover_seq);
}

static void dlm_print_rsb(struct dlm_rsb *r)
{
	printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x "
	       "rlc %d name %s\n",
	       r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid,
	       r->res_flags, r->res_first_lkid, r->res_recover_locks_count,
	       r->res_name);
}

void dlm_dump_rsb(struct dlm_rsb *r)
{
	struct dlm_lkb *lkb;

	dlm_print_rsb(r);

	printk(KERN_ERR "rsb: root_list empty %d recover_list empty %d\n",
	       list_empty(&r->res_root_list), list_empty(&r->res_recover_list));
	printk(KERN_ERR "rsb lookup list\n");
	list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup)
		dlm_print_lkb(lkb);
	printk(KERN_ERR "rsb grant queue:\n");
	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
		dlm_print_lkb(lkb);
	printk(KERN_ERR "rsb convert queue:\n");
	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
		dlm_print_lkb(lkb);
	printk(KERN_ERR "rsb wait queue:\n");
	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
		dlm_print_lkb(lkb);
}

/* Threads cannot use the lockspace while it's being recovered */

static inline void dlm_lock_recovery(struct dlm_ls *ls)
{
	down_read(&ls->ls_in_recovery);
}

void dlm_unlock_recovery(struct dlm_ls *ls)
{
	up_read(&ls->ls_in_recovery);
}

int dlm_lock_recovery_try(struct dlm_ls *ls)
{
	return down_read_trylock(&ls->ls_in_recovery);
}
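
/* Illustrative pattern (a sketch, not lifted from this file): callers that
 * must not block on recovery use the trylock form and bail out while the
 * lockspace is being recovered:
 *
 *	if (!dlm_lock_recovery_try(ls))
 *		return -EAGAIN;		// hypothetical error choice
 *	...operate on the lockspace...
 *	dlm_unlock_recovery(ls);
 */
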

static inline int can_be_queued(struct dlm_lkb *lkb)
{
	return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE);
}

static inline int force_blocking_asts(struct dlm_lkb *lkb)
{
	return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST);
}

static inline int is_demoted(struct dlm_lkb *lkb)
{
	return (lkb->lkb_sbflags & DLM_SBF_DEMOTED);
}

static inline int is_altmode(struct dlm_lkb *lkb)
{
	return (lkb->lkb_sbflags & DLM_SBF_ALTMODE);
}

static inline int is_granted(struct dlm_lkb *lkb)
{
	return (lkb->lkb_status == DLM_LKSTS_GRANTED);
}

static inline int is_remote(struct dlm_rsb *r)
{
	DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r););
	return !!r->res_nodeid;
}

static inline int is_process_copy(struct dlm_lkb *lkb)
{
	return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY));
}

static inline int is_master_copy(struct dlm_lkb *lkb)
{
	return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0;
}

static inline int middle_conversion(struct dlm_lkb *lkb)
{
	if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) ||
	    (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW))
		return 1;
	return 0;
}

static inline int down_conversion(struct dlm_lkb *lkb)
{
	return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
}

static inline int is_overlap_unlock(struct dlm_lkb *lkb)
{
	return lkb->lkb_flags & DLM_IFL_OVERLAP_UNLOCK;
}

static inline int is_overlap_cancel(struct dlm_lkb *lkb)
{
	return lkb->lkb_flags & DLM_IFL_OVERLAP_CANCEL;
}

static inline int is_overlap(struct dlm_lkb *lkb)
{
	return (lkb->lkb_flags & (DLM_IFL_OVERLAP_UNLOCK |
				  DLM_IFL_OVERLAP_CANCEL));
}

static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
	if (is_master_copy(lkb))
		return;

	del_timeout(lkb);

	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););

	/* if the operation was a cancel, then return -DLM_ECANCEL; if a
	   timeout caused the cancel, then return -ETIMEDOUT */
	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
		lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
		rv = -ETIMEDOUT;
	}

	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
		lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
		rv = -EDEADLK;
	}

	dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
}

static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
	queue_cast(r, lkb,
		   is_overlap_unlock(lkb) ? -DLM_EUNLOCK : -DLM_ECANCEL);
}

static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
{
	if (is_master_copy(lkb)) {
		send_bast(r, lkb, rqmode);
	} else {
		dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
	}
}

/*
 * Basic operations on rsb's and lkb's
 */

/* This is only called to add a reference when the code already holds
   a valid reference to the rsb, so there's no need for locking. */

static inline void hold_rsb(struct dlm_rsb *r)
{
	kref_get(&r->res_ref);
}

void dlm_hold_rsb(struct dlm_rsb *r)
{
	hold_rsb(r);
}

/* When all references to the rsb are gone it's transferred to
   the tossed list for later disposal. */

static void put_rsb(struct dlm_rsb *r)
{
	struct dlm_ls *ls = r->res_ls;
	uint32_t bucket = r->res_bucket;

	spin_lock(&ls->ls_rsbtbl[bucket].lock);
	kref_put(&r->res_ref, toss_rsb);
	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}

void dlm_put_rsb(struct dlm_rsb *r)
{
	put_rsb(r);
}

static int pre_rsb_struct(struct dlm_ls *ls)
{
	struct dlm_rsb *r1, *r2;
	int count = 0;

	spin_lock(&ls->ls_new_rsb_spin);
	if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
		spin_unlock(&ls->ls_new_rsb_spin);
		return 0;
	}
	spin_unlock(&ls->ls_new_rsb_spin);

	r1 = dlm_allocate_rsb(ls);
	r2 = dlm_allocate_rsb(ls);

	spin_lock(&ls->ls_new_rsb_spin);
	if (r1) {
		list_add(&r1->res_hashchain, &ls->ls_new_rsb);
		ls->ls_new_rsb_count++;
	}
	if (r2) {
		list_add(&r2->res_hashchain, &ls->ls_new_rsb);
		ls->ls_new_rsb_count++;
	}
	count = ls->ls_new_rsb_count;
	spin_unlock(&ls->ls_new_rsb_spin);

	if (!count)
		return -ENOMEM;
	return 0;
}

/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
   unlock any spinlocks, go back and call pre_rsb_struct again.
   Otherwise, take an rsb off the list and return it. */
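
/* Illustrative sketch of the preallocation protocol described above (the
 * same retry shape appears in find_rsb_dir() and dlm_master_lookup()
 * later in this file):
 *
 *  retry:
 *	error = pre_rsb_struct(ls);			// refill pool, may sleep
 *	if (error < 0)
 *		goto out;
 *	spin_lock(&ls->ls_rsbtbl[b].lock);
 *	error = get_rsb_struct(ls, name, len, &r);	// never sleeps
 *	if (error == -EAGAIN) {				// pool ran dry meanwhile
 *		spin_unlock(&ls->ls_rsbtbl[b].lock);
 *		goto retry;
 *	}
 */
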

static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
			  struct dlm_rsb **r_ret)
{
	struct dlm_rsb *r;
	int count;

	spin_lock(&ls->ls_new_rsb_spin);
	if (list_empty(&ls->ls_new_rsb)) {
		count = ls->ls_new_rsb_count;
		spin_unlock(&ls->ls_new_rsb_spin);
		log_debug(ls, "find_rsb retry %d %d %s",
			  count, dlm_config.ci_new_rsb_count, name);
		return -EAGAIN;
	}

	r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
	list_del(&r->res_hashchain);
	/* Convert the empty list_head to a NULL rb_node for tree usage: */
	memset(&r->res_hashnode, 0, sizeof(struct rb_node));
	ls->ls_new_rsb_count--;
	spin_unlock(&ls->ls_new_rsb_spin);

	r->res_ls = ls;
	r->res_length = len;
	memcpy(r->res_name, name, len);
	mutex_init(&r->res_mutex);

	INIT_LIST_HEAD(&r->res_lookup);
	INIT_LIST_HEAD(&r->res_grantqueue);
	INIT_LIST_HEAD(&r->res_convertqueue);
	INIT_LIST_HEAD(&r->res_waitqueue);
	INIT_LIST_HEAD(&r->res_root_list);
	INIT_LIST_HEAD(&r->res_recover_list);

	*r_ret = r;
	return 0;
}

static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
{
	char maxname[DLM_RESNAME_MAXLEN];

	memset(maxname, 0, DLM_RESNAME_MAXLEN);
	memcpy(maxname, name, nlen);
	return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
}

int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
			struct dlm_rsb **r_ret)
{
	struct rb_node *node = tree->rb_node;
	struct dlm_rsb *r;
	int rc;

	while (node) {
		r = rb_entry(node, struct dlm_rsb, res_hashnode);
		rc = rsb_cmp(r, name, len);
		if (rc < 0)
			node = node->rb_left;
		else if (rc > 0)
			node = node->rb_right;
		else
			goto found;
	}
	*r_ret = NULL;
	return -EBADR;

 found:
	*r_ret = r;
	return 0;
}

static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
{
	struct rb_node **newn = &tree->rb_node;
	struct rb_node *parent = NULL;
	int rc;

	while (*newn) {
		struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb,
					       res_hashnode);

		parent = *newn;
		rc = rsb_cmp(cur, rsb->res_name, rsb->res_length);
		if (rc < 0)
			newn = &parent->rb_left;
		else if (rc > 0)
			newn = &parent->rb_right;
		else {
			log_print("rsb_insert match");
			dlm_dump_rsb(rsb);
			dlm_dump_rsb(cur);
			return -EEXIST;
		}
	}

	rb_link_node(&rsb->res_hashnode, parent, newn);
	rb_insert_color(&rsb->res_hashnode, tree);
	return 0;
}

/*
 * Find rsb in rsbtbl and potentially create/add one
 *
 * Delaying the release of rsb's has a similar benefit to applications keeping
 * NL locks on an rsb, but without the guarantee that the cached master value
 * will still be valid when the rsb is reused.  Apps aren't always smart enough
 * to keep NL locks on an rsb that they may lock again shortly; this can lead
 * to excessive master lookups and removals if we don't delay the release.
 *
 * Searching for an rsb means looking through both the normal list and toss
 * list.  When found on the toss list the rsb is moved to the normal list with
 * ref count of 1; when found on normal list the ref count is incremented.
 *
 * rsb's on the keep list are being used locally and refcounted.
 * rsb's on the toss list are not being used locally, and are not refcounted.
 *
 * The toss list rsb's were either
 * - previously used locally but not any more (were on keep list, then
 *   moved to toss list when last refcount dropped)
 * - created and put on toss list as a directory record for a lookup
 *   (we are the dir node for the res, but are not using the res right now,
 *   but some other node is)
 *
 * The purpose of find_rsb() is to return a refcounted rsb for local use.
 * So, if the given rsb is on the toss list, it is moved to the keep list
 * before being returned.
 *
 * toss_rsb() happens when all local usage of the rsb is done, i.e. no
 * more refcounts exist, so the rsb is moved from the keep list to the
 * toss list.
 *
 * rsb's on both keep and toss lists are used for doing name to master
 * lookups.  rsb's that are in use locally (and being refcounted) are on
 * the keep list, rsb's that are not in use locally (not refcounted) and
 * only exist for name/master lookups are on the toss list.
 *
 * rsb's on the toss list whose dir_nodeid is not local can have stale
 * name/master mappings.  So, remote requests on such rsb's can potentially
 * return with an error, which means the mapping is stale and needs to
 * be updated with a new lookup.  (The idea behind MASTER UNCERTAIN and
 * first_lkid is to keep only a single outstanding request on an rsb
 * while that rsb has a potentially stale master.)
 */
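
/* Keep/toss lifecycle in brief (a summary sketch of the comment above):
 *
 *	create (get_rsb_struct)        refcount 1, inserted on keep list
 *	find_rsb hit on keep list  ->  kref_get, stays on keep list
 *	find_rsb hit on toss list  ->  moved to keep list, refcount 1
 *	last put_rsb/unhold_rsb    ->  toss_rsb() moves keep -> toss and
 *	                               stamps res_toss_time for later shrink
 */
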

static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
			uint32_t hash, uint32_t b,
			int dir_nodeid, int from_nodeid,
			unsigned int flags, struct dlm_rsb **r_ret)
{
	struct dlm_rsb *r = NULL;
	int our_nodeid = dlm_our_nodeid();
	int from_local = 0;
	int from_other = 0;
	int from_dir = 0;
	int create = 0;
	int error;

	if (flags & R_RECEIVE_REQUEST) {
		if (from_nodeid == dir_nodeid)
			from_dir = 1;
		else
			from_other = 1;
	} else if (flags & R_REQUEST) {
		from_local = 1;
	}

	/*
	 * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so
	 * from_nodeid has sent us a lock in dlm_recover_locks, believing
	 * we're the new master.  Our local recovery may not have set
	 * res_master_nodeid to our_nodeid yet, so allow either.  Don't
	 * create the rsb; dlm_recover_process_copy() will handle EBADR
	 * by resending.
	 *
	 * If someone sends us a request, we are the dir node, and we do
	 * not find the rsb anywhere, then recreate it.  This happens if
	 * someone sends us a request after we have removed/freed an rsb
	 * from our toss list.  (They sent a request instead of lookup
	 * because they are using an rsb from their toss list.)
	 */

	if (from_local || from_dir ||
	    (from_other && (dir_nodeid == our_nodeid))) {
		create = 1;
	}

 retry:
	if (create) {
		error = pre_rsb_struct(ls);
		if (error < 0)
			goto out;
	}

	spin_lock(&ls->ls_rsbtbl[b].lock);

	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
	if (error)
		goto do_toss;

	/*
	 * rsb is active, so we can't check master_nodeid without lock_rsb.
	 */

	kref_get(&r->res_ref);
	error = 0;
	goto out_unlock;


 do_toss:
	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
	if (error)
		goto do_new;

	/*
	 * rsb found inactive (master_nodeid may be out of date unless
	 * we are the dir_nodeid or were the master).  No other thread
	 * is using this rsb because it's on the toss list, so we can
	 * look at or update res_master_nodeid without lock_rsb.
	 */

	if ((r->res_master_nodeid != our_nodeid) && from_other) {
		/* our rsb was not master, and another node (not the dir node)
		   has sent us a request */
		log_debug(ls, "find_rsb toss from_other %d master %d dir %d %s",
			  from_nodeid, r->res_master_nodeid, dir_nodeid,
			  r->res_name);
		error = -ENOTBLK;
		goto out_unlock;
	}

	if ((r->res_master_nodeid != our_nodeid) && from_dir) {
		/* don't think this should ever happen */
		log_error(ls, "find_rsb toss from_dir %d master %d",
			  from_nodeid, r->res_master_nodeid);
		dlm_print_rsb(r);
		/* fix it and go on */
		r->res_master_nodeid = our_nodeid;
		r->res_nodeid = 0;
		rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
		r->res_first_lkid = 0;
	}

	if (from_local && (r->res_master_nodeid != our_nodeid)) {
		/* Because we have held no locks on this rsb,
		   res_master_nodeid could have become stale. */
		rsb_set_flag(r, RSB_MASTER_UNCERTAIN);
		r->res_first_lkid = 0;
	}

	rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
	error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
	goto out_unlock;


 do_new:
	/*
	 * rsb not found
	 */

	if (error == -EBADR && !create)
		goto out_unlock;

	error = get_rsb_struct(ls, name, len, &r);
	if (error == -EAGAIN) {
		spin_unlock(&ls->ls_rsbtbl[b].lock);
		goto retry;
	}
	if (error)
		goto out_unlock;

	r->res_hash = hash;
	r->res_bucket = b;
	r->res_dir_nodeid = dir_nodeid;
	kref_init(&r->res_ref);

	if (from_dir) {
		/* want to see how often this happens */
		log_debug(ls, "find_rsb new from_dir %d recreate %s",
			  from_nodeid, r->res_name);
		r->res_master_nodeid = our_nodeid;
		r->res_nodeid = 0;
		goto out_add;
	}

	if (from_other && (dir_nodeid != our_nodeid)) {
		/* should never happen */
		log_error(ls, "find_rsb new from_other %d dir %d our %d %s",
			  from_nodeid, dir_nodeid, our_nodeid, r->res_name);
		dlm_free_rsb(r);
		r = NULL;
		error = -ENOTBLK;
		goto out_unlock;
	}

	if (from_other) {
		log_debug(ls, "find_rsb new from_other %d dir %d %s",
			  from_nodeid, dir_nodeid, r->res_name);
	}

	if (dir_nodeid == our_nodeid) {
		/* When we are the dir nodeid, we can set the master
		   node immediately */
		r->res_master_nodeid = our_nodeid;
		r->res_nodeid = 0;
	} else {
		/* set_master will send_lookup to dir_nodeid */
		r->res_master_nodeid = 0;
		r->res_nodeid = -1;
	}

 out_add:
	error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
 out_unlock:
	spin_unlock(&ls->ls_rsbtbl[b].lock);
 out:
	*r_ret = r;
	return error;
}

/* During recovery, other nodes can send us new MSTCPY locks (from
   dlm_recover_locks) before we've made ourselves master (in
   dlm_recover_masters). */

static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
			  uint32_t hash, uint32_t b,
			  int dir_nodeid, int from_nodeid,
			  unsigned int flags, struct dlm_rsb **r_ret)
{
	struct dlm_rsb *r = NULL;
	int our_nodeid = dlm_our_nodeid();
	int recover = (flags & R_RECEIVE_RECOVER);
	int error;

 retry:
	error = pre_rsb_struct(ls);
	if (error < 0)
		goto out;

	spin_lock(&ls->ls_rsbtbl[b].lock);

	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
	if (error)
		goto do_toss;

	/*
	 * rsb is active, so we can't check master_nodeid without lock_rsb.
	 */

	kref_get(&r->res_ref);
	goto out_unlock;


 do_toss:
	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
	if (error)
		goto do_new;

	/*
	 * rsb found inactive. No other thread is using this rsb because
	 * it's on the toss list, so we can look at or update
	 * res_master_nodeid without lock_rsb.
	 */

	if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) {
		/* our rsb is not master, and another node has sent us a
		   request; this should never happen */
		log_error(ls, "find_rsb toss from_nodeid %d master %d dir %d",
			  from_nodeid, r->res_master_nodeid, dir_nodeid);
		dlm_print_rsb(r);
		error = -ENOTBLK;
		goto out_unlock;
	}

	if (!recover && (r->res_master_nodeid != our_nodeid) &&
	    (dir_nodeid == our_nodeid)) {
		/* our rsb is not master, and we are dir; may as well fix it;
		   this should never happen */
		log_error(ls, "find_rsb toss our %d master %d dir %d",
			  our_nodeid, r->res_master_nodeid, dir_nodeid);
		dlm_print_rsb(r);
		r->res_master_nodeid = our_nodeid;
		r->res_nodeid = 0;
	}

	rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
	error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
	goto out_unlock;


 do_new:
	/*
	 * rsb not found
	 */

	error = get_rsb_struct(ls, name, len, &r);
	if (error == -EAGAIN) {
		spin_unlock(&ls->ls_rsbtbl[b].lock);
		goto retry;
	}
	if (error)
		goto out_unlock;

	r->res_hash = hash;
	r->res_bucket = b;
	r->res_dir_nodeid = dir_nodeid;
	r->res_master_nodeid = dir_nodeid;
	r->res_nodeid = (dir_nodeid == our_nodeid) ? 0 : dir_nodeid;
	kref_init(&r->res_ref);

	error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
 out_unlock:
	spin_unlock(&ls->ls_rsbtbl[b].lock);
 out:
	*r_ret = r;
	return error;
}

static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid,
		    unsigned int flags, struct dlm_rsb **r_ret)
{
	uint32_t hash, b;
	int dir_nodeid;

	if (len > DLM_RESNAME_MAXLEN)
		return -EINVAL;

	hash = jhash(name, len, 0);
	b = hash & (ls->ls_rsbtbl_size - 1);

	dir_nodeid = dlm_hash2nodeid(ls, hash);

	if (dlm_no_directory(ls))
		return find_rsb_nodir(ls, name, len, hash, b, dir_nodeid,
				      from_nodeid, flags, r_ret);
	else
		return find_rsb_dir(ls, name, len, hash, b, dir_nodeid,
				    from_nodeid, flags, r_ret);
}

/* we have received a request and found that res_master_nodeid != our_nodeid,
   so we need to return an error or make ourselves the master */

static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
				  int from_nodeid)
{
	if (dlm_no_directory(ls)) {
		log_error(ls, "find_rsb keep from_nodeid %d master %d dir %d",
			  from_nodeid, r->res_master_nodeid,
			  r->res_dir_nodeid);
		dlm_print_rsb(r);
		return -ENOTBLK;
	}

	if (from_nodeid != r->res_dir_nodeid) {
		/* our rsb is not master, and another node (not the dir node)
		   has sent us a request.  this is much more common when our
		   master_nodeid is zero, so limit debug to non-zero.  */

		if (r->res_master_nodeid) {
			log_debug(ls, "validate master from_other %d master %d "
				  "dir %d first %x %s", from_nodeid,
				  r->res_master_nodeid, r->res_dir_nodeid,
				  r->res_first_lkid, r->res_name);
		}
		return -ENOTBLK;
	} else {
		/* our rsb is not master, but the dir nodeid has sent us a
		   request; this could happen with master 0 / res_nodeid -1 */

		if (r->res_master_nodeid) {
			log_error(ls, "validate master from_dir %d master %d "
				  "first %x %s",
				  from_nodeid, r->res_master_nodeid,
				  r->res_first_lkid, r->res_name);
		}

		r->res_master_nodeid = dlm_our_nodeid();
		r->res_nodeid = 0;
		return 0;
	}
}

/*
 * We're the dir node for this res and another node wants to know the
 * master nodeid.  During normal operation (non recovery) this is only
 * called from receive_lookup(); master lookups when the local node is
 * the dir node are done by find_rsb().
 *
 * normal operation, we are the dir node for a resource
 * . _request_lock
 * . set_master
 * . send_lookup
 * . receive_lookup
 * . dlm_master_lookup flags 0
 *
 * recover directory, we are rebuilding dir for all resources
 * . dlm_recover_directory
 * . dlm_rcom_names
 *   remote node sends back the rsb names it is master of and we are dir of
 * . dlm_master_lookup RECOVER_DIR (fix_master 0, from_master 1)
 *   we either create new rsb setting remote node as master, or find existing
 *   rsb and set master to be the remote node.
 *
 * recover masters, we are finding the new master for resources
 * . dlm_recover_masters
 * . recover_master
 * . dlm_send_rcom_lookup
 * . receive_rcom_lookup
 * . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0)
 */
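
/* Illustrative call (a sketch of the receive_lookup() case above; the
 * variable names are hypothetical):
 *
 *	int master_nodeid, result;
 *	error = dlm_master_lookup(ls, from_nodeid, name, namelen, 0,
 *				  &master_nodeid, &result);
 *	// result is DLM_LU_MATCH if a dir entry already existed,
 *	// DLM_LU_ADD if this lookup created one
 */
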

int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
		      unsigned int flags, int *r_nodeid, int *result)
{
	struct dlm_rsb *r = NULL;
	uint32_t hash, b;
	int from_master = (flags & DLM_LU_RECOVER_DIR);
	int fix_master = (flags & DLM_LU_RECOVER_MASTER);
	int our_nodeid = dlm_our_nodeid();
	int dir_nodeid, error, toss_list = 0;

	if (len > DLM_RESNAME_MAXLEN)
		return -EINVAL;

	if (from_nodeid == our_nodeid) {
		log_error(ls, "dlm_master_lookup from our_nodeid %d flags %x",
			  our_nodeid, flags);
		return -EINVAL;
	}

	hash = jhash(name, len, 0);
	b = hash & (ls->ls_rsbtbl_size - 1);

	dir_nodeid = dlm_hash2nodeid(ls, hash);
	if (dir_nodeid != our_nodeid) {
		log_error(ls, "dlm_master_lookup from %d dir %d our %d h %x %d",
			  from_nodeid, dir_nodeid, our_nodeid, hash,
			  ls->ls_num_nodes);
		*r_nodeid = -1;
		return -EINVAL;
	}

 retry:
	error = pre_rsb_struct(ls);
	if (error < 0)
		return error;

	spin_lock(&ls->ls_rsbtbl[b].lock);
	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
	if (!error) {
		/* because the rsb is active, we need to lock_rsb before
		   checking/changing res_master_nodeid */
952*4882a593Smuzhiyun 		hold_rsb(r);
953*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
954*4882a593Smuzhiyun 		lock_rsb(r);
955*4882a593Smuzhiyun 		goto found;
956*4882a593Smuzhiyun 	}
957*4882a593Smuzhiyun 
958*4882a593Smuzhiyun 	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
959*4882a593Smuzhiyun 	if (error)
960*4882a593Smuzhiyun 		goto not_found;
961*4882a593Smuzhiyun 
962*4882a593Smuzhiyun 	/* because the rsb is inactive (on toss list), it's not refcounted
963*4882a593Smuzhiyun 	   and lock_rsb is not used, but is protected by the rsbtbl lock */
964*4882a593Smuzhiyun 
965*4882a593Smuzhiyun 	toss_list = 1;
966*4882a593Smuzhiyun  found:
967*4882a593Smuzhiyun 	if (r->res_dir_nodeid != our_nodeid) {
968*4882a593Smuzhiyun 		/* should not happen, but may as well fix it and carry on */
969*4882a593Smuzhiyun 		log_error(ls, "dlm_master_lookup res_dir %d our %d %s",
970*4882a593Smuzhiyun 			  r->res_dir_nodeid, our_nodeid, r->res_name);
971*4882a593Smuzhiyun 		r->res_dir_nodeid = our_nodeid;
972*4882a593Smuzhiyun 	}
973*4882a593Smuzhiyun 
974*4882a593Smuzhiyun 	if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
975*4882a593Smuzhiyun 		/* Recovery uses this function to set a new master when
976*4882a593Smuzhiyun 		   the previous master failed.  Setting NEW_MASTER will
977*4882a593Smuzhiyun 		   force dlm_recover_masters to call recover_master on this
978*4882a593Smuzhiyun 		   rsb even though the res_nodeid is no longer removed. */
979*4882a593Smuzhiyun 
980*4882a593Smuzhiyun 		r->res_master_nodeid = from_nodeid;
981*4882a593Smuzhiyun 		r->res_nodeid = from_nodeid;
982*4882a593Smuzhiyun 		rsb_set_flag(r, RSB_NEW_MASTER);
983*4882a593Smuzhiyun 
984*4882a593Smuzhiyun 		if (toss_list) {
985*4882a593Smuzhiyun 			/* I don't think we should ever find it on toss list. */
986*4882a593Smuzhiyun 			log_error(ls, "dlm_master_lookup fix_master on toss");
987*4882a593Smuzhiyun 			dlm_dump_rsb(r);
988*4882a593Smuzhiyun 		}
989*4882a593Smuzhiyun 	}
990*4882a593Smuzhiyun 
991*4882a593Smuzhiyun 	if (from_master && (r->res_master_nodeid != from_nodeid)) {
992*4882a593Smuzhiyun 		/* this will happen if from_nodeid became master during
993*4882a593Smuzhiyun 		   a previous recovery cycle, and we aborted the previous
994*4882a593Smuzhiyun 		   cycle before recovering this master value */
995*4882a593Smuzhiyun 
996*4882a593Smuzhiyun 		log_limit(ls, "dlm_master_lookup from_master %d "
997*4882a593Smuzhiyun 			  "master_nodeid %d res_nodeid %d first %x %s",
998*4882a593Smuzhiyun 			  from_nodeid, r->res_master_nodeid, r->res_nodeid,
999*4882a593Smuzhiyun 			  r->res_first_lkid, r->res_name);
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 		if (r->res_master_nodeid == our_nodeid) {
1002*4882a593Smuzhiyun 			log_error(ls, "from_master %d our_master", from_nodeid);
1003*4882a593Smuzhiyun 			dlm_dump_rsb(r);
1004*4882a593Smuzhiyun 			goto out_found;
1005*4882a593Smuzhiyun 		}
1006*4882a593Smuzhiyun 
1007*4882a593Smuzhiyun 		r->res_master_nodeid = from_nodeid;
1008*4882a593Smuzhiyun 		r->res_nodeid = from_nodeid;
1009*4882a593Smuzhiyun 		rsb_set_flag(r, RSB_NEW_MASTER);
1010*4882a593Smuzhiyun 	}
1011*4882a593Smuzhiyun 
1012*4882a593Smuzhiyun 	if (!r->res_master_nodeid) {
1013*4882a593Smuzhiyun 		/* this will happen if recovery happens while we're looking
1014*4882a593Smuzhiyun 		   up the master for this rsb */
1015*4882a593Smuzhiyun 
1016*4882a593Smuzhiyun 		log_debug(ls, "dlm_master_lookup master 0 to %d first %x %s",
1017*4882a593Smuzhiyun 			  from_nodeid, r->res_first_lkid, r->res_name);
1018*4882a593Smuzhiyun 		r->res_master_nodeid = from_nodeid;
1019*4882a593Smuzhiyun 		r->res_nodeid = from_nodeid;
1020*4882a593Smuzhiyun 	}
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun 	if (!from_master && !fix_master &&
1023*4882a593Smuzhiyun 	    (r->res_master_nodeid == from_nodeid)) {
1024*4882a593Smuzhiyun 		/* this can happen when the master sends remove, the dir node
1025*4882a593Smuzhiyun 		   finds the rsb on the keep list and ignores the remove,
1026*4882a593Smuzhiyun 		   and the former master sends a lookup */
1027*4882a593Smuzhiyun 
1028*4882a593Smuzhiyun 		log_limit(ls, "dlm_master_lookup from master %d flags %x "
1029*4882a593Smuzhiyun 			  "first %x %s", from_nodeid, flags,
1030*4882a593Smuzhiyun 			  r->res_first_lkid, r->res_name);
1031*4882a593Smuzhiyun 	}
1032*4882a593Smuzhiyun 
1033*4882a593Smuzhiyun  out_found:
1034*4882a593Smuzhiyun 	*r_nodeid = r->res_master_nodeid;
1035*4882a593Smuzhiyun 	if (result)
1036*4882a593Smuzhiyun 		*result = DLM_LU_MATCH;
1037*4882a593Smuzhiyun 
1038*4882a593Smuzhiyun 	if (toss_list) {
1039*4882a593Smuzhiyun 		r->res_toss_time = jiffies;
1040*4882a593Smuzhiyun 		/* the rsb was inactive (on toss list) */
1041*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
1042*4882a593Smuzhiyun 	} else {
1043*4882a593Smuzhiyun 		/* the rsb was active */
1044*4882a593Smuzhiyun 		unlock_rsb(r);
1045*4882a593Smuzhiyun 		put_rsb(r);
1046*4882a593Smuzhiyun 	}
1047*4882a593Smuzhiyun 	return 0;
1048*4882a593Smuzhiyun 
1049*4882a593Smuzhiyun  not_found:
1050*4882a593Smuzhiyun 	error = get_rsb_struct(ls, name, len, &r);
1051*4882a593Smuzhiyun 	if (error == -EAGAIN) {
1052*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
1053*4882a593Smuzhiyun 		goto retry;
1054*4882a593Smuzhiyun 	}
1055*4882a593Smuzhiyun 	if (error)
1056*4882a593Smuzhiyun 		goto out_unlock;
1057*4882a593Smuzhiyun 
1058*4882a593Smuzhiyun 	r->res_hash = hash;
1059*4882a593Smuzhiyun 	r->res_bucket = b;
1060*4882a593Smuzhiyun 	r->res_dir_nodeid = our_nodeid;
1061*4882a593Smuzhiyun 	r->res_master_nodeid = from_nodeid;
1062*4882a593Smuzhiyun 	r->res_nodeid = from_nodeid;
1063*4882a593Smuzhiyun 	kref_init(&r->res_ref);
1064*4882a593Smuzhiyun 	r->res_toss_time = jiffies;
1065*4882a593Smuzhiyun 
1066*4882a593Smuzhiyun 	error = rsb_insert(r, &ls->ls_rsbtbl[b].toss);
1067*4882a593Smuzhiyun 	if (error) {
1068*4882a593Smuzhiyun 		/* should never happen */
1069*4882a593Smuzhiyun 		dlm_free_rsb(r);
1070*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
1071*4882a593Smuzhiyun 		goto retry;
1072*4882a593Smuzhiyun 	}
1073*4882a593Smuzhiyun 
1074*4882a593Smuzhiyun 	if (result)
1075*4882a593Smuzhiyun 		*result = DLM_LU_ADD;
1076*4882a593Smuzhiyun 	*r_nodeid = from_nodeid;
1077*4882a593Smuzhiyun 	error = 0;
1078*4882a593Smuzhiyun  out_unlock:
1079*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[b].lock);
1080*4882a593Smuzhiyun 	return error;
1081*4882a593Smuzhiyun }
1082*4882a593Smuzhiyun 
dlm_dump_rsb_hash(struct dlm_ls * ls,uint32_t hash)1083*4882a593Smuzhiyun static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash)
1084*4882a593Smuzhiyun {
1085*4882a593Smuzhiyun 	struct rb_node *n;
1086*4882a593Smuzhiyun 	struct dlm_rsb *r;
1087*4882a593Smuzhiyun 	int i;
1088*4882a593Smuzhiyun 
1089*4882a593Smuzhiyun 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
1090*4882a593Smuzhiyun 		spin_lock(&ls->ls_rsbtbl[i].lock);
1091*4882a593Smuzhiyun 		for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
1092*4882a593Smuzhiyun 			r = rb_entry(n, struct dlm_rsb, res_hashnode);
1093*4882a593Smuzhiyun 			if (r->res_hash == hash)
1094*4882a593Smuzhiyun 				dlm_dump_rsb(r);
1095*4882a593Smuzhiyun 		}
1096*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[i].lock);
1097*4882a593Smuzhiyun 	}
1098*4882a593Smuzhiyun }
1099*4882a593Smuzhiyun 
1100*4882a593Smuzhiyun void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len)
1101*4882a593Smuzhiyun {
1102*4882a593Smuzhiyun 	struct dlm_rsb *r = NULL;
1103*4882a593Smuzhiyun 	uint32_t hash, b;
1104*4882a593Smuzhiyun 	int error;
1105*4882a593Smuzhiyun 
1106*4882a593Smuzhiyun 	hash = jhash(name, len, 0);
1107*4882a593Smuzhiyun 	b = hash & (ls->ls_rsbtbl_size - 1);
1108*4882a593Smuzhiyun 
1109*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[b].lock);
1110*4882a593Smuzhiyun 	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
1111*4882a593Smuzhiyun 	if (!error)
1112*4882a593Smuzhiyun 		goto out_dump;
1113*4882a593Smuzhiyun 
1114*4882a593Smuzhiyun 	error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
1115*4882a593Smuzhiyun 	if (error)
1116*4882a593Smuzhiyun 		goto out;
1117*4882a593Smuzhiyun  out_dump:
1118*4882a593Smuzhiyun 	dlm_dump_rsb(r);
1119*4882a593Smuzhiyun  out:
1120*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[b].lock);
1121*4882a593Smuzhiyun }
1122*4882a593Smuzhiyun 
1123*4882a593Smuzhiyun static void toss_rsb(struct kref *kref)
1124*4882a593Smuzhiyun {
1125*4882a593Smuzhiyun 	struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
1126*4882a593Smuzhiyun 	struct dlm_ls *ls = r->res_ls;
1127*4882a593Smuzhiyun 
1128*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
1129*4882a593Smuzhiyun 	kref_init(&r->res_ref);
1130*4882a593Smuzhiyun 	rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1131*4882a593Smuzhiyun 	rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1132*4882a593Smuzhiyun 	r->res_toss_time = jiffies;
1133*4882a593Smuzhiyun 	ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1134*4882a593Smuzhiyun 	if (r->res_lvbptr) {
1135*4882a593Smuzhiyun 		dlm_free_lvb(r->res_lvbptr);
1136*4882a593Smuzhiyun 		r->res_lvbptr = NULL;
1137*4882a593Smuzhiyun 	}
1138*4882a593Smuzhiyun }
1139*4882a593Smuzhiyun 
1140*4882a593Smuzhiyun /* See comment for unhold_lkb */
1141*4882a593Smuzhiyun 
1142*4882a593Smuzhiyun static void unhold_rsb(struct dlm_rsb *r)
1143*4882a593Smuzhiyun {
1144*4882a593Smuzhiyun 	int rv;
1145*4882a593Smuzhiyun 	rv = kref_put(&r->res_ref, toss_rsb);
1146*4882a593Smuzhiyun 	DLM_ASSERT(!rv, dlm_dump_rsb(r););
1147*4882a593Smuzhiyun }
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun static void kill_rsb(struct kref *kref)
1150*4882a593Smuzhiyun {
1151*4882a593Smuzhiyun 	struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
1152*4882a593Smuzhiyun 
1153*4882a593Smuzhiyun 	/* All work is done after the return from kref_put() so we
1154*4882a593Smuzhiyun 	   can release the write_lock before the remove and free. */
1155*4882a593Smuzhiyun 
1156*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_lookup), dlm_dump_rsb(r););
1157*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_grantqueue), dlm_dump_rsb(r););
1158*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_convertqueue), dlm_dump_rsb(r););
1159*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r););
1160*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r););
1161*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r););
1162*4882a593Smuzhiyun }
1163*4882a593Smuzhiyun 
1164*4882a593Smuzhiyun /* Attaching/detaching lkb's from rsb's is for rsb reference counting.
1165*4882a593Smuzhiyun    The rsb must exist as long as any lkb's for it do. */
1166*4882a593Smuzhiyun 
1167*4882a593Smuzhiyun static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
1168*4882a593Smuzhiyun {
1169*4882a593Smuzhiyun 	hold_rsb(r);
1170*4882a593Smuzhiyun 	lkb->lkb_resource = r;
1171*4882a593Smuzhiyun }
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun static void detach_lkb(struct dlm_lkb *lkb)
1174*4882a593Smuzhiyun {
1175*4882a593Smuzhiyun 	if (lkb->lkb_resource) {
1176*4882a593Smuzhiyun 		put_rsb(lkb->lkb_resource);
1177*4882a593Smuzhiyun 		lkb->lkb_resource = NULL;
1178*4882a593Smuzhiyun 	}
1179*4882a593Smuzhiyun }
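
/* Note (added): attach_lkb/detach_lkb pair hold_rsb/put_rsb so the rsb
   stays pinned while any lkb points at it; when the final reference
   drops, toss_rsb() above moves the rsb to the toss list rather than
   freeing it, leaving reclaim to shrink_bucket(). */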
1180*4882a593Smuzhiyun 
1181*4882a593Smuzhiyun static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
1182*4882a593Smuzhiyun {
1183*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
1184*4882a593Smuzhiyun 	int rv;
1185*4882a593Smuzhiyun 
1186*4882a593Smuzhiyun 	lkb = dlm_allocate_lkb(ls);
1187*4882a593Smuzhiyun 	if (!lkb)
1188*4882a593Smuzhiyun 		return -ENOMEM;
1189*4882a593Smuzhiyun 
1190*4882a593Smuzhiyun 	lkb->lkb_nodeid = -1;
1191*4882a593Smuzhiyun 	lkb->lkb_grmode = DLM_LOCK_IV;
1192*4882a593Smuzhiyun 	kref_init(&lkb->lkb_ref);
1193*4882a593Smuzhiyun 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
1194*4882a593Smuzhiyun 	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
1195*4882a593Smuzhiyun 	INIT_LIST_HEAD(&lkb->lkb_time_list);
1196*4882a593Smuzhiyun 	INIT_LIST_HEAD(&lkb->lkb_cb_list);
1197*4882a593Smuzhiyun 	mutex_init(&lkb->lkb_cb_mutex);
1198*4882a593Smuzhiyun 	INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
1199*4882a593Smuzhiyun 
1200*4882a593Smuzhiyun 	idr_preload(GFP_NOFS);
1201*4882a593Smuzhiyun 	spin_lock(&ls->ls_lkbidr_spin);
1202*4882a593Smuzhiyun 	rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
1203*4882a593Smuzhiyun 	if (rv >= 0)
1204*4882a593Smuzhiyun 		lkb->lkb_id = rv;
1205*4882a593Smuzhiyun 	spin_unlock(&ls->ls_lkbidr_spin);
1206*4882a593Smuzhiyun 	idr_preload_end();
1207*4882a593Smuzhiyun 
1208*4882a593Smuzhiyun 	if (rv < 0) {
1209*4882a593Smuzhiyun 		log_error(ls, "create_lkb idr error %d", rv);
1210*4882a593Smuzhiyun 		dlm_free_lkb(lkb);
1211*4882a593Smuzhiyun 		return rv;
1212*4882a593Smuzhiyun 	}
1213*4882a593Smuzhiyun 
1214*4882a593Smuzhiyun 	*lkb_ret = lkb;
1215*4882a593Smuzhiyun 	return 0;
1216*4882a593Smuzhiyun }
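
/* Note (added): idr_alloc() above is called with start=1 and end=0 (no
   upper bound), so lock ids begin at 1 and an lkid of 0 can serve as an
   invalid/unused value. */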
1217*4882a593Smuzhiyun 
1218*4882a593Smuzhiyun static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
1219*4882a593Smuzhiyun {
1220*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
1221*4882a593Smuzhiyun 
1222*4882a593Smuzhiyun 	spin_lock(&ls->ls_lkbidr_spin);
1223*4882a593Smuzhiyun 	lkb = idr_find(&ls->ls_lkbidr, lkid);
1224*4882a593Smuzhiyun 	if (lkb)
1225*4882a593Smuzhiyun 		kref_get(&lkb->lkb_ref);
1226*4882a593Smuzhiyun 	spin_unlock(&ls->ls_lkbidr_spin);
1227*4882a593Smuzhiyun 
1228*4882a593Smuzhiyun 	*lkb_ret = lkb;
1229*4882a593Smuzhiyun 	return lkb ? 0 : -ENOENT;
1230*4882a593Smuzhiyun }
1231*4882a593Smuzhiyun 
1232*4882a593Smuzhiyun static void kill_lkb(struct kref *kref)
1233*4882a593Smuzhiyun {
1234*4882a593Smuzhiyun 	struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);
1235*4882a593Smuzhiyun 
1236*4882a593Smuzhiyun 	/* All work is done after the return from kref_put() so we
1237*4882a593Smuzhiyun 	   can release the write_lock before the detach_lkb */
1238*4882a593Smuzhiyun 
1239*4882a593Smuzhiyun 	DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
1240*4882a593Smuzhiyun }
1241*4882a593Smuzhiyun 
1242*4882a593Smuzhiyun /* __put_lkb() is used when an lkb may not have an rsb attached to
1243*4882a593Smuzhiyun    it so we need to provide the lockspace explicitly */
1244*4882a593Smuzhiyun 
1245*4882a593Smuzhiyun static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
1246*4882a593Smuzhiyun {
1247*4882a593Smuzhiyun 	uint32_t lkid = lkb->lkb_id;
1248*4882a593Smuzhiyun 
1249*4882a593Smuzhiyun 	spin_lock(&ls->ls_lkbidr_spin);
1250*4882a593Smuzhiyun 	if (kref_put(&lkb->lkb_ref, kill_lkb)) {
1251*4882a593Smuzhiyun 		idr_remove(&ls->ls_lkbidr, lkid);
1252*4882a593Smuzhiyun 		spin_unlock(&ls->ls_lkbidr_spin);
1253*4882a593Smuzhiyun 
1254*4882a593Smuzhiyun 		detach_lkb(lkb);
1255*4882a593Smuzhiyun 
1256*4882a593Smuzhiyun 		/* for local/process lkbs, lvbptr points to caller's lksb */
1257*4882a593Smuzhiyun 		if (lkb->lkb_lvbptr && is_master_copy(lkb))
1258*4882a593Smuzhiyun 			dlm_free_lvb(lkb->lkb_lvbptr);
1259*4882a593Smuzhiyun 		dlm_free_lkb(lkb);
1260*4882a593Smuzhiyun 		return 1;
1261*4882a593Smuzhiyun 	} else {
1262*4882a593Smuzhiyun 		spin_unlock(&ls->ls_lkbidr_spin);
1263*4882a593Smuzhiyun 		return 0;
1264*4882a593Smuzhiyun 	}
1265*4882a593Smuzhiyun }
1266*4882a593Smuzhiyun 
1267*4882a593Smuzhiyun int dlm_put_lkb(struct dlm_lkb *lkb)
1268*4882a593Smuzhiyun {
1269*4882a593Smuzhiyun 	struct dlm_ls *ls;
1270*4882a593Smuzhiyun 
1271*4882a593Smuzhiyun 	DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb););
1272*4882a593Smuzhiyun 	DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb););
1273*4882a593Smuzhiyun 
1274*4882a593Smuzhiyun 	ls = lkb->lkb_resource->res_ls;
1275*4882a593Smuzhiyun 	return __put_lkb(ls, lkb);
1276*4882a593Smuzhiyun }
1277*4882a593Smuzhiyun 
1278*4882a593Smuzhiyun /* This is only called to add a reference when the code already holds
1279*4882a593Smuzhiyun    a valid reference to the lkb, so there's no need for locking. */
1280*4882a593Smuzhiyun 
1281*4882a593Smuzhiyun static inline void hold_lkb(struct dlm_lkb *lkb)
1282*4882a593Smuzhiyun {
1283*4882a593Smuzhiyun 	kref_get(&lkb->lkb_ref);
1284*4882a593Smuzhiyun }
1285*4882a593Smuzhiyun 
1286*4882a593Smuzhiyun /* This is called when we need to remove a reference and are certain
1287*4882a593Smuzhiyun    it's not the last ref.  e.g. del_lkb is always called between a
1288*4882a593Smuzhiyun    find_lkb/put_lkb and is always the inverse of a previous add_lkb.
1289*4882a593Smuzhiyun    put_lkb would work fine, but would involve unnecessary locking */
1290*4882a593Smuzhiyun 
1291*4882a593Smuzhiyun static inline void unhold_lkb(struct dlm_lkb *lkb)
1292*4882a593Smuzhiyun {
1293*4882a593Smuzhiyun 	int rv;
1294*4882a593Smuzhiyun 	rv = kref_put(&lkb->lkb_ref, kill_lkb);
1295*4882a593Smuzhiyun 	DLM_ASSERT(!rv, dlm_print_lkb(lkb););
1296*4882a593Smuzhiyun }
1297*4882a593Smuzhiyun 
1298*4882a593Smuzhiyun static void lkb_add_ordered(struct list_head *new, struct list_head *head,
1299*4882a593Smuzhiyun 			    int mode)
1300*4882a593Smuzhiyun {
1301*4882a593Smuzhiyun 	struct dlm_lkb *lkb = NULL;
1302*4882a593Smuzhiyun 
1303*4882a593Smuzhiyun 	list_for_each_entry(lkb, head, lkb_statequeue)
1304*4882a593Smuzhiyun 		if (lkb->lkb_rqmode < mode)
1305*4882a593Smuzhiyun 			break;
1306*4882a593Smuzhiyun 
1307*4882a593Smuzhiyun 	__list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
1308*4882a593Smuzhiyun }
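
/* Illustrative note (added): DLM modes order numerically (DLM_LOCK_IV=-1,
   NL=0, CR=1, CW=2, PR=3, PW=4, EX=5), so breaking at the first entry
   whose rqmode is lower and inserting before it keeps the list sorted
   in descending mode order. */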
1309*4882a593Smuzhiyun 
1310*4882a593Smuzhiyun /* add/remove lkb to rsb's grant/convert/wait queue */
1311*4882a593Smuzhiyun 
1312*4882a593Smuzhiyun static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status)
1313*4882a593Smuzhiyun {
1314*4882a593Smuzhiyun 	kref_get(&lkb->lkb_ref);
1315*4882a593Smuzhiyun 
1316*4882a593Smuzhiyun 	DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
1317*4882a593Smuzhiyun 
1318*4882a593Smuzhiyun 	lkb->lkb_timestamp = ktime_get();
1319*4882a593Smuzhiyun 
1320*4882a593Smuzhiyun 	lkb->lkb_status = status;
1321*4882a593Smuzhiyun 
1322*4882a593Smuzhiyun 	switch (status) {
1323*4882a593Smuzhiyun 	case DLM_LKSTS_WAITING:
1324*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
1325*4882a593Smuzhiyun 			list_add(&lkb->lkb_statequeue, &r->res_waitqueue);
1326*4882a593Smuzhiyun 		else
1327*4882a593Smuzhiyun 			list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue);
1328*4882a593Smuzhiyun 		break;
1329*4882a593Smuzhiyun 	case DLM_LKSTS_GRANTED:
1330*4882a593Smuzhiyun 		/* convention says granted locks kept in order of grmode */
1331*4882a593Smuzhiyun 		lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue,
1332*4882a593Smuzhiyun 				lkb->lkb_grmode);
1333*4882a593Smuzhiyun 		break;
1334*4882a593Smuzhiyun 	case DLM_LKSTS_CONVERT:
1335*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
1336*4882a593Smuzhiyun 			list_add(&lkb->lkb_statequeue, &r->res_convertqueue);
1337*4882a593Smuzhiyun 		else
1338*4882a593Smuzhiyun 			list_add_tail(&lkb->lkb_statequeue,
1339*4882a593Smuzhiyun 				      &r->res_convertqueue);
1340*4882a593Smuzhiyun 		break;
1341*4882a593Smuzhiyun 	default:
1342*4882a593Smuzhiyun 		DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status););
1343*4882a593Smuzhiyun 	}
1344*4882a593Smuzhiyun }
1345*4882a593Smuzhiyun 
1346*4882a593Smuzhiyun static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
1347*4882a593Smuzhiyun {
1348*4882a593Smuzhiyun 	lkb->lkb_status = 0;
1349*4882a593Smuzhiyun 	list_del(&lkb->lkb_statequeue);
1350*4882a593Smuzhiyun 	unhold_lkb(lkb);
1351*4882a593Smuzhiyun }
1352*4882a593Smuzhiyun 
1353*4882a593Smuzhiyun static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
1354*4882a593Smuzhiyun {
1355*4882a593Smuzhiyun 	hold_lkb(lkb);
1356*4882a593Smuzhiyun 	del_lkb(r, lkb);
1357*4882a593Smuzhiyun 	add_lkb(r, lkb, sts);
1358*4882a593Smuzhiyun 	unhold_lkb(lkb);
1359*4882a593Smuzhiyun }
1360*4882a593Smuzhiyun 
1361*4882a593Smuzhiyun static int msg_reply_type(int mstype)
1362*4882a593Smuzhiyun {
1363*4882a593Smuzhiyun 	switch (mstype) {
1364*4882a593Smuzhiyun 	case DLM_MSG_REQUEST:
1365*4882a593Smuzhiyun 		return DLM_MSG_REQUEST_REPLY;
1366*4882a593Smuzhiyun 	case DLM_MSG_CONVERT:
1367*4882a593Smuzhiyun 		return DLM_MSG_CONVERT_REPLY;
1368*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK:
1369*4882a593Smuzhiyun 		return DLM_MSG_UNLOCK_REPLY;
1370*4882a593Smuzhiyun 	case DLM_MSG_CANCEL:
1371*4882a593Smuzhiyun 		return DLM_MSG_CANCEL_REPLY;
1372*4882a593Smuzhiyun 	case DLM_MSG_LOOKUP:
1373*4882a593Smuzhiyun 		return DLM_MSG_LOOKUP_REPLY;
1374*4882a593Smuzhiyun 	}
1375*4882a593Smuzhiyun 	return -1;
1376*4882a593Smuzhiyun }
1377*4882a593Smuzhiyun 
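/* Note (added): warned[] acts as a fixed-size set of nodeids already
   warned about: return 1 if nodeid is present, otherwise record it in
   the first free slot and return 0, so the caller warns at most once
   per node. */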
1378*4882a593Smuzhiyun static int nodeid_warned(int nodeid, int num_nodes, int *warned)
1379*4882a593Smuzhiyun {
1380*4882a593Smuzhiyun 	int i;
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	for (i = 0; i < num_nodes; i++) {
1383*4882a593Smuzhiyun 		if (!warned[i]) {
1384*4882a593Smuzhiyun 			warned[i] = nodeid;
1385*4882a593Smuzhiyun 			return 0;
1386*4882a593Smuzhiyun 		}
1387*4882a593Smuzhiyun 		if (warned[i] == nodeid)
1388*4882a593Smuzhiyun 			return 1;
1389*4882a593Smuzhiyun 	}
1390*4882a593Smuzhiyun 	return 0;
1391*4882a593Smuzhiyun }
1392*4882a593Smuzhiyun 
1393*4882a593Smuzhiyun void dlm_scan_waiters(struct dlm_ls *ls)
1394*4882a593Smuzhiyun {
1395*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
1396*4882a593Smuzhiyun 	s64 us;
1397*4882a593Smuzhiyun 	s64 debug_maxus = 0;
1398*4882a593Smuzhiyun 	u32 debug_scanned = 0;
1399*4882a593Smuzhiyun 	u32 debug_expired = 0;
1400*4882a593Smuzhiyun 	int num_nodes = 0;
1401*4882a593Smuzhiyun 	int *warned = NULL;
1402*4882a593Smuzhiyun 
1403*4882a593Smuzhiyun 	if (!dlm_config.ci_waitwarn_us)
1404*4882a593Smuzhiyun 		return;
1405*4882a593Smuzhiyun 
1406*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
1407*4882a593Smuzhiyun 
1408*4882a593Smuzhiyun 	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1409*4882a593Smuzhiyun 		if (!lkb->lkb_wait_time)
1410*4882a593Smuzhiyun 			continue;
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 		debug_scanned++;
1413*4882a593Smuzhiyun 
1414*4882a593Smuzhiyun 		us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
1415*4882a593Smuzhiyun 
1416*4882a593Smuzhiyun 		if (us < dlm_config.ci_waitwarn_us)
1417*4882a593Smuzhiyun 			continue;
1418*4882a593Smuzhiyun 
1419*4882a593Smuzhiyun 		lkb->lkb_wait_time = 0;
1420*4882a593Smuzhiyun 
1421*4882a593Smuzhiyun 		debug_expired++;
1422*4882a593Smuzhiyun 		if (us > debug_maxus)
1423*4882a593Smuzhiyun 			debug_maxus = us;
1424*4882a593Smuzhiyun 
1425*4882a593Smuzhiyun 		if (!num_nodes) {
1426*4882a593Smuzhiyun 			num_nodes = ls->ls_num_nodes;
1427*4882a593Smuzhiyun 			warned = kcalloc(num_nodes, sizeof(int), GFP_KERNEL);
1428*4882a593Smuzhiyun 		}
1429*4882a593Smuzhiyun 		if (!warned)
1430*4882a593Smuzhiyun 			continue;
1431*4882a593Smuzhiyun 		if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
1432*4882a593Smuzhiyun 			continue;
1433*4882a593Smuzhiyun 
1434*4882a593Smuzhiyun 		log_error(ls, "waitwarn %x %lld %d us check connection to "
1435*4882a593Smuzhiyun 			  "node %d", lkb->lkb_id, (long long)us,
1436*4882a593Smuzhiyun 			  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
1437*4882a593Smuzhiyun 	}
1438*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
1439*4882a593Smuzhiyun 	kfree(warned);
1440*4882a593Smuzhiyun 
1441*4882a593Smuzhiyun 	if (debug_expired)
1442*4882a593Smuzhiyun 		log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
1443*4882a593Smuzhiyun 			  debug_scanned, debug_expired,
1444*4882a593Smuzhiyun 			  dlm_config.ci_waitwarn_us, (long long)debug_maxus);
1445*4882a593Smuzhiyun }
1446*4882a593Smuzhiyun 
1447*4882a593Smuzhiyun /* add/remove lkb from global waiters list of lkb's waiting for
1448*4882a593Smuzhiyun    a reply from a remote node */
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
1451*4882a593Smuzhiyun {
1452*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1453*4882a593Smuzhiyun 	int error = 0;
1454*4882a593Smuzhiyun 
1455*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
1456*4882a593Smuzhiyun 
1457*4882a593Smuzhiyun 	if (is_overlap_unlock(lkb) ||
1458*4882a593Smuzhiyun 	    (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL))) {
1459*4882a593Smuzhiyun 		error = -EINVAL;
1460*4882a593Smuzhiyun 		goto out;
1461*4882a593Smuzhiyun 	}
1462*4882a593Smuzhiyun 
1463*4882a593Smuzhiyun 	if (lkb->lkb_wait_type || is_overlap_cancel(lkb)) {
1464*4882a593Smuzhiyun 		switch (mstype) {
1465*4882a593Smuzhiyun 		case DLM_MSG_UNLOCK:
1466*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
1467*4882a593Smuzhiyun 			break;
1468*4882a593Smuzhiyun 		case DLM_MSG_CANCEL:
1469*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
1470*4882a593Smuzhiyun 			break;
1471*4882a593Smuzhiyun 		default:
1472*4882a593Smuzhiyun 			error = -EBUSY;
1473*4882a593Smuzhiyun 			goto out;
1474*4882a593Smuzhiyun 		}
1475*4882a593Smuzhiyun 		lkb->lkb_wait_count++;
1476*4882a593Smuzhiyun 		hold_lkb(lkb);
1477*4882a593Smuzhiyun 
1478*4882a593Smuzhiyun 		log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
1479*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_wait_type, mstype,
1480*4882a593Smuzhiyun 			  lkb->lkb_wait_count, lkb->lkb_flags);
1481*4882a593Smuzhiyun 		goto out;
1482*4882a593Smuzhiyun 	}
1483*4882a593Smuzhiyun 
1484*4882a593Smuzhiyun 	DLM_ASSERT(!lkb->lkb_wait_count,
1485*4882a593Smuzhiyun 		   dlm_print_lkb(lkb);
1486*4882a593Smuzhiyun 		   printk("wait_count %d\n", lkb->lkb_wait_count););
1487*4882a593Smuzhiyun 
1488*4882a593Smuzhiyun 	lkb->lkb_wait_count++;
1489*4882a593Smuzhiyun 	lkb->lkb_wait_type = mstype;
1490*4882a593Smuzhiyun 	lkb->lkb_wait_time = ktime_get();
1491*4882a593Smuzhiyun 	lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
1492*4882a593Smuzhiyun 	hold_lkb(lkb);
1493*4882a593Smuzhiyun 	list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
1494*4882a593Smuzhiyun  out:
1495*4882a593Smuzhiyun 	if (error)
1496*4882a593Smuzhiyun 		log_error(ls, "addwait error %x %d flags %x %d %d %s",
1497*4882a593Smuzhiyun 			  lkb->lkb_id, error, lkb->lkb_flags, mstype,
1498*4882a593Smuzhiyun 			  lkb->lkb_wait_type, lkb->lkb_resource->res_name);
1499*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
1500*4882a593Smuzhiyun 	return error;
1501*4882a593Smuzhiyun }
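
/* Lifecycle sketch (added, illustrative): the send path calls
   add_to_waiters(lkb, DLM_MSG_REQUEST, nodeid) before transmitting, and
   the reply handler calls remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
   msg_reply_type() above defines the request->reply pairing used to
   match the two up. */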
1502*4882a593Smuzhiyun 
1503*4882a593Smuzhiyun /* We clear the RESEND flag because we might be taking an lkb off the waiters
1504*4882a593Smuzhiyun    list as part of process_requestqueue (e.g. a lookup that has an optimized
1505*4882a593Smuzhiyun    request reply on the requestqueue) between dlm_recover_waiters_pre() which
1506*4882a593Smuzhiyun    set RESEND and dlm_recover_waiters_post() */
1507*4882a593Smuzhiyun 
1508*4882a593Smuzhiyun static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
1509*4882a593Smuzhiyun 				struct dlm_message *ms)
1510*4882a593Smuzhiyun {
1511*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1512*4882a593Smuzhiyun 	int overlap_done = 0;
1513*4882a593Smuzhiyun 
1514*4882a593Smuzhiyun 	if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) {
1515*4882a593Smuzhiyun 		log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id);
1516*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
1517*4882a593Smuzhiyun 		overlap_done = 1;
1518*4882a593Smuzhiyun 		goto out_del;
1519*4882a593Smuzhiyun 	}
1520*4882a593Smuzhiyun 
1521*4882a593Smuzhiyun 	if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) {
1522*4882a593Smuzhiyun 		log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id);
1523*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
1524*4882a593Smuzhiyun 		overlap_done = 1;
1525*4882a593Smuzhiyun 		goto out_del;
1526*4882a593Smuzhiyun 	}
1527*4882a593Smuzhiyun 
1528*4882a593Smuzhiyun 	/* Cancel state was preemptively cleared by a successful convert,
1529*4882a593Smuzhiyun 	   see next comment, nothing to do. */
1530*4882a593Smuzhiyun 
1531*4882a593Smuzhiyun 	if ((mstype == DLM_MSG_CANCEL_REPLY) &&
1532*4882a593Smuzhiyun 	    (lkb->lkb_wait_type != DLM_MSG_CANCEL)) {
1533*4882a593Smuzhiyun 		log_debug(ls, "remwait %x cancel_reply wait_type %d",
1534*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_wait_type);
1535*4882a593Smuzhiyun 		return -1;
1536*4882a593Smuzhiyun 	}
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 	/* Remove for the convert reply, and preemptively remove for the
1539*4882a593Smuzhiyun 	   cancel reply.  A convert has been granted while there's still
1540*4882a593Smuzhiyun 	   an outstanding cancel on it (the cancel is moot and the result
1541*4882a593Smuzhiyun 	   in the cancel reply should be 0).  We preempt the cancel reply
1542*4882a593Smuzhiyun 	   because the app gets the convert result and then can follow up
1543*4882a593Smuzhiyun 	   with another op, like convert.  This subsequent op would see the
1544*4882a593Smuzhiyun 	   lingering state of the cancel and fail with -EBUSY. */
1545*4882a593Smuzhiyun 
1546*4882a593Smuzhiyun 	if ((mstype == DLM_MSG_CONVERT_REPLY) &&
1547*4882a593Smuzhiyun 	    (lkb->lkb_wait_type == DLM_MSG_CONVERT) &&
1548*4882a593Smuzhiyun 	    is_overlap_cancel(lkb) && ms && !ms->m_result) {
1549*4882a593Smuzhiyun 		log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
1550*4882a593Smuzhiyun 			  lkb->lkb_id);
1551*4882a593Smuzhiyun 		lkb->lkb_wait_type = 0;
1552*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
1553*4882a593Smuzhiyun 		lkb->lkb_wait_count--;
1554*4882a593Smuzhiyun 		unhold_lkb(lkb);
1555*4882a593Smuzhiyun 		goto out_del;
1556*4882a593Smuzhiyun 	}
1557*4882a593Smuzhiyun 
1558*4882a593Smuzhiyun 	/* N.B. type of reply may not always correspond to type of original
1559*4882a593Smuzhiyun 	   msg due to lookup->request optimization, verify others? */
1560*4882a593Smuzhiyun 
1561*4882a593Smuzhiyun 	if (lkb->lkb_wait_type) {
1562*4882a593Smuzhiyun 		lkb->lkb_wait_type = 0;
1563*4882a593Smuzhiyun 		goto out_del;
1564*4882a593Smuzhiyun 	}
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun 	log_error(ls, "remwait error %x remote %d %x msg %d flags %x no wait",
1567*4882a593Smuzhiyun 		  lkb->lkb_id, ms ? ms->m_header.h_nodeid : 0, lkb->lkb_remid,
1568*4882a593Smuzhiyun 		  mstype, lkb->lkb_flags);
1569*4882a593Smuzhiyun 	return -1;
1570*4882a593Smuzhiyun 
1571*4882a593Smuzhiyun  out_del:
1572*4882a593Smuzhiyun 	/* the force-unlock/cancel has completed and we haven't recvd a reply
1573*4882a593Smuzhiyun 	   to the op that was in progress prior to the unlock/cancel; we
1574*4882a593Smuzhiyun 	   give up on any reply to the earlier op.  FIXME: not sure when/how
1575*4882a593Smuzhiyun 	   this would happen */
1576*4882a593Smuzhiyun 
1577*4882a593Smuzhiyun 	if (overlap_done && lkb->lkb_wait_type) {
1578*4882a593Smuzhiyun 		log_error(ls, "remwait error %x reply %d wait_type %d overlap",
1579*4882a593Smuzhiyun 			  lkb->lkb_id, mstype, lkb->lkb_wait_type);
1580*4882a593Smuzhiyun 		lkb->lkb_wait_count--;
1581*4882a593Smuzhiyun 		unhold_lkb(lkb);
1582*4882a593Smuzhiyun 		lkb->lkb_wait_type = 0;
1583*4882a593Smuzhiyun 	}
1584*4882a593Smuzhiyun 
1585*4882a593Smuzhiyun 	DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););
1586*4882a593Smuzhiyun 
1587*4882a593Smuzhiyun 	lkb->lkb_flags &= ~DLM_IFL_RESEND;
1588*4882a593Smuzhiyun 	lkb->lkb_wait_count--;
1589*4882a593Smuzhiyun 	if (!lkb->lkb_wait_count)
1590*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_wait_reply);
1591*4882a593Smuzhiyun 	unhold_lkb(lkb);
1592*4882a593Smuzhiyun 	return 0;
1593*4882a593Smuzhiyun }
1594*4882a593Smuzhiyun 
1595*4882a593Smuzhiyun static int remove_from_waiters(struct dlm_lkb *lkb, int mstype)
1596*4882a593Smuzhiyun {
1597*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1598*4882a593Smuzhiyun 	int error;
1599*4882a593Smuzhiyun 
1600*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
1601*4882a593Smuzhiyun 	error = _remove_from_waiters(lkb, mstype, NULL);
1602*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
1603*4882a593Smuzhiyun 	return error;
1604*4882a593Smuzhiyun }
1605*4882a593Smuzhiyun 
1606*4882a593Smuzhiyun /* Handles situations where we might be processing a "fake" or "stub" reply in
1607*4882a593Smuzhiyun    which we can't try to take waiters_mutex again. */
1608*4882a593Smuzhiyun 
1609*4882a593Smuzhiyun static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
1610*4882a593Smuzhiyun {
1611*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1612*4882a593Smuzhiyun 	int error;
1613*4882a593Smuzhiyun 
1614*4882a593Smuzhiyun 	if (ms->m_flags != DLM_IFL_STUB_MS)
1615*4882a593Smuzhiyun 		mutex_lock(&ls->ls_waiters_mutex);
1616*4882a593Smuzhiyun 	error = _remove_from_waiters(lkb, ms->m_type, ms);
1617*4882a593Smuzhiyun 	if (ms->m_flags != DLM_IFL_STUB_MS)
1618*4882a593Smuzhiyun 		mutex_unlock(&ls->ls_waiters_mutex);
1619*4882a593Smuzhiyun 	return error;
1620*4882a593Smuzhiyun }
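
/* Note (added, best guess): stub replies are fabricated during recovery
   while ls_waiters_mutex is already held, which appears to be why
   DLM_IFL_STUB_MS skips retaking the mutex above. */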
1621*4882a593Smuzhiyun 
1622*4882a593Smuzhiyun /* If there's an rsb for the same resource being removed, ensure
1623*4882a593Smuzhiyun    that the remove message is sent before the new lookup message.
1624*4882a593Smuzhiyun    It should be rare to need a delay here, but if not, then it may
1625*4882a593Smuzhiyun    be worthwhile to add a proper wait mechanism rather than a delay. */
1626*4882a593Smuzhiyun 
1627*4882a593Smuzhiyun static void wait_pending_remove(struct dlm_rsb *r)
1628*4882a593Smuzhiyun {
1629*4882a593Smuzhiyun 	struct dlm_ls *ls = r->res_ls;
1630*4882a593Smuzhiyun  restart:
1631*4882a593Smuzhiyun 	spin_lock(&ls->ls_remove_spin);
1632*4882a593Smuzhiyun 	if (ls->ls_remove_len &&
1633*4882a593Smuzhiyun 	    !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) {
1634*4882a593Smuzhiyun 		log_debug(ls, "delay lookup for remove dir %d %s",
1635*4882a593Smuzhiyun 			  r->res_dir_nodeid, r->res_name);
1636*4882a593Smuzhiyun 		spin_unlock(&ls->ls_remove_spin);
1637*4882a593Smuzhiyun 		msleep(1);
1638*4882a593Smuzhiyun 		goto restart;
1639*4882a593Smuzhiyun 	}
1640*4882a593Smuzhiyun 	spin_unlock(&ls->ls_remove_spin);
1641*4882a593Smuzhiyun }
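
/* Timing sketch (added): shrink_bucket() below copies a victim's name
   into ls_remove_name before calling send_remove() and clears it
   afterwards; wait_pending_remove() spins on that window so a new
   lookup for the same name cannot pass the remove message on the
   wire. */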
1642*4882a593Smuzhiyun 
1643*4882a593Smuzhiyun /*
1644*4882a593Smuzhiyun  * ls_remove_spin protects ls_remove_name and ls_remove_len which are
1645*4882a593Smuzhiyun  * read by other threads in wait_pending_remove.  ls_remove_names
1646*4882a593Smuzhiyun  * and ls_remove_lens are only used by the scan thread, so they do
1647*4882a593Smuzhiyun  * not need protection.
1648*4882a593Smuzhiyun  */
1649*4882a593Smuzhiyun 
1650*4882a593Smuzhiyun static void shrink_bucket(struct dlm_ls *ls, int b)
1651*4882a593Smuzhiyun {
1652*4882a593Smuzhiyun 	struct rb_node *n, *next;
1653*4882a593Smuzhiyun 	struct dlm_rsb *r;
1654*4882a593Smuzhiyun 	char *name;
1655*4882a593Smuzhiyun 	int our_nodeid = dlm_our_nodeid();
1656*4882a593Smuzhiyun 	int remote_count = 0;
1657*4882a593Smuzhiyun 	int need_shrink = 0;
1658*4882a593Smuzhiyun 	int i, len, rv;
1659*4882a593Smuzhiyun 
1660*4882a593Smuzhiyun 	memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1661*4882a593Smuzhiyun 
1662*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[b].lock);
1663*4882a593Smuzhiyun 
1664*4882a593Smuzhiyun 	if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1665*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
1666*4882a593Smuzhiyun 		return;
1667*4882a593Smuzhiyun 	}
1668*4882a593Smuzhiyun 
1669*4882a593Smuzhiyun 	for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1670*4882a593Smuzhiyun 		next = rb_next(n);
1671*4882a593Smuzhiyun 		r = rb_entry(n, struct dlm_rsb, res_hashnode);
1672*4882a593Smuzhiyun 
1673*4882a593Smuzhiyun 		/* If we're the directory record for this rsb, and
1674*4882a593Smuzhiyun 		   we're not the master of it, then we need to wait
1675*4882a593Smuzhiyun 		   for the master node to send us a dir remove for
1676*4882a593Smuzhiyun 		   for the master node to send us a dir remove for it
1677*4882a593Smuzhiyun 		   before removing the dir record. */
1678*4882a593Smuzhiyun 		if (!dlm_no_directory(ls) &&
1679*4882a593Smuzhiyun 		    (r->res_master_nodeid != our_nodeid) &&
1680*4882a593Smuzhiyun 		    (dlm_dir_nodeid(r) == our_nodeid)) {
1681*4882a593Smuzhiyun 			continue;
1682*4882a593Smuzhiyun 		}
1683*4882a593Smuzhiyun 
1684*4882a593Smuzhiyun 		need_shrink = 1;
1685*4882a593Smuzhiyun 
1686*4882a593Smuzhiyun 		if (!time_after_eq(jiffies, r->res_toss_time +
1687*4882a593Smuzhiyun 				   dlm_config.ci_toss_secs * HZ)) {
1688*4882a593Smuzhiyun 			continue;
1689*4882a593Smuzhiyun 		}
1690*4882a593Smuzhiyun 
1691*4882a593Smuzhiyun 		if (!dlm_no_directory(ls) &&
1692*4882a593Smuzhiyun 		    (r->res_master_nodeid == our_nodeid) &&
1693*4882a593Smuzhiyun 		    (dlm_dir_nodeid(r) != our_nodeid)) {
1694*4882a593Smuzhiyun 
1695*4882a593Smuzhiyun 			/* We're the master of this rsb but we're not
1696*4882a593Smuzhiyun 			   the directory record, so we need to tell the
1697*4882a593Smuzhiyun 			   dir node to remove the dir record. */
1698*4882a593Smuzhiyun 
1699*4882a593Smuzhiyun 			ls->ls_remove_lens[remote_count] = r->res_length;
1700*4882a593Smuzhiyun 			memcpy(ls->ls_remove_names[remote_count], r->res_name,
1701*4882a593Smuzhiyun 			       DLM_RESNAME_MAXLEN);
1702*4882a593Smuzhiyun 			remote_count++;
1703*4882a593Smuzhiyun 
1704*4882a593Smuzhiyun 			if (remote_count >= DLM_REMOVE_NAMES_MAX)
1705*4882a593Smuzhiyun 				break;
1706*4882a593Smuzhiyun 			continue;
1707*4882a593Smuzhiyun 		}
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun 		if (!kref_put(&r->res_ref, kill_rsb)) {
1710*4882a593Smuzhiyun 			log_error(ls, "tossed rsb in use %s", r->res_name);
1711*4882a593Smuzhiyun 			continue;
1712*4882a593Smuzhiyun 		}
1713*4882a593Smuzhiyun 
1714*4882a593Smuzhiyun 		rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1715*4882a593Smuzhiyun 		dlm_free_rsb(r);
1716*4882a593Smuzhiyun 	}
1717*4882a593Smuzhiyun 
1718*4882a593Smuzhiyun 	if (need_shrink)
1719*4882a593Smuzhiyun 		ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1720*4882a593Smuzhiyun 	else
1721*4882a593Smuzhiyun 		ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1722*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[b].lock);
1723*4882a593Smuzhiyun 
1724*4882a593Smuzhiyun 	/*
1725*4882a593Smuzhiyun 	 * While searching for rsb's to free, we found some that require
1726*4882a593Smuzhiyun 	 * remote removal.  We leave them in place and find them again here
1727*4882a593Smuzhiyun 	 * so there is a very small gap between removing them from the toss
1728*4882a593Smuzhiyun 	 * list and sending the removal.  Keeping this gap small is
1729*4882a593Smuzhiyun 	 * important to keep us (the master node) from being out of sync
1730*4882a593Smuzhiyun 	 * with the remote dir node for very long.
1731*4882a593Smuzhiyun 	 *
1732*4882a593Smuzhiyun 	 * From the time the rsb is removed from toss until just after
1733*4882a593Smuzhiyun 	 * send_remove, the rsb name is saved in ls_remove_name.  A new
1734*4882a593Smuzhiyun 	 * lookup checks this to ensure that a new lookup message for the
1735*4882a593Smuzhiyun 	 * same resource name is not sent just before the remove message.
1736*4882a593Smuzhiyun 	 */
1737*4882a593Smuzhiyun 
1738*4882a593Smuzhiyun 	for (i = 0; i < remote_count; i++) {
1739*4882a593Smuzhiyun 		name = ls->ls_remove_names[i];
1740*4882a593Smuzhiyun 		len = ls->ls_remove_lens[i];
1741*4882a593Smuzhiyun 
1742*4882a593Smuzhiyun 		spin_lock(&ls->ls_rsbtbl[b].lock);
1743*4882a593Smuzhiyun 		rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
1744*4882a593Smuzhiyun 		if (rv) {
1745*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
1746*4882a593Smuzhiyun 			log_debug(ls, "remove_name not toss %s", name);
1747*4882a593Smuzhiyun 			continue;
1748*4882a593Smuzhiyun 		}
1749*4882a593Smuzhiyun 
1750*4882a593Smuzhiyun 		if (r->res_master_nodeid != our_nodeid) {
1751*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
1752*4882a593Smuzhiyun 			log_debug(ls, "remove_name master %d dir %d our %d %s",
1753*4882a593Smuzhiyun 				  r->res_master_nodeid, r->res_dir_nodeid,
1754*4882a593Smuzhiyun 				  our_nodeid, name);
1755*4882a593Smuzhiyun 			continue;
1756*4882a593Smuzhiyun 		}
1757*4882a593Smuzhiyun 
1758*4882a593Smuzhiyun 		if (r->res_dir_nodeid == our_nodeid) {
1759*4882a593Smuzhiyun 			/* should never happen */
1760*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
1761*4882a593Smuzhiyun 			log_error(ls, "remove_name dir %d master %d our %d %s",
1762*4882a593Smuzhiyun 				  r->res_dir_nodeid, r->res_master_nodeid,
1763*4882a593Smuzhiyun 				  our_nodeid, name);
1764*4882a593Smuzhiyun 			continue;
1765*4882a593Smuzhiyun 		}
1766*4882a593Smuzhiyun 
1767*4882a593Smuzhiyun 		if (!time_after_eq(jiffies, r->res_toss_time +
1768*4882a593Smuzhiyun 				   dlm_config.ci_toss_secs * HZ)) {
1769*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
1770*4882a593Smuzhiyun 			log_debug(ls, "remove_name toss_time %lu now %lu %s",
1771*4882a593Smuzhiyun 				  r->res_toss_time, jiffies, name);
1772*4882a593Smuzhiyun 			continue;
1773*4882a593Smuzhiyun 		}
1774*4882a593Smuzhiyun 
1775*4882a593Smuzhiyun 		if (!kref_put(&r->res_ref, kill_rsb)) {
1776*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
1777*4882a593Smuzhiyun 			log_error(ls, "remove_name in use %s", name);
1778*4882a593Smuzhiyun 			continue;
1779*4882a593Smuzhiyun 		}
1780*4882a593Smuzhiyun 
1781*4882a593Smuzhiyun 		rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1782*4882a593Smuzhiyun 
1783*4882a593Smuzhiyun 		/* block lookup of same name until we've sent remove */
1784*4882a593Smuzhiyun 		spin_lock(&ls->ls_remove_spin);
1785*4882a593Smuzhiyun 		ls->ls_remove_len = len;
1786*4882a593Smuzhiyun 		memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
1787*4882a593Smuzhiyun 		spin_unlock(&ls->ls_remove_spin);
1788*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
1789*4882a593Smuzhiyun 
1790*4882a593Smuzhiyun 		send_remove(r);
1791*4882a593Smuzhiyun 
1792*4882a593Smuzhiyun 		/* allow lookup of name again */
1793*4882a593Smuzhiyun 		spin_lock(&ls->ls_remove_spin);
1794*4882a593Smuzhiyun 		ls->ls_remove_len = 0;
1795*4882a593Smuzhiyun 		memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
1796*4882a593Smuzhiyun 		spin_unlock(&ls->ls_remove_spin);
1797*4882a593Smuzhiyun 
1798*4882a593Smuzhiyun 		dlm_free_rsb(r);
1799*4882a593Smuzhiyun 	}
1800*4882a593Smuzhiyun }
1801*4882a593Smuzhiyun 
1802*4882a593Smuzhiyun void dlm_scan_rsbs(struct dlm_ls *ls)
1803*4882a593Smuzhiyun {
1804*4882a593Smuzhiyun 	int i;
1805*4882a593Smuzhiyun 
1806*4882a593Smuzhiyun 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
1807*4882a593Smuzhiyun 		shrink_bucket(ls, i);
1808*4882a593Smuzhiyun 		if (dlm_locking_stopped(ls))
1809*4882a593Smuzhiyun 			break;
1810*4882a593Smuzhiyun 		cond_resched();
1811*4882a593Smuzhiyun 	}
1812*4882a593Smuzhiyun }
1813*4882a593Smuzhiyun 
1814*4882a593Smuzhiyun static void add_timeout(struct dlm_lkb *lkb)
1815*4882a593Smuzhiyun {
1816*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1817*4882a593Smuzhiyun 
1818*4882a593Smuzhiyun 	if (is_master_copy(lkb))
1819*4882a593Smuzhiyun 		return;
1820*4882a593Smuzhiyun 
1821*4882a593Smuzhiyun 	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
1822*4882a593Smuzhiyun 	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1823*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
1824*4882a593Smuzhiyun 		goto add_it;
1825*4882a593Smuzhiyun 	}
1826*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
1827*4882a593Smuzhiyun 		goto add_it;
1828*4882a593Smuzhiyun 	return;
1829*4882a593Smuzhiyun 
1830*4882a593Smuzhiyun  add_it:
1831*4882a593Smuzhiyun 	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
1832*4882a593Smuzhiyun 	mutex_lock(&ls->ls_timeout_mutex);
1833*4882a593Smuzhiyun 	hold_lkb(lkb);
1834*4882a593Smuzhiyun 	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
1835*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_timeout_mutex);
1836*4882a593Smuzhiyun }
1837*4882a593Smuzhiyun 
1838*4882a593Smuzhiyun static void del_timeout(struct dlm_lkb *lkb)
1839*4882a593Smuzhiyun {
1840*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1841*4882a593Smuzhiyun 
1842*4882a593Smuzhiyun 	mutex_lock(&ls->ls_timeout_mutex);
1843*4882a593Smuzhiyun 	if (!list_empty(&lkb->lkb_time_list)) {
1844*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_time_list);
1845*4882a593Smuzhiyun 		unhold_lkb(lkb);
1846*4882a593Smuzhiyun 	}
1847*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_timeout_mutex);
1848*4882a593Smuzhiyun }
1849*4882a593Smuzhiyun 
1850*4882a593Smuzhiyun /* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
1851*4882a593Smuzhiyun    lkb_lksb_timeout without lock_rsb?  Note: we can't lock timeout_mutex
1852*4882a593Smuzhiyun    and then lock rsb because of lock ordering in add_timeout.  We may need
1853*4882a593Smuzhiyun    to specify some special timeout-related bits in the lkb that are just to
1854*4882a593Smuzhiyun    be accessed under the timeout_mutex. */
1855*4882a593Smuzhiyun 
1856*4882a593Smuzhiyun void dlm_scan_timeout(struct dlm_ls *ls)
1857*4882a593Smuzhiyun {
1858*4882a593Smuzhiyun 	struct dlm_rsb *r;
1859*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
1860*4882a593Smuzhiyun 	int do_cancel, do_warn;
1861*4882a593Smuzhiyun 	s64 wait_us;
1862*4882a593Smuzhiyun 
1863*4882a593Smuzhiyun 	for (;;) {
1864*4882a593Smuzhiyun 		if (dlm_locking_stopped(ls))
1865*4882a593Smuzhiyun 			break;
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun 		do_cancel = 0;
1868*4882a593Smuzhiyun 		do_warn = 0;
1869*4882a593Smuzhiyun 		mutex_lock(&ls->ls_timeout_mutex);
1870*4882a593Smuzhiyun 		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
1871*4882a593Smuzhiyun 
1872*4882a593Smuzhiyun 			wait_us = ktime_to_us(ktime_sub(ktime_get(),
1873*4882a593Smuzhiyun 							lkb->lkb_timestamp));
1874*4882a593Smuzhiyun 
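			/* the _cs fields are centiseconds; 1 cs = 10000 us,
			   hence the * 10000 when comparing with wait_us
			   (comment added) */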
1875*4882a593Smuzhiyun 			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
1876*4882a593Smuzhiyun 			    wait_us >= (lkb->lkb_timeout_cs * 10000))
1877*4882a593Smuzhiyun 				do_cancel = 1;
1878*4882a593Smuzhiyun 
1879*4882a593Smuzhiyun 			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
1880*4882a593Smuzhiyun 			    wait_us >= dlm_config.ci_timewarn_cs * 10000)
1881*4882a593Smuzhiyun 				do_warn = 1;
1882*4882a593Smuzhiyun 
1883*4882a593Smuzhiyun 			if (!do_cancel && !do_warn)
1884*4882a593Smuzhiyun 				continue;
1885*4882a593Smuzhiyun 			hold_lkb(lkb);
1886*4882a593Smuzhiyun 			break;
1887*4882a593Smuzhiyun 		}
1888*4882a593Smuzhiyun 		mutex_unlock(&ls->ls_timeout_mutex);
1889*4882a593Smuzhiyun 
1890*4882a593Smuzhiyun 		if (!do_cancel && !do_warn)
1891*4882a593Smuzhiyun 			break;
1892*4882a593Smuzhiyun 
1893*4882a593Smuzhiyun 		r = lkb->lkb_resource;
1894*4882a593Smuzhiyun 		hold_rsb(r);
1895*4882a593Smuzhiyun 		lock_rsb(r);
1896*4882a593Smuzhiyun 
1897*4882a593Smuzhiyun 		if (do_warn) {
1898*4882a593Smuzhiyun 			/* clear flag so we only warn once */
1899*4882a593Smuzhiyun 			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1900*4882a593Smuzhiyun 			if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
1901*4882a593Smuzhiyun 				del_timeout(lkb);
1902*4882a593Smuzhiyun 			dlm_timeout_warn(lkb);
1903*4882a593Smuzhiyun 		}
1904*4882a593Smuzhiyun 
1905*4882a593Smuzhiyun 		if (do_cancel) {
1906*4882a593Smuzhiyun 			log_debug(ls, "timeout cancel %x node %d %s",
1907*4882a593Smuzhiyun 				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1908*4882a593Smuzhiyun 			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1909*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
1910*4882a593Smuzhiyun 			del_timeout(lkb);
1911*4882a593Smuzhiyun 			_cancel_lock(r, lkb);
1912*4882a593Smuzhiyun 		}
1913*4882a593Smuzhiyun 
1914*4882a593Smuzhiyun 		unlock_rsb(r);
1915*4882a593Smuzhiyun 		unhold_rsb(r);
1916*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
1917*4882a593Smuzhiyun 	}
1918*4882a593Smuzhiyun }
1919*4882a593Smuzhiyun 
1920*4882a593Smuzhiyun /* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
1921*4882a593Smuzhiyun    dlm_recoverd before checking/setting ls_recover_begin. */
1922*4882a593Smuzhiyun 
1923*4882a593Smuzhiyun void dlm_adjust_timeouts(struct dlm_ls *ls)
1924*4882a593Smuzhiyun {
1925*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
1926*4882a593Smuzhiyun 	u64 adj_us = jiffies_to_usecs(jiffies - ls->ls_recover_begin);
1927*4882a593Smuzhiyun 
1928*4882a593Smuzhiyun 	ls->ls_recover_begin = 0;
1929*4882a593Smuzhiyun 	mutex_lock(&ls->ls_timeout_mutex);
1930*4882a593Smuzhiyun 	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1931*4882a593Smuzhiyun 		lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1932*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_timeout_mutex);
1933*4882a593Smuzhiyun 
1934*4882a593Smuzhiyun 	if (!dlm_config.ci_waitwarn_us)
1935*4882a593Smuzhiyun 		return;
1936*4882a593Smuzhiyun 
1937*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
1938*4882a593Smuzhiyun 	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1939*4882a593Smuzhiyun 		if (ktime_to_us(lkb->lkb_wait_time))
1940*4882a593Smuzhiyun 			lkb->lkb_wait_time = ktime_get();
1941*4882a593Smuzhiyun 	}
1942*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
1943*4882a593Smuzhiyun }
1944*4882a593Smuzhiyun 
1945*4882a593Smuzhiyun /* lkb is master or local copy */
1946*4882a593Smuzhiyun 
1947*4882a593Smuzhiyun static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1948*4882a593Smuzhiyun {
1949*4882a593Smuzhiyun 	int b, len = r->res_ls->ls_lvblen;
1950*4882a593Smuzhiyun 
1951*4882a593Smuzhiyun 	/* b=1 lvb returned to caller
1952*4882a593Smuzhiyun 	   b=0 lvb written to rsb or invalidated
1953*4882a593Smuzhiyun 	   b=-1 do nothing */
1954*4882a593Smuzhiyun 
1955*4882a593Smuzhiyun 	b =  dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
1956*4882a593Smuzhiyun 
1957*4882a593Smuzhiyun 	if (b == 1) {
1958*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
1959*4882a593Smuzhiyun 			return;
1960*4882a593Smuzhiyun 
1961*4882a593Smuzhiyun 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
1962*4882a593Smuzhiyun 			return;
1963*4882a593Smuzhiyun 
1964*4882a593Smuzhiyun 		if (!r->res_lvbptr)
1965*4882a593Smuzhiyun 			return;
1966*4882a593Smuzhiyun 
1967*4882a593Smuzhiyun 		memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len);
1968*4882a593Smuzhiyun 		lkb->lkb_lvbseq = r->res_lvbseq;
1969*4882a593Smuzhiyun 
1970*4882a593Smuzhiyun 	} else if (b == 0) {
1971*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
1972*4882a593Smuzhiyun 			rsb_set_flag(r, RSB_VALNOTVALID);
1973*4882a593Smuzhiyun 			return;
1974*4882a593Smuzhiyun 		}
1975*4882a593Smuzhiyun 
1976*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
1977*4882a593Smuzhiyun 			return;
1978*4882a593Smuzhiyun 
1979*4882a593Smuzhiyun 		if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
1980*4882a593Smuzhiyun 			return;
1981*4882a593Smuzhiyun 
1982*4882a593Smuzhiyun 		if (!r->res_lvbptr)
1983*4882a593Smuzhiyun 			r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
1984*4882a593Smuzhiyun 
1985*4882a593Smuzhiyun 		if (!r->res_lvbptr)
1986*4882a593Smuzhiyun 			return;
1987*4882a593Smuzhiyun 
1988*4882a593Smuzhiyun 		memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len);
1989*4882a593Smuzhiyun 		r->res_lvbseq++;
1990*4882a593Smuzhiyun 		lkb->lkb_lvbseq = r->res_lvbseq;
1991*4882a593Smuzhiyun 		rsb_clear_flag(r, RSB_VALNOTVALID);
1992*4882a593Smuzhiyun 	}
1993*4882a593Smuzhiyun 
1994*4882a593Smuzhiyun 	if (rsb_flag(r, RSB_VALNOTVALID))
1995*4882a593Smuzhiyun 		lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID;
1996*4882a593Smuzhiyun }
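
/* Example (added, hedged): dlm_lvb_operations is indexed by
   [grmode + 1][rqmode + 1] so that DLM_LOCK_IV (-1) maps to index 0.
   Roughly, acquiring a mode that can read the lvb gives b=1 (copy the
   rsb lvb back to the caller), while lvb writes come from locks held
   at PW or EX (b=0), consistent with the grmode < DLM_LOCK_PW check in
   set_lvb_unlock() below. */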
1997*4882a593Smuzhiyun 
1998*4882a593Smuzhiyun static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1999*4882a593Smuzhiyun {
2000*4882a593Smuzhiyun 	if (lkb->lkb_grmode < DLM_LOCK_PW)
2001*4882a593Smuzhiyun 		return;
2002*4882a593Smuzhiyun 
2003*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
2004*4882a593Smuzhiyun 		rsb_set_flag(r, RSB_VALNOTVALID);
2005*4882a593Smuzhiyun 		return;
2006*4882a593Smuzhiyun 	}
2007*4882a593Smuzhiyun 
2008*4882a593Smuzhiyun 	if (!lkb->lkb_lvbptr)
2009*4882a593Smuzhiyun 		return;
2010*4882a593Smuzhiyun 
2011*4882a593Smuzhiyun 	if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
2012*4882a593Smuzhiyun 		return;
2013*4882a593Smuzhiyun 
2014*4882a593Smuzhiyun 	if (!r->res_lvbptr)
2015*4882a593Smuzhiyun 		r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
2016*4882a593Smuzhiyun 
2017*4882a593Smuzhiyun 	if (!r->res_lvbptr)
2018*4882a593Smuzhiyun 		return;
2019*4882a593Smuzhiyun 
2020*4882a593Smuzhiyun 	memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
2021*4882a593Smuzhiyun 	r->res_lvbseq++;
2022*4882a593Smuzhiyun 	rsb_clear_flag(r, RSB_VALNOTVALID);
2023*4882a593Smuzhiyun }
2024*4882a593Smuzhiyun 
2025*4882a593Smuzhiyun /* lkb is process copy (pc) */
2026*4882a593Smuzhiyun 
2027*4882a593Smuzhiyun static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
2028*4882a593Smuzhiyun 			    struct dlm_message *ms)
2029*4882a593Smuzhiyun {
2030*4882a593Smuzhiyun 	int b;
2031*4882a593Smuzhiyun 
2032*4882a593Smuzhiyun 	if (!lkb->lkb_lvbptr)
2033*4882a593Smuzhiyun 		return;
2034*4882a593Smuzhiyun 
2035*4882a593Smuzhiyun 	if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
2036*4882a593Smuzhiyun 		return;
2037*4882a593Smuzhiyun 
2038*4882a593Smuzhiyun 	b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
2039*4882a593Smuzhiyun 	if (b == 1) {
2040*4882a593Smuzhiyun 		int len = receive_extralen(ms);
2041*4882a593Smuzhiyun 		if (len > r->res_ls->ls_lvblen)
2042*4882a593Smuzhiyun 			len = r->res_ls->ls_lvblen;
2043*4882a593Smuzhiyun 		memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
2044*4882a593Smuzhiyun 		lkb->lkb_lvbseq = ms->m_lvbseq;
2045*4882a593Smuzhiyun 	}
2046*4882a593Smuzhiyun }
2047*4882a593Smuzhiyun 
2048*4882a593Smuzhiyun /* Manipulate lkb's on rsb's convert/granted/waiting queues
2049*4882a593Smuzhiyun    remove_lock -- used for unlock, removes lkb from granted
2050*4882a593Smuzhiyun    revert_lock -- used for cancel, moves lkb from convert to granted
2051*4882a593Smuzhiyun    grant_lock  -- used for request and convert, adds lkb to granted or
2052*4882a593Smuzhiyun                   moves lkb from convert or waiting to granted
2053*4882a593Smuzhiyun 
2054*4882a593Smuzhiyun    Each of these is used for master or local copy lkb's.  There is
2055*4882a593Smuzhiyun    also a _pc() variation used to make the corresponding change on
2056*4882a593Smuzhiyun    a process copy (pc) lkb. */
2057*4882a593Smuzhiyun 
2058*4882a593Smuzhiyun static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2059*4882a593Smuzhiyun {
2060*4882a593Smuzhiyun 	del_lkb(r, lkb);
2061*4882a593Smuzhiyun 	lkb->lkb_grmode = DLM_LOCK_IV;
2062*4882a593Smuzhiyun 	/* this unhold undoes the original ref from create_lkb()
2063*4882a593Smuzhiyun 	   so this leads to the lkb being freed */
2064*4882a593Smuzhiyun 	unhold_lkb(lkb);
2065*4882a593Smuzhiyun }
2066*4882a593Smuzhiyun 
2067*4882a593Smuzhiyun static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2068*4882a593Smuzhiyun {
2069*4882a593Smuzhiyun 	set_lvb_unlock(r, lkb);
2070*4882a593Smuzhiyun 	_remove_lock(r, lkb);
2071*4882a593Smuzhiyun }
2072*4882a593Smuzhiyun 
2073*4882a593Smuzhiyun static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
2074*4882a593Smuzhiyun {
2075*4882a593Smuzhiyun 	_remove_lock(r, lkb);
2076*4882a593Smuzhiyun }
2077*4882a593Smuzhiyun 
2078*4882a593Smuzhiyun /* returns: 0 did nothing
2079*4882a593Smuzhiyun 	    1 moved lock to granted
2080*4882a593Smuzhiyun 	   -1 removed lock */
2081*4882a593Smuzhiyun 
2082*4882a593Smuzhiyun static int revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2083*4882a593Smuzhiyun {
2084*4882a593Smuzhiyun 	int rv = 0;
2085*4882a593Smuzhiyun 
2086*4882a593Smuzhiyun 	lkb->lkb_rqmode = DLM_LOCK_IV;
2087*4882a593Smuzhiyun 
2088*4882a593Smuzhiyun 	switch (lkb->lkb_status) {
2089*4882a593Smuzhiyun 	case DLM_LKSTS_GRANTED:
2090*4882a593Smuzhiyun 		break;
2091*4882a593Smuzhiyun 	case DLM_LKSTS_CONVERT:
2092*4882a593Smuzhiyun 		move_lkb(r, lkb, DLM_LKSTS_GRANTED);
2093*4882a593Smuzhiyun 		rv = 1;
2094*4882a593Smuzhiyun 		break;
2095*4882a593Smuzhiyun 	case DLM_LKSTS_WAITING:
2096*4882a593Smuzhiyun 		del_lkb(r, lkb);
2097*4882a593Smuzhiyun 		lkb->lkb_grmode = DLM_LOCK_IV;
2098*4882a593Smuzhiyun 		/* this unhold undoes the original ref from create_lkb()
2099*4882a593Smuzhiyun 		   so this leads to the lkb being freed */
2100*4882a593Smuzhiyun 		unhold_lkb(lkb);
2101*4882a593Smuzhiyun 		rv = -1;
2102*4882a593Smuzhiyun 		break;
2103*4882a593Smuzhiyun 	default:
2104*4882a593Smuzhiyun 		log_print("invalid status for revert %d", lkb->lkb_status);
2105*4882a593Smuzhiyun 	}
2106*4882a593Smuzhiyun 	return rv;
2107*4882a593Smuzhiyun }
2108*4882a593Smuzhiyun 
2109*4882a593Smuzhiyun static int revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
2110*4882a593Smuzhiyun {
2111*4882a593Smuzhiyun 	return revert_lock(r, lkb);
2112*4882a593Smuzhiyun }
2113*4882a593Smuzhiyun 
2114*4882a593Smuzhiyun static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2115*4882a593Smuzhiyun {
2116*4882a593Smuzhiyun 	if (lkb->lkb_grmode != lkb->lkb_rqmode) {
2117*4882a593Smuzhiyun 		lkb->lkb_grmode = lkb->lkb_rqmode;
2118*4882a593Smuzhiyun 		if (lkb->lkb_status)
2119*4882a593Smuzhiyun 			move_lkb(r, lkb, DLM_LKSTS_GRANTED);
2120*4882a593Smuzhiyun 		else
2121*4882a593Smuzhiyun 			add_lkb(r, lkb, DLM_LKSTS_GRANTED);
2122*4882a593Smuzhiyun 	}
2123*4882a593Smuzhiyun 
2124*4882a593Smuzhiyun 	lkb->lkb_rqmode = DLM_LOCK_IV;
2125*4882a593Smuzhiyun 	lkb->lkb_highbast = 0;
2126*4882a593Smuzhiyun }
2127*4882a593Smuzhiyun 
2128*4882a593Smuzhiyun static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2129*4882a593Smuzhiyun {
2130*4882a593Smuzhiyun 	set_lvb_lock(r, lkb);
2131*4882a593Smuzhiyun 	_grant_lock(r, lkb);
2132*4882a593Smuzhiyun }
2133*4882a593Smuzhiyun 
2134*4882a593Smuzhiyun static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
2135*4882a593Smuzhiyun 			  struct dlm_message *ms)
2136*4882a593Smuzhiyun {
2137*4882a593Smuzhiyun 	set_lvb_lock_pc(r, lkb, ms);
2138*4882a593Smuzhiyun 	_grant_lock(r, lkb);
2139*4882a593Smuzhiyun }
2140*4882a593Smuzhiyun 
2141*4882a593Smuzhiyun /* called by grant_pending_locks() which means an async grant message must
2142*4882a593Smuzhiyun    be sent to the requesting node in addition to granting the lock if the
2143*4882a593Smuzhiyun    lkb belongs to a remote node. */
2144*4882a593Smuzhiyun 
2145*4882a593Smuzhiyun static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
2146*4882a593Smuzhiyun {
2147*4882a593Smuzhiyun 	grant_lock(r, lkb);
2148*4882a593Smuzhiyun 	if (is_master_copy(lkb))
2149*4882a593Smuzhiyun 		send_grant(r, lkb);
2150*4882a593Smuzhiyun 	else
2151*4882a593Smuzhiyun 		queue_cast(r, lkb, 0);
2152*4882a593Smuzhiyun }
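
/* Note (added): is_master_copy() means this lkb mirrors a lock owned by
   a remote node, so the grant travels as a DLM_MSG_GRANT message via
   send_grant(); for a local lock, queue_cast() queues the completion
   ast instead. */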
2153*4882a593Smuzhiyun 
2154*4882a593Smuzhiyun /* The special CONVDEADLK, ALTPR and ALTCW flags allow the master to
2155*4882a593Smuzhiyun    change the granted/requested modes.  We're munging things accordingly in
2156*4882a593Smuzhiyun    the process copy.
2157*4882a593Smuzhiyun    CONVDEADLK: our grmode may have been forced down to NL to resolve a
2158*4882a593Smuzhiyun    conversion deadlock
2159*4882a593Smuzhiyun    ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
2160*4882a593Smuzhiyun    compatible with other granted locks */
2161*4882a593Smuzhiyun 
2162*4882a593Smuzhiyun static void munge_demoted(struct dlm_lkb *lkb)
2163*4882a593Smuzhiyun {
2164*4882a593Smuzhiyun 	if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
2165*4882a593Smuzhiyun 		log_print("munge_demoted %x invalid modes gr %d rq %d",
2166*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
2167*4882a593Smuzhiyun 		return;
2168*4882a593Smuzhiyun 	}
2169*4882a593Smuzhiyun 
2170*4882a593Smuzhiyun 	lkb->lkb_grmode = DLM_LOCK_NL;
2171*4882a593Smuzhiyun }
2172*4882a593Smuzhiyun 
2173*4882a593Smuzhiyun static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms)
2174*4882a593Smuzhiyun {
2175*4882a593Smuzhiyun 	if (ms->m_type != DLM_MSG_REQUEST_REPLY &&
2176*4882a593Smuzhiyun 	    ms->m_type != DLM_MSG_GRANT) {
2177*4882a593Smuzhiyun 		log_print("munge_altmode %x invalid reply type %d",
2178*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_type);
2179*4882a593Smuzhiyun 		return;
2180*4882a593Smuzhiyun 	}
2181*4882a593Smuzhiyun 
2182*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_ALTPR)
2183*4882a593Smuzhiyun 		lkb->lkb_rqmode = DLM_LOCK_PR;
2184*4882a593Smuzhiyun 	else if (lkb->lkb_exflags & DLM_LKF_ALTCW)
2185*4882a593Smuzhiyun 		lkb->lkb_rqmode = DLM_LOCK_CW;
2186*4882a593Smuzhiyun 	else {
2187*4882a593Smuzhiyun 		log_print("munge_altmode invalid exflags %x", lkb->lkb_exflags);
2188*4882a593Smuzhiyun 		dlm_print_lkb(lkb);
2189*4882a593Smuzhiyun 	}
2190*4882a593Smuzhiyun }
2191*4882a593Smuzhiyun 
2192*4882a593Smuzhiyun static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
2193*4882a593Smuzhiyun {
2194*4882a593Smuzhiyun 	struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb,
2195*4882a593Smuzhiyun 					   lkb_statequeue);
2196*4882a593Smuzhiyun 	if (lkb->lkb_id == first->lkb_id)
2197*4882a593Smuzhiyun 		return 1;
2198*4882a593Smuzhiyun 
2199*4882a593Smuzhiyun 	return 0;
2200*4882a593Smuzhiyun }
2201*4882a593Smuzhiyun 
2202*4882a593Smuzhiyun /* Check if the given lkb conflicts with another lkb on the queue. */
2203*4882a593Smuzhiyun 
2204*4882a593Smuzhiyun static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
2205*4882a593Smuzhiyun {
2206*4882a593Smuzhiyun 	struct dlm_lkb *this;
2207*4882a593Smuzhiyun 
2208*4882a593Smuzhiyun 	list_for_each_entry(this, head, lkb_statequeue) {
2209*4882a593Smuzhiyun 		if (this == lkb)
2210*4882a593Smuzhiyun 			continue;
2211*4882a593Smuzhiyun 		if (!modes_compat(this, lkb))
2212*4882a593Smuzhiyun 			return 1;
2213*4882a593Smuzhiyun 	}
2214*4882a593Smuzhiyun 	return 0;
2215*4882a593Smuzhiyun }
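
/* queue_conflict() decides compatibility through modes_compat(), which
 * indexes __dlm_compat_matrix (defined earlier in this file; the +1
 * offsets there account for DLM_LOCK_IV being -1).  The sketch below is
 * illustrative only, not a copy of the kernel array: it is the classic
 * six-mode table from "VAXcluster Principles", indexed directly by the
 * DLM_LOCK_* mode values (NL=0 .. EX=5).
 *
 *	static const int compat[6][6] = {
 *			       // NL  CR  CW  PR  PW  EX
 *		[DLM_LOCK_NL] = { 1,  1,  1,  1,  1,  1 },
 *		[DLM_LOCK_CR] = { 1,  1,  1,  1,  1,  0 },
 *		[DLM_LOCK_CW] = { 1,  1,  1,  0,  0,  0 },
 *		[DLM_LOCK_PR] = { 1,  1,  0,  1,  0,  0 },
 *		[DLM_LOCK_PW] = { 1,  1,  0,  0,  0,  0 },
 *		[DLM_LOCK_EX] = { 1,  0,  0,  0,  0,  0 },
 *	};
 *
 *	// a queued request rq conflicts with a granted lock gr when
 *	// compat[gr->lkb_grmode][rq->lkb_rqmode] is 0
 */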
2216*4882a593Smuzhiyun 
2217*4882a593Smuzhiyun /*
2218*4882a593Smuzhiyun  * "A conversion deadlock arises with a pair of lock requests in the converting
2219*4882a593Smuzhiyun  * queue for one resource.  The granted mode of each lock blocks the requested
2220*4882a593Smuzhiyun  * mode of the other lock."
2221*4882a593Smuzhiyun  *
2222*4882a593Smuzhiyun  * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
2223*4882a593Smuzhiyun  * convert queue from being granted, then deadlk/demote lkb.
2224*4882a593Smuzhiyun  *
2225*4882a593Smuzhiyun  * Example:
2226*4882a593Smuzhiyun  * Granted Queue: empty
2227*4882a593Smuzhiyun  * Convert Queue: NL->EX (first lock)
2228*4882a593Smuzhiyun  *                PR->EX (second lock)
2229*4882a593Smuzhiyun  *
2230*4882a593Smuzhiyun  * The first lock can't be granted because of the granted mode of the second
2231*4882a593Smuzhiyun  * lock and the second lock can't be granted because it's not first in the
2232*4882a593Smuzhiyun  * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
2233*4882a593Smuzhiyun  * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
2234*4882a593Smuzhiyun  * flag set and return DEMOTED in the lksb flags.
2235*4882a593Smuzhiyun  *
2236*4882a593Smuzhiyun  * Originally, this function detected conv-deadlk in a more limited scope:
2237*4882a593Smuzhiyun  * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
2238*4882a593Smuzhiyun  * - if lkb1 was the first entry in the queue (not just earlier), and was
2239*4882a593Smuzhiyun  *   blocked by the granted mode of lkb2, and there was nothing on the
2240*4882a593Smuzhiyun  *   granted queue preventing lkb1 from being granted immediately, i.e.
2241*4882a593Smuzhiyun  *   lkb2 was the only thing preventing lkb1 from being granted.
2242*4882a593Smuzhiyun  *
2243*4882a593Smuzhiyun  * That second condition meant we'd only say there was conv-deadlk if
2244*4882a593Smuzhiyun  * resolving it (by demotion) would lead to the first lock on the convert
2245*4882a593Smuzhiyun  * queue being granted right away.  It allowed conversion deadlocks to exist
2246*4882a593Smuzhiyun  * between locks on the convert queue while they couldn't be granted anyway.
2247*4882a593Smuzhiyun  *
2248*4882a593Smuzhiyun  * Now, we detect and take action on conversion deadlocks immediately when
2249*4882a593Smuzhiyun  * they're created, even if they may not be immediately consequential.  If
2250*4882a593Smuzhiyun  * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
2251*4882a593Smuzhiyun  * mode that would prevent lkb1's conversion from being granted, we do a
2252*4882a593Smuzhiyun  * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
2253*4882a593Smuzhiyun  * I think this means that the lkb_is_ahead condition below should always
2254*4882a593Smuzhiyun  * be zero, i.e. there will never be conv-deadlk between two locks that are
2255*4882a593Smuzhiyun  * both already on the convert queue.
2256*4882a593Smuzhiyun  */
2257*4882a593Smuzhiyun 
2258*4882a593Smuzhiyun static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
2259*4882a593Smuzhiyun {
2260*4882a593Smuzhiyun 	struct dlm_lkb *lkb1;
2261*4882a593Smuzhiyun 	int lkb_is_ahead = 0;
2262*4882a593Smuzhiyun 
2263*4882a593Smuzhiyun 	list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
2264*4882a593Smuzhiyun 		if (lkb1 == lkb2) {
2265*4882a593Smuzhiyun 			lkb_is_ahead = 1;
2266*4882a593Smuzhiyun 			continue;
2267*4882a593Smuzhiyun 		}
2268*4882a593Smuzhiyun 
2269*4882a593Smuzhiyun 		if (!lkb_is_ahead) {
2270*4882a593Smuzhiyun 			if (!modes_compat(lkb2, lkb1))
2271*4882a593Smuzhiyun 				return 1;
2272*4882a593Smuzhiyun 		} else {
2273*4882a593Smuzhiyun 			if (!modes_compat(lkb2, lkb1) &&
2274*4882a593Smuzhiyun 			    !modes_compat(lkb1, lkb2))
2275*4882a593Smuzhiyun 				return 1;
2276*4882a593Smuzhiyun 		}
2277*4882a593Smuzhiyun 	}
2278*4882a593Smuzhiyun 	return 0;
2279*4882a593Smuzhiyun }
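
/* The example from the comment above, driven through the public dlm API.
 * This is an illustrative sketch, not code from this file: the lockspace
 * "ls", the lksb variables, the resource name and the callbacks are all
 * assumed.  Both locks are granted, then both convert to EX; the second
 * conversion creates the deadlock.
 *
 *	dlm_lock(ls, DLM_LOCK_NL, &lksb_a, 0, "res1", 4, 0,
 *		 ast_fn, &a, bast_fn);
 *	dlm_lock(ls, DLM_LOCK_PR, &lksb_b, 0, "res1", 4, 0,
 *		 ast_fn, &b, bast_fn);
 *	// ... wait for both completion asts ...
 *
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb_a, DLM_LKF_CONVERT, NULL, 0, 0,
 *		 ast_fn, &a, bast_fn);
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb_b,
 *		 DLM_LKF_CONVERT | DLM_LKF_CONVDEADLK, NULL, 0, 0,
 *		 ast_fn, &b, bast_fn);
 *
 *	// with CONVDEADLK, lock b's granted mode is demoted to NL and
 *	// DLM_SBF_DEMOTED is set in lksb_b.sb_flags; without the flag,
 *	// the convert completes with lksb_b.sb_status == -EDEADLK
 */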
2280*4882a593Smuzhiyun 
2281*4882a593Smuzhiyun /*
2282*4882a593Smuzhiyun  * Return 1 if the lock can be granted, 0 otherwise.
2283*4882a593Smuzhiyun  * Also detect and resolve conversion deadlocks.
2284*4882a593Smuzhiyun  *
2285*4882a593Smuzhiyun  * lkb is the lock to be granted
2286*4882a593Smuzhiyun  *
2287*4882a593Smuzhiyun  * now is 1 if the function is being called in the context of the
2288*4882a593Smuzhiyun  * immediate request, it is 0 if called later, after the lock has been
2289*4882a593Smuzhiyun  * queued.
2290*4882a593Smuzhiyun  *
2291*4882a593Smuzhiyun  * recover is 1 if dlm_recover_grant() is trying to grant conversions
2292*4882a593Smuzhiyun  * after recovery.
2293*4882a593Smuzhiyun  *
2294*4882a593Smuzhiyun  * References are from chapter 6 of "VAXcluster Principles" by Roy Davis
2295*4882a593Smuzhiyun  */
2296*4882a593Smuzhiyun 
2297*4882a593Smuzhiyun static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
2298*4882a593Smuzhiyun 			   int recover)
2299*4882a593Smuzhiyun {
2300*4882a593Smuzhiyun 	int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);
2301*4882a593Smuzhiyun 
2302*4882a593Smuzhiyun 	/*
2303*4882a593Smuzhiyun 	 * 6-10: Version 5.4 introduced an option to address the phenomenon of
2304*4882a593Smuzhiyun 	 * a new request for a NL mode lock being blocked.
2305*4882a593Smuzhiyun 	 *
2306*4882a593Smuzhiyun 	 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
2307*4882a593Smuzhiyun 	 * request, then it would be granted.  In essence, the use of this flag
2308*4882a593Smuzhiyun 	 * tells the Lock Manager to expedite this request by not considering
2309*4882a593Smuzhiyun 	 * what may be in the CONVERTING or WAITING queues...  As of this
2310*4882a593Smuzhiyun 	 * writing, the EXPEDITE flag can be used only with new requests for NL
2311*4882a593Smuzhiyun 	 * mode locks.  This flag is not valid for conversion requests.
2312*4882a593Smuzhiyun 	 *
2313*4882a593Smuzhiyun 	 * A shortcut.  Earlier checks return an error if EXPEDITE is used in a
2314*4882a593Smuzhiyun 	 * conversion or used with a non-NL requested mode.  We also know an
2315*4882a593Smuzhiyun 	 * EXPEDITE request is always granted immediately, so now must always
2316*4882a593Smuzhiyun 	 * be 1.  The full condition to grant an expedite request: (now &&
2317*4882a593Smuzhiyun 	 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
2318*4882a593Smuzhiyun 	 * therefore be shortened to just checking the flag.
2319*4882a593Smuzhiyun 	 */
2320*4882a593Smuzhiyun 
2321*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
2322*4882a593Smuzhiyun 		return 1;
2323*4882a593Smuzhiyun 
2324*4882a593Smuzhiyun 	/*
2325*4882a593Smuzhiyun 	 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
2326*4882a593Smuzhiyun 	 * added to the remaining conditions.
2327*4882a593Smuzhiyun 	 */
2328*4882a593Smuzhiyun 
2329*4882a593Smuzhiyun 	if (queue_conflict(&r->res_grantqueue, lkb))
2330*4882a593Smuzhiyun 		return 0;
2331*4882a593Smuzhiyun 
2332*4882a593Smuzhiyun 	/*
2333*4882a593Smuzhiyun 	 * 6-3: By default, a conversion request is immediately granted if the
2334*4882a593Smuzhiyun 	 * requested mode is compatible with the modes of all other granted
2335*4882a593Smuzhiyun 	 * locks
2336*4882a593Smuzhiyun 	 */
2337*4882a593Smuzhiyun 
2338*4882a593Smuzhiyun 	if (queue_conflict(&r->res_convertqueue, lkb))
2339*4882a593Smuzhiyun 		return 0;
2340*4882a593Smuzhiyun 
2341*4882a593Smuzhiyun 	/*
2342*4882a593Smuzhiyun 	 * The RECOVER_GRANT flag means dlm_recover_grant() is granting
2343*4882a593Smuzhiyun 	 * locks for a recovered rsb, on which lkb's have been rebuilt.
2344*4882a593Smuzhiyun 	 * The lkb's may have been rebuilt on the queues in a different
2345*4882a593Smuzhiyun 	 * order than they were in on the previous master.  So, granting
2346*4882a593Smuzhiyun 	 * queued conversions in order after recovery doesn't make sense
2347*4882a593Smuzhiyun 	 * since the order hasn't been preserved anyway.  The new order
2348*4882a593Smuzhiyun 	 * could also have created a new "in place" conversion deadlock.
2349*4882a593Smuzhiyun 	 * (e.g. old, failed master held granted EX, with PR->EX, NL->EX.
2350*4882a593Smuzhiyun 	 * After recovery, there would be no granted locks, and possibly
2351*4882a593Smuzhiyun 	 * NL->EX, PR->EX, an in-place conversion deadlock.)  So, after
2352*4882a593Smuzhiyun 	 * recovery, grant conversions without considering order.
2353*4882a593Smuzhiyun 	 */
2354*4882a593Smuzhiyun 
2355*4882a593Smuzhiyun 	if (conv && recover)
2356*4882a593Smuzhiyun 		return 1;
2357*4882a593Smuzhiyun 
2358*4882a593Smuzhiyun 	/*
2359*4882a593Smuzhiyun 	 * 6-5: But the default algorithm for deciding whether to grant or
2360*4882a593Smuzhiyun 	 * queue conversion requests does not by itself guarantee that such
2361*4882a593Smuzhiyun 	 * requests are serviced on a "first come first serve" basis.  This, in
2362*4882a593Smuzhiyun 	 * turn, can lead to a phenomenon known as "indefinite postponement".
2363*4882a593Smuzhiyun 	 *
2364*4882a593Smuzhiyun 	 * 6-7: This issue is dealt with by using the optional QUECVT flag with
2365*4882a593Smuzhiyun 	 * the system service employed to request a lock conversion.  This flag
2366*4882a593Smuzhiyun 	 * forces certain conversion requests to be queued, even if they are
2367*4882a593Smuzhiyun 	 * compatible with the granted modes of other locks on the same
2368*4882a593Smuzhiyun 	 * resource.  Thus, the use of this flag results in conversion requests
2369*4882a593Smuzhiyun 	 * being ordered on a "first come first serve" basis.
2370*4882a593Smuzhiyun 	 *
2371*4882a593Smuzhiyun 	 * DCT: This condition is all about new conversions being able to occur
2372*4882a593Smuzhiyun 	 * "in place" while the lock remains on the granted queue (assuming
2373*4882a593Smuzhiyun 	 * nothing else conflicts.)  IOW if QUECVT isn't set, a conversion
2374*4882a593Smuzhiyun 	 * doesn't _have_ to go onto the convert queue where it's processed in
2375*4882a593Smuzhiyun 	 * order.  The "now" variable is necessary to distinguish converts
2376*4882a593Smuzhiyun 	 * being received and processed for the first time now, because once a
2377*4882a593Smuzhiyun 	 * convert is moved to the conversion queue the condition below applies
2378*4882a593Smuzhiyun 	 * requiring fifo granting.
2379*4882a593Smuzhiyun 	 */
2380*4882a593Smuzhiyun 
2381*4882a593Smuzhiyun 	if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
2382*4882a593Smuzhiyun 		return 1;
2383*4882a593Smuzhiyun 
2384*4882a593Smuzhiyun 	/*
2385*4882a593Smuzhiyun 	 * Even if the convert is compat with all granted locks,
2386*4882a593Smuzhiyun 	 * QUECVT forces it behind other locks on the convert queue.
2387*4882a593Smuzhiyun 	 */
2388*4882a593Smuzhiyun 
2389*4882a593Smuzhiyun 	if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) {
2390*4882a593Smuzhiyun 		if (list_empty(&r->res_convertqueue))
2391*4882a593Smuzhiyun 			return 1;
2392*4882a593Smuzhiyun 		else
2393*4882a593Smuzhiyun 			return 0;
2394*4882a593Smuzhiyun 	}
2395*4882a593Smuzhiyun 
2396*4882a593Smuzhiyun 	/*
2397*4882a593Smuzhiyun 	 * The NOORDER flag is set to avoid the standard vms rules on grant
2398*4882a593Smuzhiyun 	 * order.
2399*4882a593Smuzhiyun 	 */
2400*4882a593Smuzhiyun 
2401*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_NOORDER)
2402*4882a593Smuzhiyun 		return 1;
2403*4882a593Smuzhiyun 
2404*4882a593Smuzhiyun 	/*
2405*4882a593Smuzhiyun 	 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
2406*4882a593Smuzhiyun 	 * granted until all other conversion requests ahead of it are granted
2407*4882a593Smuzhiyun 	 * and/or canceled.
2408*4882a593Smuzhiyun 	 */
2409*4882a593Smuzhiyun 
2410*4882a593Smuzhiyun 	if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
2411*4882a593Smuzhiyun 		return 1;
2412*4882a593Smuzhiyun 
2413*4882a593Smuzhiyun 	/*
2414*4882a593Smuzhiyun 	 * 6-4: By default, a new request is immediately granted only if all
2415*4882a593Smuzhiyun 	 * three of the following conditions are satisfied when the request is
2416*4882a593Smuzhiyun 	 * issued:
2417*4882a593Smuzhiyun 	 * - The queue of ungranted conversion requests for the resource is
2418*4882a593Smuzhiyun 	 *   empty.
2419*4882a593Smuzhiyun 	 * - The queue of ungranted new requests for the resource is empty.
2420*4882a593Smuzhiyun 	 * - The mode of the new request is compatible with the most
2421*4882a593Smuzhiyun 	 *   restrictive mode of all granted locks on the resource.
2422*4882a593Smuzhiyun 	 */
2423*4882a593Smuzhiyun 
2424*4882a593Smuzhiyun 	if (now && !conv && list_empty(&r->res_convertqueue) &&
2425*4882a593Smuzhiyun 	    list_empty(&r->res_waitqueue))
2426*4882a593Smuzhiyun 		return 1;
2427*4882a593Smuzhiyun 
2428*4882a593Smuzhiyun 	/*
2429*4882a593Smuzhiyun 	 * 6-4: Once a lock request is in the queue of ungranted new requests,
2430*4882a593Smuzhiyun 	 * it cannot be granted until the queue of ungranted conversion
2431*4882a593Smuzhiyun 	 * requests is empty, all ungranted new requests ahead of it are
2432*4882a593Smuzhiyun 	 * granted and/or canceled, and it is compatible with the granted mode
2433*4882a593Smuzhiyun 	 * of the most restrictive lock granted on the resource.
2434*4882a593Smuzhiyun 	 */
2435*4882a593Smuzhiyun 
2436*4882a593Smuzhiyun 	if (!now && !conv && list_empty(&r->res_convertqueue) &&
2437*4882a593Smuzhiyun 	    first_in_list(lkb, &r->res_waitqueue))
2438*4882a593Smuzhiyun 		return 1;
2439*4882a593Smuzhiyun 
2440*4882a593Smuzhiyun 	return 0;
2441*4882a593Smuzhiyun }
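
/* The EXPEDITE shortcut above, seen from the caller's side.  Illustrative
 * sketch only ("ls", the lksb and the callbacks are assumed): a new
 * DLM_LOCK_NL request with DLM_LKF_EXPEDITE is granted without regard to
 * the convert and wait queues, while the same flag on a non-NL request is
 * rejected by the argument checks in set_lock_args() below.
 *
 *	dlm_lock(ls, DLM_LOCK_NL, &lksb, DLM_LKF_EXPEDITE, "res1", 4, 0,
 *		 ast_fn, arg, NULL);	// granted immediately
 *
 *	dlm_lock(ls, DLM_LOCK_PR, &lksb, DLM_LKF_EXPEDITE, "res1", 4, 0,
 *		 ast_fn, arg, NULL);	// -EINVAL
 */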
2442*4882a593Smuzhiyun 
2443*4882a593Smuzhiyun static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
2444*4882a593Smuzhiyun 			  int recover, int *err)
2445*4882a593Smuzhiyun {
2446*4882a593Smuzhiyun 	int rv;
2447*4882a593Smuzhiyun 	int8_t alt = 0, rqmode = lkb->lkb_rqmode;
2448*4882a593Smuzhiyun 	int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
2449*4882a593Smuzhiyun 
2450*4882a593Smuzhiyun 	if (err)
2451*4882a593Smuzhiyun 		*err = 0;
2452*4882a593Smuzhiyun 
2453*4882a593Smuzhiyun 	rv = _can_be_granted(r, lkb, now, recover);
2454*4882a593Smuzhiyun 	if (rv)
2455*4882a593Smuzhiyun 		goto out;
2456*4882a593Smuzhiyun 
2457*4882a593Smuzhiyun 	/*
2458*4882a593Smuzhiyun 	 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
2459*4882a593Smuzhiyun 	 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
2460*4882a593Smuzhiyun 	 * cancels one of the locks.
2461*4882a593Smuzhiyun 	 */
2462*4882a593Smuzhiyun 
2463*4882a593Smuzhiyun 	if (is_convert && can_be_queued(lkb) &&
2464*4882a593Smuzhiyun 	    conversion_deadlock_detect(r, lkb)) {
2465*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
2466*4882a593Smuzhiyun 			lkb->lkb_grmode = DLM_LOCK_NL;
2467*4882a593Smuzhiyun 			lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
2468*4882a593Smuzhiyun 		} else if (err) {
2469*4882a593Smuzhiyun 			*err = -EDEADLK;
2470*4882a593Smuzhiyun 		} else {
2471*4882a593Smuzhiyun 			log_print("can_be_granted deadlock %x now %d",
2472*4882a593Smuzhiyun 				  lkb->lkb_id, now);
2473*4882a593Smuzhiyun 			dlm_dump_rsb(r);
2474*4882a593Smuzhiyun 		}
2475*4882a593Smuzhiyun 		goto out;
2476*4882a593Smuzhiyun 	}
2477*4882a593Smuzhiyun 
2478*4882a593Smuzhiyun 	/*
2479*4882a593Smuzhiyun 	 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
2480*4882a593Smuzhiyun 	 * to grant a request in a mode other than the normal rqmode.  It's a
2481*4882a593Smuzhiyun 	 * simple way to provide a big optimization to applications that can
2482*4882a593Smuzhiyun 	 * use them.
2483*4882a593Smuzhiyun 	 */
2484*4882a593Smuzhiyun 
2485*4882a593Smuzhiyun 	if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
2486*4882a593Smuzhiyun 		alt = DLM_LOCK_PR;
2487*4882a593Smuzhiyun 	else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
2488*4882a593Smuzhiyun 		alt = DLM_LOCK_CW;
2489*4882a593Smuzhiyun 
2490*4882a593Smuzhiyun 	if (alt) {
2491*4882a593Smuzhiyun 		lkb->lkb_rqmode = alt;
2492*4882a593Smuzhiyun 		rv = _can_be_granted(r, lkb, now, 0);
2493*4882a593Smuzhiyun 		if (rv)
2494*4882a593Smuzhiyun 			lkb->lkb_sbflags |= DLM_SBF_ALTMODE;
2495*4882a593Smuzhiyun 		else
2496*4882a593Smuzhiyun 			lkb->lkb_rqmode = rqmode;
2497*4882a593Smuzhiyun 	}
2498*4882a593Smuzhiyun  out:
2499*4882a593Smuzhiyun 	return rv;
2500*4882a593Smuzhiyun }
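
/* Alternate-mode grants from the caller's side.  Illustrative sketch only
 * ("ls", the lksb and the callbacks are assumed): a PW request carrying
 * DLM_LKF_ALTPR that cannot be granted in PW is retried in PR by the code
 * above; when the alternate mode succeeds, DLM_SBF_ALTMODE is set.
 *
 *	dlm_lock(ls, DLM_LOCK_PW, &lksb, DLM_LKF_ALTPR, "res1", 4, 0,
 *		 ast_fn, arg, bast_fn);
 *
 *	// in the completion ast, DLM_SBF_ALTMODE in lksb.sb_flags means
 *	// the lock was granted in PR rather than the requested PW
 */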
2501*4882a593Smuzhiyun 
2502*4882a593Smuzhiyun /* Returns the highest requested mode of all blocked conversions; sets
2503*4882a593Smuzhiyun    cw if there's a blocked conversion to DLM_LOCK_CW. */
2504*4882a593Smuzhiyun 
2505*4882a593Smuzhiyun static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw,
2506*4882a593Smuzhiyun 				 unsigned int *count)
2507*4882a593Smuzhiyun {
2508*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *s;
2509*4882a593Smuzhiyun 	int recover = rsb_flag(r, RSB_RECOVER_GRANT);
2510*4882a593Smuzhiyun 	int hi, demoted, quit, grant_restart, demote_restart;
2511*4882a593Smuzhiyun 	int deadlk;
2512*4882a593Smuzhiyun 
2513*4882a593Smuzhiyun 	quit = 0;
2514*4882a593Smuzhiyun  restart:
2515*4882a593Smuzhiyun 	grant_restart = 0;
2516*4882a593Smuzhiyun 	demote_restart = 0;
2517*4882a593Smuzhiyun 	hi = DLM_LOCK_IV;
2518*4882a593Smuzhiyun 
2519*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
2520*4882a593Smuzhiyun 		demoted = is_demoted(lkb);
2521*4882a593Smuzhiyun 		deadlk = 0;
2522*4882a593Smuzhiyun 
2523*4882a593Smuzhiyun 		if (can_be_granted(r, lkb, 0, recover, &deadlk)) {
2524*4882a593Smuzhiyun 			grant_lock_pending(r, lkb);
2525*4882a593Smuzhiyun 			grant_restart = 1;
2526*4882a593Smuzhiyun 			if (count)
2527*4882a593Smuzhiyun 				(*count)++;
2528*4882a593Smuzhiyun 			continue;
2529*4882a593Smuzhiyun 		}
2530*4882a593Smuzhiyun 
2531*4882a593Smuzhiyun 		if (!demoted && is_demoted(lkb)) {
2532*4882a593Smuzhiyun 			log_print("WARN: pending demoted %x node %d %s",
2533*4882a593Smuzhiyun 				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
2534*4882a593Smuzhiyun 			demote_restart = 1;
2535*4882a593Smuzhiyun 			continue;
2536*4882a593Smuzhiyun 		}
2537*4882a593Smuzhiyun 
2538*4882a593Smuzhiyun 		if (deadlk) {
2539*4882a593Smuzhiyun 			/*
2540*4882a593Smuzhiyun 			 * If the DLM_LKF_NODLCKWT flag is set and a conversion
2541*4882a593Smuzhiyun 			 * deadlock is detected, we queue a blocking AST and let
2542*4882a593Smuzhiyun 			 * the lock holder demote (or cancel) the conversion.
2543*4882a593Smuzhiyun 			 */
2544*4882a593Smuzhiyun 			if (lkb->lkb_exflags & DLM_LKF_NODLCKWT) {
2545*4882a593Smuzhiyun 				if (lkb->lkb_highbast < lkb->lkb_rqmode) {
2546*4882a593Smuzhiyun 					queue_bast(r, lkb, lkb->lkb_rqmode);
2547*4882a593Smuzhiyun 					lkb->lkb_highbast = lkb->lkb_rqmode;
2548*4882a593Smuzhiyun 				}
2549*4882a593Smuzhiyun 			} else {
2550*4882a593Smuzhiyun 				log_print("WARN: pending deadlock %x node %d %s",
2551*4882a593Smuzhiyun 					  lkb->lkb_id, lkb->lkb_nodeid,
2552*4882a593Smuzhiyun 					  r->res_name);
2553*4882a593Smuzhiyun 				dlm_dump_rsb(r);
2554*4882a593Smuzhiyun 			}
2555*4882a593Smuzhiyun 			continue;
2556*4882a593Smuzhiyun 		}
2557*4882a593Smuzhiyun 
2558*4882a593Smuzhiyun 		hi = max_t(int, lkb->lkb_rqmode, hi);
2559*4882a593Smuzhiyun 
2560*4882a593Smuzhiyun 		if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
2561*4882a593Smuzhiyun 			*cw = 1;
2562*4882a593Smuzhiyun 	}
2563*4882a593Smuzhiyun 
2564*4882a593Smuzhiyun 	if (grant_restart)
2565*4882a593Smuzhiyun 		goto restart;
2566*4882a593Smuzhiyun 	if (demote_restart && !quit) {
2567*4882a593Smuzhiyun 		quit = 1;
2568*4882a593Smuzhiyun 		goto restart;
2569*4882a593Smuzhiyun 	}
2570*4882a593Smuzhiyun 
2571*4882a593Smuzhiyun 	return max_t(int, high, hi);
2572*4882a593Smuzhiyun }
2573*4882a593Smuzhiyun 
2574*4882a593Smuzhiyun static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw,
2575*4882a593Smuzhiyun 			      unsigned int *count)
2576*4882a593Smuzhiyun {
2577*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *s;
2578*4882a593Smuzhiyun 
2579*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
2580*4882a593Smuzhiyun 		if (can_be_granted(r, lkb, 0, 0, NULL)) {
2581*4882a593Smuzhiyun 			grant_lock_pending(r, lkb);
2582*4882a593Smuzhiyun 			if (count)
2583*4882a593Smuzhiyun 				(*count)++;
2584*4882a593Smuzhiyun 		} else {
2585*4882a593Smuzhiyun 			high = max_t(int, lkb->lkb_rqmode, high);
2586*4882a593Smuzhiyun 			if (lkb->lkb_rqmode == DLM_LOCK_CW)
2587*4882a593Smuzhiyun 				*cw = 1;
2588*4882a593Smuzhiyun 		}
2589*4882a593Smuzhiyun 	}
2590*4882a593Smuzhiyun 
2591*4882a593Smuzhiyun 	return high;
2592*4882a593Smuzhiyun }
2593*4882a593Smuzhiyun 
2594*4882a593Smuzhiyun /* cw of 1 means there's a lock with a rqmode of DLM_LOCK_CW that's blocked
2595*4882a593Smuzhiyun    on either the convert or waiting queue.
2596*4882a593Smuzhiyun    high is the largest rqmode of all locks blocked on the convert or
2597*4882a593Smuzhiyun    waiting queue. */
2598*4882a593Smuzhiyun 
2599*4882a593Smuzhiyun static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw)
2600*4882a593Smuzhiyun {
2601*4882a593Smuzhiyun 	if (gr->lkb_grmode == DLM_LOCK_PR && cw) {
2602*4882a593Smuzhiyun 		if (gr->lkb_highbast < DLM_LOCK_EX)
2603*4882a593Smuzhiyun 			return 1;
2604*4882a593Smuzhiyun 		return 0;
2605*4882a593Smuzhiyun 	}
2606*4882a593Smuzhiyun 
2607*4882a593Smuzhiyun 	if (gr->lkb_highbast < high &&
2608*4882a593Smuzhiyun 	    !__dlm_compat_matrix[gr->lkb_grmode+1][high+1])
2609*4882a593Smuzhiyun 		return 1;
2610*4882a593Smuzhiyun 	return 0;
2611*4882a593Smuzhiyun }
2612*4882a593Smuzhiyun 
2613*4882a593Smuzhiyun static void grant_pending_locks(struct dlm_rsb *r, unsigned int *count)
2614*4882a593Smuzhiyun {
2615*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *s;
2616*4882a593Smuzhiyun 	int high = DLM_LOCK_IV;
2617*4882a593Smuzhiyun 	int cw = 0;
2618*4882a593Smuzhiyun 
2619*4882a593Smuzhiyun 	if (!is_master(r)) {
2620*4882a593Smuzhiyun 		log_print("grant_pending_locks r nodeid %d", r->res_nodeid);
2621*4882a593Smuzhiyun 		dlm_dump_rsb(r);
2622*4882a593Smuzhiyun 		return;
2623*4882a593Smuzhiyun 	}
2624*4882a593Smuzhiyun 
2625*4882a593Smuzhiyun 	high = grant_pending_convert(r, high, &cw, count);
2626*4882a593Smuzhiyun 	high = grant_pending_wait(r, high, &cw, count);
2627*4882a593Smuzhiyun 
2628*4882a593Smuzhiyun 	if (high == DLM_LOCK_IV)
2629*4882a593Smuzhiyun 		return;
2630*4882a593Smuzhiyun 
2631*4882a593Smuzhiyun 	/*
2632*4882a593Smuzhiyun 	 * If there are locks left on the wait/convert queue then send blocking
2633*4882a593Smuzhiyun 	 * ASTs to granted locks based on the largest requested mode (high)
2634*4882a593Smuzhiyun 	 * found above.
2635*4882a593Smuzhiyun 	 */
2636*4882a593Smuzhiyun 
2637*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
2638*4882a593Smuzhiyun 		if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
2639*4882a593Smuzhiyun 			if (cw && high == DLM_LOCK_PR &&
2640*4882a593Smuzhiyun 			    lkb->lkb_grmode == DLM_LOCK_PR)
2641*4882a593Smuzhiyun 				queue_bast(r, lkb, DLM_LOCK_CW);
2642*4882a593Smuzhiyun 			else
2643*4882a593Smuzhiyun 				queue_bast(r, lkb, high);
2644*4882a593Smuzhiyun 			lkb->lkb_highbast = high;
2645*4882a593Smuzhiyun 		}
2646*4882a593Smuzhiyun 	}
2647*4882a593Smuzhiyun }
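
/* What the holder of a granted lock sees when grant_pending_locks()
 * queues a blocking ast.  Illustrative sketch only (the example_* names
 * are assumed): the bast receives the mode that is being blocked, and the
 * usual response is to convert down so the waiter can be granted.  The
 * callback runs from the dlm ast delivery context, so deferring the
 * convert to the caller's own work context is the typical pattern.
 *
 *	static void example_bast(void *astarg, int mode)
 *	{
 *		struct example_lock *x = astarg;
 *
 *		// another lock is blocked waiting for "mode"
 *		x->blocked_mode = mode;
 *		schedule_work(&x->demote_work);	// converts down to NL
 *	}
 */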
2648*4882a593Smuzhiyun 
2649*4882a593Smuzhiyun static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq)
2650*4882a593Smuzhiyun {
2651*4882a593Smuzhiyun 	if ((gr->lkb_grmode == DLM_LOCK_PR && rq->lkb_rqmode == DLM_LOCK_CW) ||
2652*4882a593Smuzhiyun 	    (gr->lkb_grmode == DLM_LOCK_CW && rq->lkb_rqmode == DLM_LOCK_PR)) {
2653*4882a593Smuzhiyun 		if (gr->lkb_highbast < DLM_LOCK_EX)
2654*4882a593Smuzhiyun 			return 1;
2655*4882a593Smuzhiyun 		return 0;
2656*4882a593Smuzhiyun 	}
2657*4882a593Smuzhiyun 
2658*4882a593Smuzhiyun 	if (gr->lkb_highbast < rq->lkb_rqmode && !modes_compat(gr, rq))
2659*4882a593Smuzhiyun 		return 1;
2660*4882a593Smuzhiyun 	return 0;
2661*4882a593Smuzhiyun }
2662*4882a593Smuzhiyun 
2663*4882a593Smuzhiyun static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
2664*4882a593Smuzhiyun 			    struct dlm_lkb *lkb)
2665*4882a593Smuzhiyun {
2666*4882a593Smuzhiyun 	struct dlm_lkb *gr;
2667*4882a593Smuzhiyun 
2668*4882a593Smuzhiyun 	list_for_each_entry(gr, head, lkb_statequeue) {
2669*4882a593Smuzhiyun 		/* skip self when sending basts to convertqueue */
2670*4882a593Smuzhiyun 		if (gr == lkb)
2671*4882a593Smuzhiyun 			continue;
2672*4882a593Smuzhiyun 		if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) {
2673*4882a593Smuzhiyun 			queue_bast(r, gr, lkb->lkb_rqmode);
2674*4882a593Smuzhiyun 			gr->lkb_highbast = lkb->lkb_rqmode;
2675*4882a593Smuzhiyun 		}
2676*4882a593Smuzhiyun 	}
2677*4882a593Smuzhiyun }
2678*4882a593Smuzhiyun 
2679*4882a593Smuzhiyun static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb)
2680*4882a593Smuzhiyun {
2681*4882a593Smuzhiyun 	send_bast_queue(r, &r->res_grantqueue, lkb);
2682*4882a593Smuzhiyun }
2683*4882a593Smuzhiyun 
2684*4882a593Smuzhiyun static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb)
2685*4882a593Smuzhiyun {
2686*4882a593Smuzhiyun 	send_bast_queue(r, &r->res_grantqueue, lkb);
2687*4882a593Smuzhiyun 	send_bast_queue(r, &r->res_convertqueue, lkb);
2688*4882a593Smuzhiyun }
2689*4882a593Smuzhiyun 
2690*4882a593Smuzhiyun /* set_master(r, lkb) -- set the master nodeid of a resource
2691*4882a593Smuzhiyun 
2692*4882a593Smuzhiyun    The purpose of this function is to set the nodeid field in the given
2693*4882a593Smuzhiyun    lkb using the nodeid field in the given rsb.  If the rsb's nodeid is
2694*4882a593Smuzhiyun    known, it can just be copied to the lkb and the function will return
2695*4882a593Smuzhiyun    0.  If the rsb's nodeid is _not_ known, it needs to be looked up
2696*4882a593Smuzhiyun    before it can be copied to the lkb.
2697*4882a593Smuzhiyun 
2698*4882a593Smuzhiyun    When the rsb nodeid is being looked up remotely, the initial lkb
2699*4882a593Smuzhiyun    causing the lookup is kept on the ls_waiters list waiting for the
2700*4882a593Smuzhiyun    lookup reply.  Other lkb's waiting for the same rsb lookup are kept
2701*4882a593Smuzhiyun    on the rsb's res_lookup list until the master is verified.
2702*4882a593Smuzhiyun 
2703*4882a593Smuzhiyun    Return values:
2704*4882a593Smuzhiyun    0: nodeid is set in rsb/lkb and the caller should go ahead and use it
2705*4882a593Smuzhiyun    1: the rsb master is not available and the lkb has been placed on
2706*4882a593Smuzhiyun       a wait queue
2707*4882a593Smuzhiyun */
2708*4882a593Smuzhiyun 
2709*4882a593Smuzhiyun static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
2710*4882a593Smuzhiyun {
2711*4882a593Smuzhiyun 	int our_nodeid = dlm_our_nodeid();
2712*4882a593Smuzhiyun 
2713*4882a593Smuzhiyun 	if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
2714*4882a593Smuzhiyun 		rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
2715*4882a593Smuzhiyun 		r->res_first_lkid = lkb->lkb_id;
2716*4882a593Smuzhiyun 		lkb->lkb_nodeid = r->res_nodeid;
2717*4882a593Smuzhiyun 		return 0;
2718*4882a593Smuzhiyun 	}
2719*4882a593Smuzhiyun 
2720*4882a593Smuzhiyun 	if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) {
2721*4882a593Smuzhiyun 		list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup);
2722*4882a593Smuzhiyun 		return 1;
2723*4882a593Smuzhiyun 	}
2724*4882a593Smuzhiyun 
2725*4882a593Smuzhiyun 	if (r->res_master_nodeid == our_nodeid) {
2726*4882a593Smuzhiyun 		lkb->lkb_nodeid = 0;
2727*4882a593Smuzhiyun 		return 0;
2728*4882a593Smuzhiyun 	}
2729*4882a593Smuzhiyun 
2730*4882a593Smuzhiyun 	if (r->res_master_nodeid) {
2731*4882a593Smuzhiyun 		lkb->lkb_nodeid = r->res_master_nodeid;
2732*4882a593Smuzhiyun 		return 0;
2733*4882a593Smuzhiyun 	}
2734*4882a593Smuzhiyun 
2735*4882a593Smuzhiyun 	if (dlm_dir_nodeid(r) == our_nodeid) {
2736*4882a593Smuzhiyun 		/* This is a somewhat unusual case; find_rsb will usually
2737*4882a593Smuzhiyun 		   have set res_master_nodeid when dir nodeid is local, but
2738*4882a593Smuzhiyun 		   there are cases where we become the dir node after we've
2739*4882a593Smuzhiyun 		   passed find_rsb and go through _request_lock again.
2740*4882a593Smuzhiyun 		   confirm_master() or process_lookup_list() needs to be
2741*4882a593Smuzhiyun 		   called after this. */
2742*4882a593Smuzhiyun 		log_debug(r->res_ls, "set_master %x self master %d dir %d %s",
2743*4882a593Smuzhiyun 			  lkb->lkb_id, r->res_master_nodeid, r->res_dir_nodeid,
2744*4882a593Smuzhiyun 			  r->res_name);
2745*4882a593Smuzhiyun 		r->res_master_nodeid = our_nodeid;
2746*4882a593Smuzhiyun 		r->res_nodeid = 0;
2747*4882a593Smuzhiyun 		lkb->lkb_nodeid = 0;
2748*4882a593Smuzhiyun 		return 0;
2749*4882a593Smuzhiyun 	}
2750*4882a593Smuzhiyun 
2751*4882a593Smuzhiyun 	wait_pending_remove(r);
2752*4882a593Smuzhiyun 
2753*4882a593Smuzhiyun 	r->res_first_lkid = lkb->lkb_id;
2754*4882a593Smuzhiyun 	send_lookup(r, lkb);
2755*4882a593Smuzhiyun 	return 1;
2756*4882a593Smuzhiyun }
2757*4882a593Smuzhiyun 
2758*4882a593Smuzhiyun static void process_lookup_list(struct dlm_rsb *r)
2759*4882a593Smuzhiyun {
2760*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
2761*4882a593Smuzhiyun 
2762*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) {
2763*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_rsb_lookup);
2764*4882a593Smuzhiyun 		_request_lock(r, lkb);
2765*4882a593Smuzhiyun 		schedule();
2766*4882a593Smuzhiyun 	}
2767*4882a593Smuzhiyun }
2768*4882a593Smuzhiyun 
2769*4882a593Smuzhiyun /* confirm_master -- confirm (or deny) an rsb's master nodeid */
2770*4882a593Smuzhiyun 
2771*4882a593Smuzhiyun static void confirm_master(struct dlm_rsb *r, int error)
2772*4882a593Smuzhiyun {
2773*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
2774*4882a593Smuzhiyun 
2775*4882a593Smuzhiyun 	if (!r->res_first_lkid)
2776*4882a593Smuzhiyun 		return;
2777*4882a593Smuzhiyun 
2778*4882a593Smuzhiyun 	switch (error) {
2779*4882a593Smuzhiyun 	case 0:
2780*4882a593Smuzhiyun 	case -EINPROGRESS:
2781*4882a593Smuzhiyun 		r->res_first_lkid = 0;
2782*4882a593Smuzhiyun 		process_lookup_list(r);
2783*4882a593Smuzhiyun 		break;
2784*4882a593Smuzhiyun 
2785*4882a593Smuzhiyun 	case -EAGAIN:
2786*4882a593Smuzhiyun 	case -EBADR:
2787*4882a593Smuzhiyun 	case -ENOTBLK:
2788*4882a593Smuzhiyun 		/* the remote request failed and won't be retried (it was
2789*4882a593Smuzhiyun 		   a NOQUEUE, or has been canceled/unlocked); make a waiting
2790*4882a593Smuzhiyun 		   lkb the first_lkid */
2791*4882a593Smuzhiyun 
2792*4882a593Smuzhiyun 		r->res_first_lkid = 0;
2793*4882a593Smuzhiyun 
2794*4882a593Smuzhiyun 		if (!list_empty(&r->res_lookup)) {
2795*4882a593Smuzhiyun 			lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
2796*4882a593Smuzhiyun 					 lkb_rsb_lookup);
2797*4882a593Smuzhiyun 			list_del_init(&lkb->lkb_rsb_lookup);
2798*4882a593Smuzhiyun 			r->res_first_lkid = lkb->lkb_id;
2799*4882a593Smuzhiyun 			_request_lock(r, lkb);
2800*4882a593Smuzhiyun 		}
2801*4882a593Smuzhiyun 		break;
2802*4882a593Smuzhiyun 
2803*4882a593Smuzhiyun 	default:
2804*4882a593Smuzhiyun 		log_error(r->res_ls, "confirm_master unknown error %d", error);
2805*4882a593Smuzhiyun 	}
2806*4882a593Smuzhiyun }
2807*4882a593Smuzhiyun 
2808*4882a593Smuzhiyun static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
2809*4882a593Smuzhiyun 			 int namelen, unsigned long timeout_cs,
2810*4882a593Smuzhiyun 			 void (*ast) (void *astparam),
2811*4882a593Smuzhiyun 			 void *astparam,
2812*4882a593Smuzhiyun 			 void (*bast) (void *astparam, int mode),
2813*4882a593Smuzhiyun 			 struct dlm_args *args)
2814*4882a593Smuzhiyun {
2815*4882a593Smuzhiyun 	int rv = -EINVAL;
2816*4882a593Smuzhiyun 
2817*4882a593Smuzhiyun 	/* check for invalid arg usage */
2818*4882a593Smuzhiyun 
2819*4882a593Smuzhiyun 	if (mode < 0 || mode > DLM_LOCK_EX)
2820*4882a593Smuzhiyun 		goto out;
2821*4882a593Smuzhiyun 
2822*4882a593Smuzhiyun 	if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN))
2823*4882a593Smuzhiyun 		goto out;
2824*4882a593Smuzhiyun 
2825*4882a593Smuzhiyun 	if (flags & DLM_LKF_CANCEL)
2826*4882a593Smuzhiyun 		goto out;
2827*4882a593Smuzhiyun 
2828*4882a593Smuzhiyun 	if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT))
2829*4882a593Smuzhiyun 		goto out;
2830*4882a593Smuzhiyun 
2831*4882a593Smuzhiyun 	if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT))
2832*4882a593Smuzhiyun 		goto out;
2833*4882a593Smuzhiyun 
2834*4882a593Smuzhiyun 	if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE)
2835*4882a593Smuzhiyun 		goto out;
2836*4882a593Smuzhiyun 
2837*4882a593Smuzhiyun 	if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT)
2838*4882a593Smuzhiyun 		goto out;
2839*4882a593Smuzhiyun 
2840*4882a593Smuzhiyun 	if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT)
2841*4882a593Smuzhiyun 		goto out;
2842*4882a593Smuzhiyun 
2843*4882a593Smuzhiyun 	if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE)
2844*4882a593Smuzhiyun 		goto out;
2845*4882a593Smuzhiyun 
2846*4882a593Smuzhiyun 	if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL)
2847*4882a593Smuzhiyun 		goto out;
2848*4882a593Smuzhiyun 
2849*4882a593Smuzhiyun 	if (!ast || !lksb)
2850*4882a593Smuzhiyun 		goto out;
2851*4882a593Smuzhiyun 
2852*4882a593Smuzhiyun 	if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
2853*4882a593Smuzhiyun 		goto out;
2854*4882a593Smuzhiyun 
2855*4882a593Smuzhiyun 	if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
2856*4882a593Smuzhiyun 		goto out;
2857*4882a593Smuzhiyun 
2858*4882a593Smuzhiyun 	/* these args will be copied to the lkb in validate_lock_args,
2859*4882a593Smuzhiyun 	   it cannot be done now because when converting locks, fields in
2860*4882a593Smuzhiyun 	   an active lkb cannot be modified before locking the rsb */
2861*4882a593Smuzhiyun 
2862*4882a593Smuzhiyun 	args->flags = flags;
2863*4882a593Smuzhiyun 	args->astfn = ast;
2864*4882a593Smuzhiyun 	args->astparam = astparam;
2865*4882a593Smuzhiyun 	args->bastfn = bast;
2866*4882a593Smuzhiyun 	args->timeout = timeout_cs;
2867*4882a593Smuzhiyun 	args->mode = mode;
2868*4882a593Smuzhiyun 	args->lksb = lksb;
2869*4882a593Smuzhiyun 	rv = 0;
2870*4882a593Smuzhiyun  out:
2871*4882a593Smuzhiyun 	return rv;
2872*4882a593Smuzhiyun }
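
/* A few calls as seen by the checks above.  Illustrative sketch only
 * ("ls", the lksb and the callbacks are assumed):
 *
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb, DLM_LKF_VALBLK, "res1", 4, 0,
 *		 ast_fn, arg, NULL);	// -EINVAL unless lksb.sb_lvbptr is set
 *
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb, DLM_LKF_QUECVT, "res1", 4, 0,
 *		 ast_fn, arg, NULL);	// -EINVAL: QUECVT without CONVERT
 *
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb, DLM_LKF_CONVERT, NULL, 0, 0,
 *		 ast_fn, arg, NULL);	// -EINVAL unless lksb.sb_lkid is set
 */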
2873*4882a593Smuzhiyun 
2874*4882a593Smuzhiyun static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
2875*4882a593Smuzhiyun {
2876*4882a593Smuzhiyun 	if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK |
2877*4882a593Smuzhiyun 		      DLM_LKF_FORCEUNLOCK))
2878*4882a593Smuzhiyun 		return -EINVAL;
2879*4882a593Smuzhiyun 
2880*4882a593Smuzhiyun 	if (flags & DLM_LKF_CANCEL && flags & DLM_LKF_FORCEUNLOCK)
2881*4882a593Smuzhiyun 		return -EINVAL;
2882*4882a593Smuzhiyun 
2883*4882a593Smuzhiyun 	args->flags = flags;
2884*4882a593Smuzhiyun 	args->astparam = astarg;
2885*4882a593Smuzhiyun 	return 0;
2886*4882a593Smuzhiyun }
2887*4882a593Smuzhiyun 
2888*4882a593Smuzhiyun static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2889*4882a593Smuzhiyun 			      struct dlm_args *args)
2890*4882a593Smuzhiyun {
2891*4882a593Smuzhiyun 	int rv = -EBUSY;
2892*4882a593Smuzhiyun 
2893*4882a593Smuzhiyun 	if (args->flags & DLM_LKF_CONVERT) {
2894*4882a593Smuzhiyun 		if (lkb->lkb_status != DLM_LKSTS_GRANTED)
2895*4882a593Smuzhiyun 			goto out;
2896*4882a593Smuzhiyun 
2897*4882a593Smuzhiyun 		if (lkb->lkb_wait_type)
2898*4882a593Smuzhiyun 			goto out;
2899*4882a593Smuzhiyun 
2900*4882a593Smuzhiyun 		if (is_overlap(lkb))
2901*4882a593Smuzhiyun 			goto out;
2902*4882a593Smuzhiyun 
2903*4882a593Smuzhiyun 		rv = -EINVAL;
2904*4882a593Smuzhiyun 		if (lkb->lkb_flags & DLM_IFL_MSTCPY)
2905*4882a593Smuzhiyun 			goto out;
2906*4882a593Smuzhiyun 
2907*4882a593Smuzhiyun 		if (args->flags & DLM_LKF_QUECVT &&
2908*4882a593Smuzhiyun 		    !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
2909*4882a593Smuzhiyun 			goto out;
2910*4882a593Smuzhiyun 	}
2911*4882a593Smuzhiyun 
2912*4882a593Smuzhiyun 	lkb->lkb_exflags = args->flags;
2913*4882a593Smuzhiyun 	lkb->lkb_sbflags = 0;
2914*4882a593Smuzhiyun 	lkb->lkb_astfn = args->astfn;
2915*4882a593Smuzhiyun 	lkb->lkb_astparam = args->astparam;
2916*4882a593Smuzhiyun 	lkb->lkb_bastfn = args->bastfn;
2917*4882a593Smuzhiyun 	lkb->lkb_rqmode = args->mode;
2918*4882a593Smuzhiyun 	lkb->lkb_lksb = args->lksb;
2919*4882a593Smuzhiyun 	lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
2920*4882a593Smuzhiyun 	lkb->lkb_ownpid = (int) current->pid;
2921*4882a593Smuzhiyun 	lkb->lkb_timeout_cs = args->timeout;
2922*4882a593Smuzhiyun 	rv = 0;
2923*4882a593Smuzhiyun  out:
2924*4882a593Smuzhiyun 	if (rv)
2925*4882a593Smuzhiyun 		log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
2926*4882a593Smuzhiyun 			  rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
2927*4882a593Smuzhiyun 			  lkb->lkb_status, lkb->lkb_wait_type,
2928*4882a593Smuzhiyun 			  lkb->lkb_resource->res_name);
2929*4882a593Smuzhiyun 	return rv;
2930*4882a593Smuzhiyun }
2931*4882a593Smuzhiyun 
2932*4882a593Smuzhiyun /* when dlm_unlock() sees -EBUSY with CANCEL/FORCEUNLOCK it returns 0
2933*4882a593Smuzhiyun    for success */
2934*4882a593Smuzhiyun 
2935*4882a593Smuzhiyun /* note: it's valid for lkb_nodeid/res_nodeid to be -1 when we get here
2936*4882a593Smuzhiyun    because there may be a lookup in progress and it's valid to do
2937*4882a593Smuzhiyun    cancel/force-unlock on it */
2938*4882a593Smuzhiyun 
2939*4882a593Smuzhiyun static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
2940*4882a593Smuzhiyun {
2941*4882a593Smuzhiyun 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
2942*4882a593Smuzhiyun 	int rv = -EINVAL;
2943*4882a593Smuzhiyun 
2944*4882a593Smuzhiyun 	if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
2945*4882a593Smuzhiyun 		log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
2946*4882a593Smuzhiyun 		dlm_print_lkb(lkb);
2947*4882a593Smuzhiyun 		goto out;
2948*4882a593Smuzhiyun 	}
2949*4882a593Smuzhiyun 
2950*4882a593Smuzhiyun 	/* an lkb may still exist even though the lock is EOL'ed due to a
2951*4882a593Smuzhiyun 	   cancel, unlock or failed noqueue request; an app can't use these
2952*4882a593Smuzhiyun 	   locks; return same error as if the lkid had not been found at all */
2953*4882a593Smuzhiyun 
2954*4882a593Smuzhiyun 	if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
2955*4882a593Smuzhiyun 		log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
2956*4882a593Smuzhiyun 		rv = -ENOENT;
2957*4882a593Smuzhiyun 		goto out;
2958*4882a593Smuzhiyun 	}
2959*4882a593Smuzhiyun 
2960*4882a593Smuzhiyun 	/* an lkb may be waiting for an rsb lookup to complete where the
2961*4882a593Smuzhiyun 	   lookup was initiated by another lock */
2962*4882a593Smuzhiyun 
2963*4882a593Smuzhiyun 	if (!list_empty(&lkb->lkb_rsb_lookup)) {
2964*4882a593Smuzhiyun 		if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
2965*4882a593Smuzhiyun 			log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
2966*4882a593Smuzhiyun 			list_del_init(&lkb->lkb_rsb_lookup);
2967*4882a593Smuzhiyun 			queue_cast(lkb->lkb_resource, lkb,
2968*4882a593Smuzhiyun 				   args->flags & DLM_LKF_CANCEL ?
2969*4882a593Smuzhiyun 				   -DLM_ECANCEL : -DLM_EUNLOCK);
2970*4882a593Smuzhiyun 			unhold_lkb(lkb); /* undoes create_lkb() */
2971*4882a593Smuzhiyun 		}
2972*4882a593Smuzhiyun 		/* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
2973*4882a593Smuzhiyun 		rv = -EBUSY;
2974*4882a593Smuzhiyun 		goto out;
2975*4882a593Smuzhiyun 	}
2976*4882a593Smuzhiyun 
2977*4882a593Smuzhiyun 	/* cancel not allowed with another cancel/unlock in progress */
2978*4882a593Smuzhiyun 
2979*4882a593Smuzhiyun 	if (args->flags & DLM_LKF_CANCEL) {
2980*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_CANCEL)
2981*4882a593Smuzhiyun 			goto out;
2982*4882a593Smuzhiyun 
2983*4882a593Smuzhiyun 		if (is_overlap(lkb))
2984*4882a593Smuzhiyun 			goto out;
2985*4882a593Smuzhiyun 
2986*4882a593Smuzhiyun 		/* don't let scand try to do a cancel */
2987*4882a593Smuzhiyun 		del_timeout(lkb);
2988*4882a593Smuzhiyun 
2989*4882a593Smuzhiyun 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
2990*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
2991*4882a593Smuzhiyun 			rv = -EBUSY;
2992*4882a593Smuzhiyun 			goto out;
2993*4882a593Smuzhiyun 		}
2994*4882a593Smuzhiyun 
2995*4882a593Smuzhiyun 		/* there's nothing to cancel */
2996*4882a593Smuzhiyun 		if (lkb->lkb_status == DLM_LKSTS_GRANTED &&
2997*4882a593Smuzhiyun 		    !lkb->lkb_wait_type) {
2998*4882a593Smuzhiyun 			rv = -EBUSY;
2999*4882a593Smuzhiyun 			goto out;
3000*4882a593Smuzhiyun 		}
3001*4882a593Smuzhiyun 
3002*4882a593Smuzhiyun 		switch (lkb->lkb_wait_type) {
3003*4882a593Smuzhiyun 		case DLM_MSG_LOOKUP:
3004*4882a593Smuzhiyun 		case DLM_MSG_REQUEST:
3005*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
3006*4882a593Smuzhiyun 			rv = -EBUSY;
3007*4882a593Smuzhiyun 			goto out;
3008*4882a593Smuzhiyun 		case DLM_MSG_UNLOCK:
3009*4882a593Smuzhiyun 		case DLM_MSG_CANCEL:
3010*4882a593Smuzhiyun 			goto out;
3011*4882a593Smuzhiyun 		}
3012*4882a593Smuzhiyun 		/* add_to_waiters() will set OVERLAP_CANCEL */
3013*4882a593Smuzhiyun 		goto out_ok;
3014*4882a593Smuzhiyun 	}
3015*4882a593Smuzhiyun 
3016*4882a593Smuzhiyun 	/* do we need to allow a force-unlock if there's a normal unlock
3017*4882a593Smuzhiyun 	   already in progress?  in what conditions could the normal unlock
3018*4882a593Smuzhiyun 	   fail such that we'd want to send a force-unlock to be sure? */
3019*4882a593Smuzhiyun 
3020*4882a593Smuzhiyun 	if (args->flags & DLM_LKF_FORCEUNLOCK) {
3021*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_FORCEUNLOCK)
3022*4882a593Smuzhiyun 			goto out;
3023*4882a593Smuzhiyun 
3024*4882a593Smuzhiyun 		if (is_overlap_unlock(lkb))
3025*4882a593Smuzhiyun 			goto out;
3026*4882a593Smuzhiyun 
3027*4882a593Smuzhiyun 		/* don't let scand try to do a cancel */
3028*4882a593Smuzhiyun 		del_timeout(lkb);
3029*4882a593Smuzhiyun 
3030*4882a593Smuzhiyun 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
3031*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
3032*4882a593Smuzhiyun 			rv = -EBUSY;
3033*4882a593Smuzhiyun 			goto out;
3034*4882a593Smuzhiyun 		}
3035*4882a593Smuzhiyun 
3036*4882a593Smuzhiyun 		switch (lkb->lkb_wait_type) {
3037*4882a593Smuzhiyun 		case DLM_MSG_LOOKUP:
3038*4882a593Smuzhiyun 		case DLM_MSG_REQUEST:
3039*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
3040*4882a593Smuzhiyun 			rv = -EBUSY;
3041*4882a593Smuzhiyun 			goto out;
3042*4882a593Smuzhiyun 		case DLM_MSG_UNLOCK:
3043*4882a593Smuzhiyun 			goto out;
3044*4882a593Smuzhiyun 		}
3045*4882a593Smuzhiyun 		/* add_to_waiters() will set OVERLAP_UNLOCK */
3046*4882a593Smuzhiyun 		goto out_ok;
3047*4882a593Smuzhiyun 	}
3048*4882a593Smuzhiyun 
3049*4882a593Smuzhiyun 	/* normal unlock not allowed if there's any op in progress */
3050*4882a593Smuzhiyun 	rv = -EBUSY;
3051*4882a593Smuzhiyun 	if (lkb->lkb_wait_type || lkb->lkb_wait_count)
3052*4882a593Smuzhiyun 		goto out;
3053*4882a593Smuzhiyun 
3054*4882a593Smuzhiyun  out_ok:
3055*4882a593Smuzhiyun 	/* an overlapping op shouldn't blow away exflags from other op */
3056*4882a593Smuzhiyun 	lkb->lkb_exflags |= args->flags;
3057*4882a593Smuzhiyun 	lkb->lkb_sbflags = 0;
3058*4882a593Smuzhiyun 	lkb->lkb_astparam = args->astparam;
3059*4882a593Smuzhiyun 	rv = 0;
3060*4882a593Smuzhiyun  out:
3061*4882a593Smuzhiyun 	if (rv)
3062*4882a593Smuzhiyun 		log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv,
3063*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
3064*4882a593Smuzhiyun 			  args->flags, lkb->lkb_wait_type,
3065*4882a593Smuzhiyun 			  lkb->lkb_resource->res_name);
3066*4882a593Smuzhiyun 	return rv;
3067*4882a593Smuzhiyun }
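
/* The three unlock variants these checks distinguish.  Illustrative
 * sketch only ("ls", the lksb and arg are assumed).  As noted before
 * this function, dlm_unlock() turns the -EBUSY from an overlapping
 * CANCEL/FORCEUNLOCK into 0 for the caller.
 *
 *	dlm_unlock(ls, lksb.sb_lkid, 0, &lksb, arg);
 *		// normal unlock; fails -EBUSY if another op is in progress
 *
 *	dlm_unlock(ls, lksb.sb_lkid, DLM_LKF_CANCEL, &lksb, arg);
 *		// cancel a queued request/convert; the ast gets -DLM_ECANCEL
 *
 *	dlm_unlock(ls, lksb.sb_lkid, DLM_LKF_FORCEUNLOCK, &lksb, arg);
 *		// unlock regardless of pending state; the ast gets -DLM_EUNLOCK
 */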
3068*4882a593Smuzhiyun 
3069*4882a593Smuzhiyun /*
3070*4882a593Smuzhiyun  * Four stage 4 varieties:
3071*4882a593Smuzhiyun  * do_request(), do_convert(), do_unlock(), do_cancel()
3072*4882a593Smuzhiyun  * These are called on the master node for the given lock and
3073*4882a593Smuzhiyun  * from the central locking logic.
3074*4882a593Smuzhiyun  */
3075*4882a593Smuzhiyun 
3076*4882a593Smuzhiyun static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
3077*4882a593Smuzhiyun {
3078*4882a593Smuzhiyun 	int error = 0;
3079*4882a593Smuzhiyun 
3080*4882a593Smuzhiyun 	if (can_be_granted(r, lkb, 1, 0, NULL)) {
3081*4882a593Smuzhiyun 		grant_lock(r, lkb);
3082*4882a593Smuzhiyun 		queue_cast(r, lkb, 0);
3083*4882a593Smuzhiyun 		goto out;
3084*4882a593Smuzhiyun 	}
3085*4882a593Smuzhiyun 
3086*4882a593Smuzhiyun 	if (can_be_queued(lkb)) {
3087*4882a593Smuzhiyun 		error = -EINPROGRESS;
3088*4882a593Smuzhiyun 		add_lkb(r, lkb, DLM_LKSTS_WAITING);
3089*4882a593Smuzhiyun 		add_timeout(lkb);
3090*4882a593Smuzhiyun 		goto out;
3091*4882a593Smuzhiyun 	}
3092*4882a593Smuzhiyun 
3093*4882a593Smuzhiyun 	error = -EAGAIN;
3094*4882a593Smuzhiyun 	queue_cast(r, lkb, -EAGAIN);
3095*4882a593Smuzhiyun  out:
3096*4882a593Smuzhiyun 	return error;
3097*4882a593Smuzhiyun }
3098*4882a593Smuzhiyun 
3099*4882a593Smuzhiyun static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3100*4882a593Smuzhiyun 			       int error)
3101*4882a593Smuzhiyun {
3102*4882a593Smuzhiyun 	switch (error) {
3103*4882a593Smuzhiyun 	case -EAGAIN:
3104*4882a593Smuzhiyun 		if (force_blocking_asts(lkb))
3105*4882a593Smuzhiyun 			send_blocking_asts_all(r, lkb);
3106*4882a593Smuzhiyun 		break;
3107*4882a593Smuzhiyun 	case -EINPROGRESS:
3108*4882a593Smuzhiyun 		send_blocking_asts(r, lkb);
3109*4882a593Smuzhiyun 		break;
3110*4882a593Smuzhiyun 	}
3111*4882a593Smuzhiyun }
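
/* The -EAGAIN path above from the caller's side.  Illustrative sketch
 * only ("ls", the lksb and the callbacks are assumed): DLM_LKF_NOQUEUE
 * makes can_be_queued() fail, so a request that cannot be granted
 * immediately completes with -EAGAIN instead of waiting on the resource.
 *
 *	dlm_lock(ls, DLM_LOCK_EX, &lksb, DLM_LKF_NOQUEUE, "res1", 4, 0,
 *		 ast_fn, arg, NULL);
 *
 *	// in the completion ast, lksb.sb_status == -EAGAIN means the
 *	// resource was busy and the request was not queued
 */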
3112*4882a593Smuzhiyun 
3113*4882a593Smuzhiyun static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
3114*4882a593Smuzhiyun {
3115*4882a593Smuzhiyun 	int error = 0;
3116*4882a593Smuzhiyun 	int deadlk = 0;
3117*4882a593Smuzhiyun 
3118*4882a593Smuzhiyun 	/* changing an existing lock may allow others to be granted */
3119*4882a593Smuzhiyun 
3120*4882a593Smuzhiyun 	if (can_be_granted(r, lkb, 1, 0, &deadlk)) {
3121*4882a593Smuzhiyun 		grant_lock(r, lkb);
3122*4882a593Smuzhiyun 		queue_cast(r, lkb, 0);
3123*4882a593Smuzhiyun 		goto out;
3124*4882a593Smuzhiyun 	}
3125*4882a593Smuzhiyun 
3126*4882a593Smuzhiyun 	/* can_be_granted() detected that this lock would block in a conversion
3127*4882a593Smuzhiyun 	   deadlock, so we leave it on the granted queue and return EDEADLK in
3128*4882a593Smuzhiyun 	   the ast for the convert. */
3129*4882a593Smuzhiyun 
3130*4882a593Smuzhiyun 	if (deadlk && !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
3131*4882a593Smuzhiyun 		/* it's left on the granted queue */
3132*4882a593Smuzhiyun 		revert_lock(r, lkb);
3133*4882a593Smuzhiyun 		queue_cast(r, lkb, -EDEADLK);
3134*4882a593Smuzhiyun 		error = -EDEADLK;
3135*4882a593Smuzhiyun 		goto out;
3136*4882a593Smuzhiyun 	}
3137*4882a593Smuzhiyun 
3138*4882a593Smuzhiyun 	/* is_demoted() means the can_be_granted() above set the grmode
3139*4882a593Smuzhiyun 	   to NL, and left us on the granted queue.  This auto-demotion
3140*4882a593Smuzhiyun 	   (due to CONVDEADLK) might mean other locks, and/or this lock, are
3141*4882a593Smuzhiyun 	   now grantable.  We have to try to grant other converting locks
3142*4882a593Smuzhiyun 	   before we try again to grant this one. */
3143*4882a593Smuzhiyun 
3144*4882a593Smuzhiyun 	if (is_demoted(lkb)) {
3145*4882a593Smuzhiyun 		grant_pending_convert(r, DLM_LOCK_IV, NULL, NULL);
3146*4882a593Smuzhiyun 		if (_can_be_granted(r, lkb, 1, 0)) {
3147*4882a593Smuzhiyun 			grant_lock(r, lkb);
3148*4882a593Smuzhiyun 			queue_cast(r, lkb, 0);
3149*4882a593Smuzhiyun 			goto out;
3150*4882a593Smuzhiyun 		}
3151*4882a593Smuzhiyun 		/* else fall through and move to convert queue */
3152*4882a593Smuzhiyun 	}
3153*4882a593Smuzhiyun 
3154*4882a593Smuzhiyun 	if (can_be_queued(lkb)) {
3155*4882a593Smuzhiyun 		error = -EINPROGRESS;
3156*4882a593Smuzhiyun 		del_lkb(r, lkb);
3157*4882a593Smuzhiyun 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3158*4882a593Smuzhiyun 		add_timeout(lkb);
3159*4882a593Smuzhiyun 		goto out;
3160*4882a593Smuzhiyun 	}
3161*4882a593Smuzhiyun 
3162*4882a593Smuzhiyun 	error = -EAGAIN;
3163*4882a593Smuzhiyun 	queue_cast(r, lkb, -EAGAIN);
3164*4882a593Smuzhiyun  out:
3165*4882a593Smuzhiyun 	return error;
3166*4882a593Smuzhiyun }
3167*4882a593Smuzhiyun 
3168*4882a593Smuzhiyun static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3169*4882a593Smuzhiyun 			       int error)
3170*4882a593Smuzhiyun {
3171*4882a593Smuzhiyun 	switch (error) {
3172*4882a593Smuzhiyun 	case 0:
3173*4882a593Smuzhiyun 		grant_pending_locks(r, NULL);
3174*4882a593Smuzhiyun 		/* grant_pending_locks also sends basts */
3175*4882a593Smuzhiyun 		break;
3176*4882a593Smuzhiyun 	case -EAGAIN:
3177*4882a593Smuzhiyun 		if (force_blocking_asts(lkb))
3178*4882a593Smuzhiyun 			send_blocking_asts_all(r, lkb);
3179*4882a593Smuzhiyun 		break;
3180*4882a593Smuzhiyun 	case -EINPROGRESS:
3181*4882a593Smuzhiyun 		send_blocking_asts(r, lkb);
3182*4882a593Smuzhiyun 		break;
3183*4882a593Smuzhiyun 	}
3184*4882a593Smuzhiyun }
3185*4882a593Smuzhiyun 
3186*4882a593Smuzhiyun static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3187*4882a593Smuzhiyun {
3188*4882a593Smuzhiyun 	remove_lock(r, lkb);
3189*4882a593Smuzhiyun 	queue_cast(r, lkb, -DLM_EUNLOCK);
3190*4882a593Smuzhiyun 	return -DLM_EUNLOCK;
3191*4882a593Smuzhiyun }
3192*4882a593Smuzhiyun 
3193*4882a593Smuzhiyun static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3194*4882a593Smuzhiyun 			      int error)
3195*4882a593Smuzhiyun {
3196*4882a593Smuzhiyun 	grant_pending_locks(r, NULL);
3197*4882a593Smuzhiyun }
3198*4882a593Smuzhiyun 
3199*4882a593Smuzhiyun /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
3200*4882a593Smuzhiyun 
3201*4882a593Smuzhiyun static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
3202*4882a593Smuzhiyun {
3203*4882a593Smuzhiyun 	int error;
3204*4882a593Smuzhiyun 
3205*4882a593Smuzhiyun 	error = revert_lock(r, lkb);
3206*4882a593Smuzhiyun 	if (error) {
3207*4882a593Smuzhiyun 		queue_cast(r, lkb, -DLM_ECANCEL);
3208*4882a593Smuzhiyun 		return -DLM_ECANCEL;
3209*4882a593Smuzhiyun 	}
3210*4882a593Smuzhiyun 	return 0;
3211*4882a593Smuzhiyun }
3212*4882a593Smuzhiyun 
3213*4882a593Smuzhiyun static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3214*4882a593Smuzhiyun 			      int error)
3215*4882a593Smuzhiyun {
3216*4882a593Smuzhiyun 	if (error)
3217*4882a593Smuzhiyun 		grant_pending_locks(r, NULL);
3218*4882a593Smuzhiyun }
3219*4882a593Smuzhiyun 
3220*4882a593Smuzhiyun /*
3221*4882a593Smuzhiyun  * Four stage 3 varieties:
3222*4882a593Smuzhiyun  * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
3223*4882a593Smuzhiyun  */
3224*4882a593Smuzhiyun 
3225*4882a593Smuzhiyun /* add a new lkb to a possibly new rsb, called by requesting process */
3226*4882a593Smuzhiyun 
3227*4882a593Smuzhiyun static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3228*4882a593Smuzhiyun {
3229*4882a593Smuzhiyun 	int error;
3230*4882a593Smuzhiyun 
3231*4882a593Smuzhiyun 	/* set_master: sets lkb nodeid from r */
3232*4882a593Smuzhiyun 
3233*4882a593Smuzhiyun 	error = set_master(r, lkb);
3234*4882a593Smuzhiyun 	if (error < 0)
3235*4882a593Smuzhiyun 		goto out;
3236*4882a593Smuzhiyun 	if (error) {
3237*4882a593Smuzhiyun 		error = 0;
3238*4882a593Smuzhiyun 		goto out;
3239*4882a593Smuzhiyun 	}
3240*4882a593Smuzhiyun 
3241*4882a593Smuzhiyun 	if (is_remote(r)) {
3242*4882a593Smuzhiyun 		/* receive_request() calls do_request() on remote node */
3243*4882a593Smuzhiyun 		error = send_request(r, lkb);
3244*4882a593Smuzhiyun 	} else {
3245*4882a593Smuzhiyun 		error = do_request(r, lkb);
3246*4882a593Smuzhiyun 		/* for remote locks the request_reply is sent
3247*4882a593Smuzhiyun 		   between do_request and do_request_effects */
3248*4882a593Smuzhiyun 		do_request_effects(r, lkb, error);
3249*4882a593Smuzhiyun 	}
3250*4882a593Smuzhiyun  out:
3251*4882a593Smuzhiyun 	return error;
3252*4882a593Smuzhiyun }
3253*4882a593Smuzhiyun 
3254*4882a593Smuzhiyun /* change some property of an existing lkb, e.g. mode */
3255*4882a593Smuzhiyun 
3256*4882a593Smuzhiyun static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3257*4882a593Smuzhiyun {
3258*4882a593Smuzhiyun 	int error;
3259*4882a593Smuzhiyun 
3260*4882a593Smuzhiyun 	if (is_remote(r)) {
3261*4882a593Smuzhiyun 		/* receive_convert() calls do_convert() on remote node */
3262*4882a593Smuzhiyun 		error = send_convert(r, lkb);
3263*4882a593Smuzhiyun 	} else {
3264*4882a593Smuzhiyun 		error = do_convert(r, lkb);
3265*4882a593Smuzhiyun 		/* for remote locks the convert_reply is sent
3266*4882a593Smuzhiyun 		   between do_convert and do_convert_effects */
3267*4882a593Smuzhiyun 		do_convert_effects(r, lkb, error);
3268*4882a593Smuzhiyun 	}
3269*4882a593Smuzhiyun 
3270*4882a593Smuzhiyun 	return error;
3271*4882a593Smuzhiyun }
3272*4882a593Smuzhiyun 
3273*4882a593Smuzhiyun /* remove an existing lkb from the granted queue */
3274*4882a593Smuzhiyun 
3275*4882a593Smuzhiyun static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3276*4882a593Smuzhiyun {
3277*4882a593Smuzhiyun 	int error;
3278*4882a593Smuzhiyun 
3279*4882a593Smuzhiyun 	if (is_remote(r)) {
3280*4882a593Smuzhiyun 		/* receive_unlock() calls do_unlock() on remote node */
3281*4882a593Smuzhiyun 		error = send_unlock(r, lkb);
3282*4882a593Smuzhiyun 	} else {
3283*4882a593Smuzhiyun 		error = do_unlock(r, lkb);
3284*4882a593Smuzhiyun 		/* for remote locks the unlock_reply is sent
3285*4882a593Smuzhiyun 		   between do_unlock and do_unlock_effects */
3286*4882a593Smuzhiyun 		do_unlock_effects(r, lkb, error);
3287*4882a593Smuzhiyun 	}
3288*4882a593Smuzhiyun 
3289*4882a593Smuzhiyun 	return error;
3290*4882a593Smuzhiyun }
3291*4882a593Smuzhiyun 
3292*4882a593Smuzhiyun /* remove an existing lkb from the convert or wait queue */
3293*4882a593Smuzhiyun 
3294*4882a593Smuzhiyun static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3295*4882a593Smuzhiyun {
3296*4882a593Smuzhiyun 	int error;
3297*4882a593Smuzhiyun 
3298*4882a593Smuzhiyun 	if (is_remote(r)) {
3299*4882a593Smuzhiyun 		/* receive_cancel() calls do_cancel() on remote node */
3300*4882a593Smuzhiyun 		error = send_cancel(r, lkb);
3301*4882a593Smuzhiyun 	} else {
3302*4882a593Smuzhiyun 		error = do_cancel(r, lkb);
3303*4882a593Smuzhiyun 		/* for remote locks the cancel_reply is sent
3304*4882a593Smuzhiyun 		   between do_cancel and do_cancel_effects */
3305*4882a593Smuzhiyun 		do_cancel_effects(r, lkb, error);
3306*4882a593Smuzhiyun 	}
3307*4882a593Smuzhiyun 
3308*4882a593Smuzhiyun 	return error;
3309*4882a593Smuzhiyun }
3310*4882a593Smuzhiyun 
3311*4882a593Smuzhiyun /*
3312*4882a593Smuzhiyun  * Four stage 2 varieties:
3313*4882a593Smuzhiyun  * request_lock(), convert_lock(), unlock_lock(), cancel_lock()
3314*4882a593Smuzhiyun  */
3315*4882a593Smuzhiyun 
3316*4882a593Smuzhiyun static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
3317*4882a593Smuzhiyun 			int len, struct dlm_args *args)
3318*4882a593Smuzhiyun {
3319*4882a593Smuzhiyun 	struct dlm_rsb *r;
3320*4882a593Smuzhiyun 	int error;
3321*4882a593Smuzhiyun 
3322*4882a593Smuzhiyun 	error = validate_lock_args(ls, lkb, args);
3323*4882a593Smuzhiyun 	if (error)
3324*4882a593Smuzhiyun 		return error;
3325*4882a593Smuzhiyun 
3326*4882a593Smuzhiyun 	error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
3327*4882a593Smuzhiyun 	if (error)
3328*4882a593Smuzhiyun 		return error;
3329*4882a593Smuzhiyun 
3330*4882a593Smuzhiyun 	lock_rsb(r);
3331*4882a593Smuzhiyun 
3332*4882a593Smuzhiyun 	attach_lkb(r, lkb);
3333*4882a593Smuzhiyun 	lkb->lkb_lksb->sb_lkid = lkb->lkb_id;
3334*4882a593Smuzhiyun 
3335*4882a593Smuzhiyun 	error = _request_lock(r, lkb);
3336*4882a593Smuzhiyun 
3337*4882a593Smuzhiyun 	unlock_rsb(r);
3338*4882a593Smuzhiyun 	put_rsb(r);
3339*4882a593Smuzhiyun 	return error;
3340*4882a593Smuzhiyun }
3341*4882a593Smuzhiyun 
3342*4882a593Smuzhiyun static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3343*4882a593Smuzhiyun 			struct dlm_args *args)
3344*4882a593Smuzhiyun {
3345*4882a593Smuzhiyun 	struct dlm_rsb *r;
3346*4882a593Smuzhiyun 	int error;
3347*4882a593Smuzhiyun 
3348*4882a593Smuzhiyun 	r = lkb->lkb_resource;
3349*4882a593Smuzhiyun 
3350*4882a593Smuzhiyun 	hold_rsb(r);
3351*4882a593Smuzhiyun 	lock_rsb(r);
3352*4882a593Smuzhiyun 
3353*4882a593Smuzhiyun 	error = validate_lock_args(ls, lkb, args);
3354*4882a593Smuzhiyun 	if (error)
3355*4882a593Smuzhiyun 		goto out;
3356*4882a593Smuzhiyun 
3357*4882a593Smuzhiyun 	error = _convert_lock(r, lkb);
3358*4882a593Smuzhiyun  out:
3359*4882a593Smuzhiyun 	unlock_rsb(r);
3360*4882a593Smuzhiyun 	put_rsb(r);
3361*4882a593Smuzhiyun 	return error;
3362*4882a593Smuzhiyun }
3363*4882a593Smuzhiyun 
3364*4882a593Smuzhiyun static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3365*4882a593Smuzhiyun 		       struct dlm_args *args)
3366*4882a593Smuzhiyun {
3367*4882a593Smuzhiyun 	struct dlm_rsb *r;
3368*4882a593Smuzhiyun 	int error;
3369*4882a593Smuzhiyun 
3370*4882a593Smuzhiyun 	r = lkb->lkb_resource;
3371*4882a593Smuzhiyun 
3372*4882a593Smuzhiyun 	hold_rsb(r);
3373*4882a593Smuzhiyun 	lock_rsb(r);
3374*4882a593Smuzhiyun 
3375*4882a593Smuzhiyun 	error = validate_unlock_args(lkb, args);
3376*4882a593Smuzhiyun 	if (error)
3377*4882a593Smuzhiyun 		goto out;
3378*4882a593Smuzhiyun 
3379*4882a593Smuzhiyun 	error = _unlock_lock(r, lkb);
3380*4882a593Smuzhiyun  out:
3381*4882a593Smuzhiyun 	unlock_rsb(r);
3382*4882a593Smuzhiyun 	put_rsb(r);
3383*4882a593Smuzhiyun 	return error;
3384*4882a593Smuzhiyun }
3385*4882a593Smuzhiyun 
3386*4882a593Smuzhiyun static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3387*4882a593Smuzhiyun 		       struct dlm_args *args)
3388*4882a593Smuzhiyun {
3389*4882a593Smuzhiyun 	struct dlm_rsb *r;
3390*4882a593Smuzhiyun 	int error;
3391*4882a593Smuzhiyun 
3392*4882a593Smuzhiyun 	r = lkb->lkb_resource;
3393*4882a593Smuzhiyun 
3394*4882a593Smuzhiyun 	hold_rsb(r);
3395*4882a593Smuzhiyun 	lock_rsb(r);
3396*4882a593Smuzhiyun 
3397*4882a593Smuzhiyun 	error = validate_unlock_args(lkb, args);
3398*4882a593Smuzhiyun 	if (error)
3399*4882a593Smuzhiyun 		goto out;
3400*4882a593Smuzhiyun 
3401*4882a593Smuzhiyun 	error = _cancel_lock(r, lkb);
3402*4882a593Smuzhiyun  out:
3403*4882a593Smuzhiyun 	unlock_rsb(r);
3404*4882a593Smuzhiyun 	put_rsb(r);
3405*4882a593Smuzhiyun 	return error;
3406*4882a593Smuzhiyun }
3407*4882a593Smuzhiyun 
3408*4882a593Smuzhiyun /*
3409*4882a593Smuzhiyun  * Two stage 1 varieties:  dlm_lock() and dlm_unlock()
3410*4882a593Smuzhiyun  */
3411*4882a593Smuzhiyun 
3412*4882a593Smuzhiyun int dlm_lock(dlm_lockspace_t *lockspace,
3413*4882a593Smuzhiyun 	     int mode,
3414*4882a593Smuzhiyun 	     struct dlm_lksb *lksb,
3415*4882a593Smuzhiyun 	     uint32_t flags,
3416*4882a593Smuzhiyun 	     void *name,
3417*4882a593Smuzhiyun 	     unsigned int namelen,
3418*4882a593Smuzhiyun 	     uint32_t parent_lkid,
3419*4882a593Smuzhiyun 	     void (*ast) (void *astarg),
3420*4882a593Smuzhiyun 	     void *astarg,
3421*4882a593Smuzhiyun 	     void (*bast) (void *astarg, int mode))
3422*4882a593Smuzhiyun {
3423*4882a593Smuzhiyun 	struct dlm_ls *ls;
3424*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
3425*4882a593Smuzhiyun 	struct dlm_args args;
3426*4882a593Smuzhiyun 	int error, convert = flags & DLM_LKF_CONVERT;
3427*4882a593Smuzhiyun 
3428*4882a593Smuzhiyun 	ls = dlm_find_lockspace_local(lockspace);
3429*4882a593Smuzhiyun 	if (!ls)
3430*4882a593Smuzhiyun 		return -EINVAL;
3431*4882a593Smuzhiyun 
3432*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
3433*4882a593Smuzhiyun 
3434*4882a593Smuzhiyun 	if (convert)
3435*4882a593Smuzhiyun 		error = find_lkb(ls, lksb->sb_lkid, &lkb);
3436*4882a593Smuzhiyun 	else
3437*4882a593Smuzhiyun 		error = create_lkb(ls, &lkb);
3438*4882a593Smuzhiyun 
3439*4882a593Smuzhiyun 	if (error)
3440*4882a593Smuzhiyun 		goto out;
3441*4882a593Smuzhiyun 
3442*4882a593Smuzhiyun 	error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
3443*4882a593Smuzhiyun 			      astarg, bast, &args);
3444*4882a593Smuzhiyun 	if (error)
3445*4882a593Smuzhiyun 		goto out_put;
3446*4882a593Smuzhiyun 
3447*4882a593Smuzhiyun 	if (convert)
3448*4882a593Smuzhiyun 		error = convert_lock(ls, lkb, &args);
3449*4882a593Smuzhiyun 	else
3450*4882a593Smuzhiyun 		error = request_lock(ls, lkb, name, namelen, &args);
3451*4882a593Smuzhiyun 
3452*4882a593Smuzhiyun 	if (error == -EINPROGRESS)
3453*4882a593Smuzhiyun 		error = 0;
3454*4882a593Smuzhiyun  out_put:
3455*4882a593Smuzhiyun 	if (convert || error)
3456*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
3457*4882a593Smuzhiyun 	if (error == -EAGAIN || error == -EDEADLK)
3458*4882a593Smuzhiyun 		error = 0;
3459*4882a593Smuzhiyun  out:
3460*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
3461*4882a593Smuzhiyun 	dlm_put_lockspace(ls);
3462*4882a593Smuzhiyun 	return error;
3463*4882a593Smuzhiyun }
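
/* Usage sketch (illustrative only, not part of this file): a caller
   holding a lockspace handle from dlm_new_lockspace() might take an
   exclusive lock like this.  "my_ast", "my_bast", "done" and the
   5-byte name "myres" are hypothetical.  A return of 0 only means the
   request was accepted; the grant (or -EAGAIN etc.) arrives later
   through the ast, with the status in my_lksb.sb_status and the lock
   id in my_lksb.sb_lkid.

   static struct dlm_lksb my_lksb;
   static struct completion done;

   static void my_ast(void *astarg)
   {
           complete(astarg);        // my_lksb.sb_status holds the result
   }

   static void my_bast(void *astarg, int mode)
   {
           // another node wants a mode that conflicts with ours
   }

   error = dlm_lock(ls, DLM_LOCK_EX, &my_lksb, 0, "myres", 5, 0,
                    my_ast, &done, my_bast);
   if (!error)
           wait_for_completion(&done);
*/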
3464*4882a593Smuzhiyun 
3465*4882a593Smuzhiyun int dlm_unlock(dlm_lockspace_t *lockspace,
3466*4882a593Smuzhiyun 	       uint32_t lkid,
3467*4882a593Smuzhiyun 	       uint32_t flags,
3468*4882a593Smuzhiyun 	       struct dlm_lksb *lksb,
3469*4882a593Smuzhiyun 	       void *astarg)
3470*4882a593Smuzhiyun {
3471*4882a593Smuzhiyun 	struct dlm_ls *ls;
3472*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
3473*4882a593Smuzhiyun 	struct dlm_args args;
3474*4882a593Smuzhiyun 	int error;
3475*4882a593Smuzhiyun 
3476*4882a593Smuzhiyun 	ls = dlm_find_lockspace_local(lockspace);
3477*4882a593Smuzhiyun 	if (!ls)
3478*4882a593Smuzhiyun 		return -EINVAL;
3479*4882a593Smuzhiyun 
3480*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
3481*4882a593Smuzhiyun 
3482*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
3483*4882a593Smuzhiyun 	if (error)
3484*4882a593Smuzhiyun 		goto out;
3485*4882a593Smuzhiyun 
3486*4882a593Smuzhiyun 	error = set_unlock_args(flags, astarg, &args);
3487*4882a593Smuzhiyun 	if (error)
3488*4882a593Smuzhiyun 		goto out_put;
3489*4882a593Smuzhiyun 
3490*4882a593Smuzhiyun 	if (flags & DLM_LKF_CANCEL)
3491*4882a593Smuzhiyun 		error = cancel_lock(ls, lkb, &args);
3492*4882a593Smuzhiyun 	else
3493*4882a593Smuzhiyun 		error = unlock_lock(ls, lkb, &args);
3494*4882a593Smuzhiyun 
3495*4882a593Smuzhiyun 	if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
3496*4882a593Smuzhiyun 		error = 0;
3497*4882a593Smuzhiyun 	if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)))
3498*4882a593Smuzhiyun 		error = 0;
3499*4882a593Smuzhiyun  out_put:
3500*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
3501*4882a593Smuzhiyun  out:
3502*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
3503*4882a593Smuzhiyun 	dlm_put_lockspace(ls);
3504*4882a593Smuzhiyun 	return error;
3505*4882a593Smuzhiyun }
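
/* Continuing the sketch above (again hypothetical): releasing the
   lock by its id.  Passing DLM_LKF_CANCEL instead of 0 would cancel an
   in-progress request or conversion via cancel_lock() rather than
   unlock a granted lock.

   error = dlm_unlock(ls, my_lksb.sb_lkid, 0, &my_lksb, &done);
   // on success the final -DLM_EUNLOCK status is delivered via the ast
*/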
3506*4882a593Smuzhiyun 
3507*4882a593Smuzhiyun /*
3508*4882a593Smuzhiyun  * send/receive routines for remote operations and replies
3509*4882a593Smuzhiyun  *
3510*4882a593Smuzhiyun  * send_args
3511*4882a593Smuzhiyun  * send_common
3512*4882a593Smuzhiyun  * send_request			receive_request
3513*4882a593Smuzhiyun  * send_convert			receive_convert
3514*4882a593Smuzhiyun  * send_unlock			receive_unlock
3515*4882a593Smuzhiyun  * send_cancel			receive_cancel
3516*4882a593Smuzhiyun  * send_grant			receive_grant
3517*4882a593Smuzhiyun  * send_bast			receive_bast
3518*4882a593Smuzhiyun  * send_lookup			receive_lookup
3519*4882a593Smuzhiyun  * send_remove			receive_remove
3520*4882a593Smuzhiyun  *
3521*4882a593Smuzhiyun  * 				send_common_reply
3522*4882a593Smuzhiyun  * receive_request_reply	send_request_reply
3523*4882a593Smuzhiyun  * receive_convert_reply	send_convert_reply
3524*4882a593Smuzhiyun  * receive_unlock_reply		send_unlock_reply
3525*4882a593Smuzhiyun  * receive_cancel_reply		send_cancel_reply
3526*4882a593Smuzhiyun  * receive_lookup_reply		send_lookup_reply
3527*4882a593Smuzhiyun  */
3528*4882a593Smuzhiyun 
3529*4882a593Smuzhiyun static int _create_message(struct dlm_ls *ls, int mb_len,
3530*4882a593Smuzhiyun 			   int to_nodeid, int mstype,
3531*4882a593Smuzhiyun 			   struct dlm_message **ms_ret,
3532*4882a593Smuzhiyun 			   struct dlm_mhandle **mh_ret)
3533*4882a593Smuzhiyun {
3534*4882a593Smuzhiyun 	struct dlm_message *ms;
3535*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3536*4882a593Smuzhiyun 	char *mb;
3537*4882a593Smuzhiyun 
3538*4882a593Smuzhiyun 	/* get_buffer gives us a message handle (mh) that we need to
3539*4882a593Smuzhiyun 	   pass into lowcomms_commit and a message buffer (mb) that we
3540*4882a593Smuzhiyun 	   write our data into */
3541*4882a593Smuzhiyun 
3542*4882a593Smuzhiyun 	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
3543*4882a593Smuzhiyun 	if (!mh)
3544*4882a593Smuzhiyun 		return -ENOBUFS;
3545*4882a593Smuzhiyun 
3546*4882a593Smuzhiyun 	memset(mb, 0, mb_len);
3547*4882a593Smuzhiyun 
3548*4882a593Smuzhiyun 	ms = (struct dlm_message *) mb;
3549*4882a593Smuzhiyun 
3550*4882a593Smuzhiyun 	ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
3551*4882a593Smuzhiyun 	ms->m_header.h_lockspace = ls->ls_global_id;
3552*4882a593Smuzhiyun 	ms->m_header.h_nodeid = dlm_our_nodeid();
3553*4882a593Smuzhiyun 	ms->m_header.h_length = mb_len;
3554*4882a593Smuzhiyun 	ms->m_header.h_cmd = DLM_MSG;
3555*4882a593Smuzhiyun 
3556*4882a593Smuzhiyun 	ms->m_type = mstype;
3557*4882a593Smuzhiyun 
3558*4882a593Smuzhiyun 	*mh_ret = mh;
3559*4882a593Smuzhiyun 	*ms_ret = ms;
3560*4882a593Smuzhiyun 	return 0;
3561*4882a593Smuzhiyun }
3562*4882a593Smuzhiyun 
3563*4882a593Smuzhiyun static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
3564*4882a593Smuzhiyun 			  int to_nodeid, int mstype,
3565*4882a593Smuzhiyun 			  struct dlm_message **ms_ret,
3566*4882a593Smuzhiyun 			  struct dlm_mhandle **mh_ret)
3567*4882a593Smuzhiyun {
3568*4882a593Smuzhiyun 	int mb_len = sizeof(struct dlm_message);
3569*4882a593Smuzhiyun 
3570*4882a593Smuzhiyun 	switch (mstype) {
3571*4882a593Smuzhiyun 	case DLM_MSG_REQUEST:
3572*4882a593Smuzhiyun 	case DLM_MSG_LOOKUP:
3573*4882a593Smuzhiyun 	case DLM_MSG_REMOVE:
3574*4882a593Smuzhiyun 		mb_len += r->res_length;
3575*4882a593Smuzhiyun 		break;
3576*4882a593Smuzhiyun 	case DLM_MSG_CONVERT:
3577*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK:
3578*4882a593Smuzhiyun 	case DLM_MSG_REQUEST_REPLY:
3579*4882a593Smuzhiyun 	case DLM_MSG_CONVERT_REPLY:
3580*4882a593Smuzhiyun 	case DLM_MSG_GRANT:
3581*4882a593Smuzhiyun 		if (lkb && lkb->lkb_lvbptr)
3582*4882a593Smuzhiyun 			mb_len += r->res_ls->ls_lvblen;
3583*4882a593Smuzhiyun 		break;
3584*4882a593Smuzhiyun 	}
3585*4882a593Smuzhiyun 
3586*4882a593Smuzhiyun 	return _create_message(r->res_ls, mb_len, to_nodeid, mstype,
3587*4882a593Smuzhiyun 			       ms_ret, mh_ret);
3588*4882a593Smuzhiyun }
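
/* Worked example of the sizing above: a DLM_MSG_REQUEST for a 5-byte
   resource name is allocated with mb_len = sizeof(struct dlm_message)
   + 5, and send_args() later copies the name into the m_extra flex
   array at the end of the struct.  A DLM_MSG_CONVERT carrying a value
   block adds ls_lvblen instead. */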
3589*4882a593Smuzhiyun 
3590*4882a593Smuzhiyun /* further lowcomms enhancements or alternate implementations may make
3591*4882a593Smuzhiyun    the return value from this function useful at some point */
3592*4882a593Smuzhiyun 
3593*4882a593Smuzhiyun static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
3594*4882a593Smuzhiyun {
3595*4882a593Smuzhiyun 	dlm_message_out(ms);
3596*4882a593Smuzhiyun 	dlm_lowcomms_commit_buffer(mh);
3597*4882a593Smuzhiyun 	return 0;
3598*4882a593Smuzhiyun }
3599*4882a593Smuzhiyun 
3600*4882a593Smuzhiyun static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
3601*4882a593Smuzhiyun 		      struct dlm_message *ms)
3602*4882a593Smuzhiyun {
3603*4882a593Smuzhiyun 	ms->m_nodeid   = lkb->lkb_nodeid;
3604*4882a593Smuzhiyun 	ms->m_pid      = lkb->lkb_ownpid;
3605*4882a593Smuzhiyun 	ms->m_lkid     = lkb->lkb_id;
3606*4882a593Smuzhiyun 	ms->m_remid    = lkb->lkb_remid;
3607*4882a593Smuzhiyun 	ms->m_exflags  = lkb->lkb_exflags;
3608*4882a593Smuzhiyun 	ms->m_sbflags  = lkb->lkb_sbflags;
3609*4882a593Smuzhiyun 	ms->m_flags    = lkb->lkb_flags;
3610*4882a593Smuzhiyun 	ms->m_lvbseq   = lkb->lkb_lvbseq;
3611*4882a593Smuzhiyun 	ms->m_status   = lkb->lkb_status;
3612*4882a593Smuzhiyun 	ms->m_grmode   = lkb->lkb_grmode;
3613*4882a593Smuzhiyun 	ms->m_rqmode   = lkb->lkb_rqmode;
3614*4882a593Smuzhiyun 	ms->m_hash     = r->res_hash;
3615*4882a593Smuzhiyun 
3616*4882a593Smuzhiyun 	/* m_result and m_bastmode are set from function args,
3617*4882a593Smuzhiyun 	   not from lkb fields */
3618*4882a593Smuzhiyun 
3619*4882a593Smuzhiyun 	if (lkb->lkb_bastfn)
3620*4882a593Smuzhiyun 		ms->m_asts |= DLM_CB_BAST;
3621*4882a593Smuzhiyun 	if (lkb->lkb_astfn)
3622*4882a593Smuzhiyun 		ms->m_asts |= DLM_CB_CAST;
3623*4882a593Smuzhiyun 
3624*4882a593Smuzhiyun 	/* compare with switch in create_message; send_remove() doesn't
3625*4882a593Smuzhiyun 	   use send_args() */
3626*4882a593Smuzhiyun 
3627*4882a593Smuzhiyun 	switch (ms->m_type) {
3628*4882a593Smuzhiyun 	case DLM_MSG_REQUEST:
3629*4882a593Smuzhiyun 	case DLM_MSG_LOOKUP:
3630*4882a593Smuzhiyun 		memcpy(ms->m_extra, r->res_name, r->res_length);
3631*4882a593Smuzhiyun 		break;
3632*4882a593Smuzhiyun 	case DLM_MSG_CONVERT:
3633*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK:
3634*4882a593Smuzhiyun 	case DLM_MSG_REQUEST_REPLY:
3635*4882a593Smuzhiyun 	case DLM_MSG_CONVERT_REPLY:
3636*4882a593Smuzhiyun 	case DLM_MSG_GRANT:
3637*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
3638*4882a593Smuzhiyun 			break;
3639*4882a593Smuzhiyun 		memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
3640*4882a593Smuzhiyun 		break;
3641*4882a593Smuzhiyun 	}
3642*4882a593Smuzhiyun }
3643*4882a593Smuzhiyun 
3644*4882a593Smuzhiyun static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
3645*4882a593Smuzhiyun {
3646*4882a593Smuzhiyun 	struct dlm_message *ms;
3647*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3648*4882a593Smuzhiyun 	int to_nodeid, error;
3649*4882a593Smuzhiyun 
3650*4882a593Smuzhiyun 	to_nodeid = r->res_nodeid;
3651*4882a593Smuzhiyun 
3652*4882a593Smuzhiyun 	error = add_to_waiters(lkb, mstype, to_nodeid);
3653*4882a593Smuzhiyun 	if (error)
3654*4882a593Smuzhiyun 		return error;
3655*4882a593Smuzhiyun 
3656*4882a593Smuzhiyun 	error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
3657*4882a593Smuzhiyun 	if (error)
3658*4882a593Smuzhiyun 		goto fail;
3659*4882a593Smuzhiyun 
3660*4882a593Smuzhiyun 	send_args(r, lkb, ms);
3661*4882a593Smuzhiyun 
3662*4882a593Smuzhiyun 	error = send_message(mh, ms);
3663*4882a593Smuzhiyun 	if (error)
3664*4882a593Smuzhiyun 		goto fail;
3665*4882a593Smuzhiyun 	return 0;
3666*4882a593Smuzhiyun 
3667*4882a593Smuzhiyun  fail:
3668*4882a593Smuzhiyun 	remove_from_waiters(lkb, msg_reply_type(mstype));
3669*4882a593Smuzhiyun 	return error;
3670*4882a593Smuzhiyun }
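
/* Note the ordering in send_common(): the lkb is put on the waiters
   list before the message can possibly leave the node, so a reply
   racing straight back always finds it there; on failure it is removed
   under the reply type that will now never arrive. */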
3671*4882a593Smuzhiyun 
3672*4882a593Smuzhiyun static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
3673*4882a593Smuzhiyun {
3674*4882a593Smuzhiyun 	return send_common(r, lkb, DLM_MSG_REQUEST);
3675*4882a593Smuzhiyun }
3676*4882a593Smuzhiyun 
3677*4882a593Smuzhiyun static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
3678*4882a593Smuzhiyun {
3679*4882a593Smuzhiyun 	int error;
3680*4882a593Smuzhiyun 
3681*4882a593Smuzhiyun 	error = send_common(r, lkb, DLM_MSG_CONVERT);
3682*4882a593Smuzhiyun 
3683*4882a593Smuzhiyun 	/* down conversions go without a reply from the master */
3684*4882a593Smuzhiyun 	if (!error && down_conversion(lkb)) {
3685*4882a593Smuzhiyun 		remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
3686*4882a593Smuzhiyun 		r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
3687*4882a593Smuzhiyun 		r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
3688*4882a593Smuzhiyun 		r->res_ls->ls_stub_ms.m_result = 0;
3689*4882a593Smuzhiyun 		__receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
3690*4882a593Smuzhiyun 	}
3691*4882a593Smuzhiyun 
3692*4882a593Smuzhiyun 	return error;
3693*4882a593Smuzhiyun }
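
/* A down-conversion (e.g. EX -> NL) can never block, so its result is
   known locally in advance; the stub reply above lets the caller
   finish without waiting for the network round trip, while the master
   still applies the convert when the message arrives (see the matching
   reply = !down_conversion(lkb) in receive_convert() below). */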
3694*4882a593Smuzhiyun 
3695*4882a593Smuzhiyun /* FIXME: if this lkb is the only lock we hold on the rsb, then set
3696*4882a593Smuzhiyun    MASTER_UNCERTAIN to force the next request on the rsb to confirm
3697*4882a593Smuzhiyun    that the master is still correct. */
3698*4882a593Smuzhiyun 
3699*4882a593Smuzhiyun static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3700*4882a593Smuzhiyun {
3701*4882a593Smuzhiyun 	return send_common(r, lkb, DLM_MSG_UNLOCK);
3702*4882a593Smuzhiyun }
3703*4882a593Smuzhiyun 
3704*4882a593Smuzhiyun static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
3705*4882a593Smuzhiyun {
3706*4882a593Smuzhiyun 	return send_common(r, lkb, DLM_MSG_CANCEL);
3707*4882a593Smuzhiyun }
3708*4882a593Smuzhiyun 
3709*4882a593Smuzhiyun static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
3710*4882a593Smuzhiyun {
3711*4882a593Smuzhiyun 	struct dlm_message *ms;
3712*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3713*4882a593Smuzhiyun 	int to_nodeid, error;
3714*4882a593Smuzhiyun 
3715*4882a593Smuzhiyun 	to_nodeid = lkb->lkb_nodeid;
3716*4882a593Smuzhiyun 
3717*4882a593Smuzhiyun 	error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
3718*4882a593Smuzhiyun 	if (error)
3719*4882a593Smuzhiyun 		goto out;
3720*4882a593Smuzhiyun 
3721*4882a593Smuzhiyun 	send_args(r, lkb, ms);
3722*4882a593Smuzhiyun 
3723*4882a593Smuzhiyun 	ms->m_result = 0;
3724*4882a593Smuzhiyun 
3725*4882a593Smuzhiyun 	error = send_message(mh, ms);
3726*4882a593Smuzhiyun  out:
3727*4882a593Smuzhiyun 	return error;
3728*4882a593Smuzhiyun }
3729*4882a593Smuzhiyun 
3730*4882a593Smuzhiyun static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
3731*4882a593Smuzhiyun {
3732*4882a593Smuzhiyun 	struct dlm_message *ms;
3733*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3734*4882a593Smuzhiyun 	int to_nodeid, error;
3735*4882a593Smuzhiyun 
3736*4882a593Smuzhiyun 	to_nodeid = lkb->lkb_nodeid;
3737*4882a593Smuzhiyun 
3738*4882a593Smuzhiyun 	error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
3739*4882a593Smuzhiyun 	if (error)
3740*4882a593Smuzhiyun 		goto out;
3741*4882a593Smuzhiyun 
3742*4882a593Smuzhiyun 	send_args(r, lkb, ms);
3743*4882a593Smuzhiyun 
3744*4882a593Smuzhiyun 	ms->m_bastmode = mode;
3745*4882a593Smuzhiyun 
3746*4882a593Smuzhiyun 	error = send_message(mh, ms);
3747*4882a593Smuzhiyun  out:
3748*4882a593Smuzhiyun 	return error;
3749*4882a593Smuzhiyun }
3750*4882a593Smuzhiyun 
3751*4882a593Smuzhiyun static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
3752*4882a593Smuzhiyun {
3753*4882a593Smuzhiyun 	struct dlm_message *ms;
3754*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3755*4882a593Smuzhiyun 	int to_nodeid, error;
3756*4882a593Smuzhiyun 
3757*4882a593Smuzhiyun 	to_nodeid = dlm_dir_nodeid(r);
3758*4882a593Smuzhiyun 
3759*4882a593Smuzhiyun 	error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
3760*4882a593Smuzhiyun 	if (error)
3761*4882a593Smuzhiyun 		return error;
3762*4882a593Smuzhiyun 
3763*4882a593Smuzhiyun 	error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
3764*4882a593Smuzhiyun 	if (error)
3765*4882a593Smuzhiyun 		goto fail;
3766*4882a593Smuzhiyun 
3767*4882a593Smuzhiyun 	send_args(r, lkb, ms);
3768*4882a593Smuzhiyun 
3769*4882a593Smuzhiyun 	error = send_message(mh, ms);
3770*4882a593Smuzhiyun 	if (error)
3771*4882a593Smuzhiyun 		goto fail;
3772*4882a593Smuzhiyun 	return 0;
3773*4882a593Smuzhiyun 
3774*4882a593Smuzhiyun  fail:
3775*4882a593Smuzhiyun 	remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
3776*4882a593Smuzhiyun 	return error;
3777*4882a593Smuzhiyun }
3778*4882a593Smuzhiyun 
3779*4882a593Smuzhiyun static int send_remove(struct dlm_rsb *r)
3780*4882a593Smuzhiyun {
3781*4882a593Smuzhiyun 	struct dlm_message *ms;
3782*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3783*4882a593Smuzhiyun 	int to_nodeid, error;
3784*4882a593Smuzhiyun 
3785*4882a593Smuzhiyun 	to_nodeid = dlm_dir_nodeid(r);
3786*4882a593Smuzhiyun 
3787*4882a593Smuzhiyun 	error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
3788*4882a593Smuzhiyun 	if (error)
3789*4882a593Smuzhiyun 		goto out;
3790*4882a593Smuzhiyun 
3791*4882a593Smuzhiyun 	memcpy(ms->m_extra, r->res_name, r->res_length);
3792*4882a593Smuzhiyun 	ms->m_hash = r->res_hash;
3793*4882a593Smuzhiyun 
3794*4882a593Smuzhiyun 	error = send_message(mh, ms);
3795*4882a593Smuzhiyun  out:
3796*4882a593Smuzhiyun 	return error;
3797*4882a593Smuzhiyun }
3798*4882a593Smuzhiyun 
3799*4882a593Smuzhiyun static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3800*4882a593Smuzhiyun 			     int mstype, int rv)
3801*4882a593Smuzhiyun {
3802*4882a593Smuzhiyun 	struct dlm_message *ms;
3803*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3804*4882a593Smuzhiyun 	int to_nodeid, error;
3805*4882a593Smuzhiyun 
3806*4882a593Smuzhiyun 	to_nodeid = lkb->lkb_nodeid;
3807*4882a593Smuzhiyun 
3808*4882a593Smuzhiyun 	error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
3809*4882a593Smuzhiyun 	if (error)
3810*4882a593Smuzhiyun 		goto out;
3811*4882a593Smuzhiyun 
3812*4882a593Smuzhiyun 	send_args(r, lkb, ms);
3813*4882a593Smuzhiyun 
3814*4882a593Smuzhiyun 	ms->m_result = rv;
3815*4882a593Smuzhiyun 
3816*4882a593Smuzhiyun 	error = send_message(mh, ms);
3817*4882a593Smuzhiyun  out:
3818*4882a593Smuzhiyun 	return error;
3819*4882a593Smuzhiyun }
3820*4882a593Smuzhiyun 
3821*4882a593Smuzhiyun static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3822*4882a593Smuzhiyun {
3823*4882a593Smuzhiyun 	return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv);
3824*4882a593Smuzhiyun }
3825*4882a593Smuzhiyun 
3826*4882a593Smuzhiyun static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3827*4882a593Smuzhiyun {
3828*4882a593Smuzhiyun 	return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv);
3829*4882a593Smuzhiyun }
3830*4882a593Smuzhiyun 
3831*4882a593Smuzhiyun static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3832*4882a593Smuzhiyun {
3833*4882a593Smuzhiyun 	return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv);
3834*4882a593Smuzhiyun }
3835*4882a593Smuzhiyun 
3836*4882a593Smuzhiyun static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3837*4882a593Smuzhiyun {
3838*4882a593Smuzhiyun 	return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv);
3839*4882a593Smuzhiyun }
3840*4882a593Smuzhiyun 
3841*4882a593Smuzhiyun static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
3842*4882a593Smuzhiyun 			     int ret_nodeid, int rv)
3843*4882a593Smuzhiyun {
3844*4882a593Smuzhiyun 	struct dlm_rsb *r = &ls->ls_stub_rsb;
3845*4882a593Smuzhiyun 	struct dlm_message *ms;
3846*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
3847*4882a593Smuzhiyun 	int error, nodeid = ms_in->m_header.h_nodeid;
3848*4882a593Smuzhiyun 
3849*4882a593Smuzhiyun 	error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
3850*4882a593Smuzhiyun 	if (error)
3851*4882a593Smuzhiyun 		goto out;
3852*4882a593Smuzhiyun 
3853*4882a593Smuzhiyun 	ms->m_lkid = ms_in->m_lkid;
3854*4882a593Smuzhiyun 	ms->m_result = rv;
3855*4882a593Smuzhiyun 	ms->m_nodeid = ret_nodeid;
3856*4882a593Smuzhiyun 
3857*4882a593Smuzhiyun 	error = send_message(mh, ms);
3858*4882a593Smuzhiyun  out:
3859*4882a593Smuzhiyun 	return error;
3860*4882a593Smuzhiyun }
3861*4882a593Smuzhiyun 
3862*4882a593Smuzhiyun /* which args we save from a received message depends heavily on the type
3863*4882a593Smuzhiyun    of message, unlike the send side where we can safely send everything about
3864*4882a593Smuzhiyun    the lkb for any type of message */
3865*4882a593Smuzhiyun 
3866*4882a593Smuzhiyun static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3867*4882a593Smuzhiyun {
3868*4882a593Smuzhiyun 	lkb->lkb_exflags = ms->m_exflags;
3869*4882a593Smuzhiyun 	lkb->lkb_sbflags = ms->m_sbflags;
3870*4882a593Smuzhiyun 	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3871*4882a593Smuzhiyun 		         (ms->m_flags & 0x0000FFFF);
3872*4882a593Smuzhiyun }
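
/* The low 16 bits of lkb_flags are wire flags shared by both copies of
   the lock (e.g. DLM_IFL_USER, checked in validate_message() below);
   the high 16 bits are node-local state and are preserved here. */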
3873*4882a593Smuzhiyun 
3874*4882a593Smuzhiyun static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3875*4882a593Smuzhiyun {
3876*4882a593Smuzhiyun 	if (ms->m_flags == DLM_IFL_STUB_MS)
3877*4882a593Smuzhiyun 		return;
3878*4882a593Smuzhiyun 
3879*4882a593Smuzhiyun 	lkb->lkb_sbflags = ms->m_sbflags;
3880*4882a593Smuzhiyun 	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3881*4882a593Smuzhiyun 		         (ms->m_flags & 0x0000FFFF);
3882*4882a593Smuzhiyun }
3883*4882a593Smuzhiyun 
3884*4882a593Smuzhiyun static int receive_extralen(struct dlm_message *ms)
3885*4882a593Smuzhiyun {
3886*4882a593Smuzhiyun 	return (ms->m_header.h_length - sizeof(struct dlm_message));
3887*4882a593Smuzhiyun }
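
/* This is the inverse of the sizing in create_message(): for the
   5-byte request in the example there, h_length - sizeof(struct
   dlm_message) gives back namelen = 5 for receive_request() to use. */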
3888*4882a593Smuzhiyun 
3889*4882a593Smuzhiyun static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
3890*4882a593Smuzhiyun 		       struct dlm_message *ms)
3891*4882a593Smuzhiyun {
3892*4882a593Smuzhiyun 	int len;
3893*4882a593Smuzhiyun 
3894*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3895*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
3896*4882a593Smuzhiyun 			lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
3897*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
3898*4882a593Smuzhiyun 			return -ENOMEM;
3899*4882a593Smuzhiyun 		len = receive_extralen(ms);
3900*4882a593Smuzhiyun 		if (len > ls->ls_lvblen)
3901*4882a593Smuzhiyun 			len = ls->ls_lvblen;
3902*4882a593Smuzhiyun 		memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
3903*4882a593Smuzhiyun 	}
3904*4882a593Smuzhiyun 	return 0;
3905*4882a593Smuzhiyun }
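
/* Note the clamp above: if the sending lockspace was configured with a
   larger lvblen than ours, only ls_lvblen bytes are copied, so a
   mismatched configuration truncates rather than overruns the local
   buffer. */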
3906*4882a593Smuzhiyun 
3907*4882a593Smuzhiyun static void fake_bastfn(void *astparam, int mode)
3908*4882a593Smuzhiyun {
3909*4882a593Smuzhiyun 	log_print("fake_bastfn should not be called");
3910*4882a593Smuzhiyun }
3911*4882a593Smuzhiyun 
3912*4882a593Smuzhiyun static void fake_astfn(void *astparam)
3913*4882a593Smuzhiyun {
3914*4882a593Smuzhiyun 	log_print("fake_astfn should not be called");
3915*4882a593Smuzhiyun }
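
/* Function pointers cannot cross the wire: send_args() transmits only
   the DLM_CB_BAST/DLM_CB_CAST bits, and receive_request_args() below
   plants these placeholders so the master copy still answers "does
   this lock have an ast/bast?" correctly. */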
3916*4882a593Smuzhiyun 
3917*4882a593Smuzhiyun static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3918*4882a593Smuzhiyun 				struct dlm_message *ms)
3919*4882a593Smuzhiyun {
3920*4882a593Smuzhiyun 	lkb->lkb_nodeid = ms->m_header.h_nodeid;
3921*4882a593Smuzhiyun 	lkb->lkb_ownpid = ms->m_pid;
3922*4882a593Smuzhiyun 	lkb->lkb_remid = ms->m_lkid;
3923*4882a593Smuzhiyun 	lkb->lkb_grmode = DLM_LOCK_IV;
3924*4882a593Smuzhiyun 	lkb->lkb_rqmode = ms->m_rqmode;
3925*4882a593Smuzhiyun 
3926*4882a593Smuzhiyun 	lkb->lkb_bastfn = (ms->m_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
3927*4882a593Smuzhiyun 	lkb->lkb_astfn = (ms->m_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
3928*4882a593Smuzhiyun 
3929*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3930*4882a593Smuzhiyun 		/* lkb was just created so there won't be an lvb yet */
3931*4882a593Smuzhiyun 		lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
3932*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
3933*4882a593Smuzhiyun 			return -ENOMEM;
3934*4882a593Smuzhiyun 	}
3935*4882a593Smuzhiyun 
3936*4882a593Smuzhiyun 	return 0;
3937*4882a593Smuzhiyun }
3938*4882a593Smuzhiyun 
3939*4882a593Smuzhiyun static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3940*4882a593Smuzhiyun 				struct dlm_message *ms)
3941*4882a593Smuzhiyun {
3942*4882a593Smuzhiyun 	if (lkb->lkb_status != DLM_LKSTS_GRANTED)
3943*4882a593Smuzhiyun 		return -EBUSY;
3944*4882a593Smuzhiyun 
3945*4882a593Smuzhiyun 	if (receive_lvb(ls, lkb, ms))
3946*4882a593Smuzhiyun 		return -ENOMEM;
3947*4882a593Smuzhiyun 
3948*4882a593Smuzhiyun 	lkb->lkb_rqmode = ms->m_rqmode;
3949*4882a593Smuzhiyun 	lkb->lkb_lvbseq = ms->m_lvbseq;
3950*4882a593Smuzhiyun 
3951*4882a593Smuzhiyun 	return 0;
3952*4882a593Smuzhiyun }
3953*4882a593Smuzhiyun 
3954*4882a593Smuzhiyun static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3955*4882a593Smuzhiyun 			       struct dlm_message *ms)
3956*4882a593Smuzhiyun {
3957*4882a593Smuzhiyun 	if (receive_lvb(ls, lkb, ms))
3958*4882a593Smuzhiyun 		return -ENOMEM;
3959*4882a593Smuzhiyun 	return 0;
3960*4882a593Smuzhiyun }
3961*4882a593Smuzhiyun 
3962*4882a593Smuzhiyun /* We fill in the stub-lkb fields with the info that send_xxxx_reply()
3963*4882a593Smuzhiyun    uses to send a reply and that the remote end uses to process the reply. */
3964*4882a593Smuzhiyun 
3965*4882a593Smuzhiyun static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
3966*4882a593Smuzhiyun {
3967*4882a593Smuzhiyun 	struct dlm_lkb *lkb = &ls->ls_stub_lkb;
3968*4882a593Smuzhiyun 	lkb->lkb_nodeid = ms->m_header.h_nodeid;
3969*4882a593Smuzhiyun 	lkb->lkb_remid = ms->m_lkid;
3970*4882a593Smuzhiyun }
3971*4882a593Smuzhiyun 
3972*4882a593Smuzhiyun /* This is called after the rsb is locked so that we can safely inspect
3973*4882a593Smuzhiyun    fields in the lkb. */
3974*4882a593Smuzhiyun 
3975*4882a593Smuzhiyun static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
3976*4882a593Smuzhiyun {
3977*4882a593Smuzhiyun 	int from = ms->m_header.h_nodeid;
3978*4882a593Smuzhiyun 	int error = 0;
3979*4882a593Smuzhiyun 
3980*4882a593Smuzhiyun 	/* currently mixing user and kernel locks is not supported */
3981*4882a593Smuzhiyun 	if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) {
3982*4882a593Smuzhiyun 		log_error(lkb->lkb_resource->res_ls,
3983*4882a593Smuzhiyun 			  "got user dlm message for a kernel lock");
3984*4882a593Smuzhiyun 		error = -EINVAL;
3985*4882a593Smuzhiyun 		goto out;
3986*4882a593Smuzhiyun 	}
3987*4882a593Smuzhiyun 
3988*4882a593Smuzhiyun 	switch (ms->m_type) {
3989*4882a593Smuzhiyun 	case DLM_MSG_CONVERT:
3990*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK:
3991*4882a593Smuzhiyun 	case DLM_MSG_CANCEL:
3992*4882a593Smuzhiyun 		if (!is_master_copy(lkb) || lkb->lkb_nodeid != from)
3993*4882a593Smuzhiyun 			error = -EINVAL;
3994*4882a593Smuzhiyun 		break;
3995*4882a593Smuzhiyun 
3996*4882a593Smuzhiyun 	case DLM_MSG_CONVERT_REPLY:
3997*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK_REPLY:
3998*4882a593Smuzhiyun 	case DLM_MSG_CANCEL_REPLY:
3999*4882a593Smuzhiyun 	case DLM_MSG_GRANT:
4000*4882a593Smuzhiyun 	case DLM_MSG_BAST:
4001*4882a593Smuzhiyun 		if (!is_process_copy(lkb) || lkb->lkb_nodeid != from)
4002*4882a593Smuzhiyun 			error = -EINVAL;
4003*4882a593Smuzhiyun 		break;
4004*4882a593Smuzhiyun 
4005*4882a593Smuzhiyun 	case DLM_MSG_REQUEST_REPLY:
4006*4882a593Smuzhiyun 		if (!is_process_copy(lkb))
4007*4882a593Smuzhiyun 			error = -EINVAL;
4008*4882a593Smuzhiyun 		else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from)
4009*4882a593Smuzhiyun 			error = -EINVAL;
4010*4882a593Smuzhiyun 		break;
4011*4882a593Smuzhiyun 
4012*4882a593Smuzhiyun 	default:
4013*4882a593Smuzhiyun 		error = -EINVAL;
4014*4882a593Smuzhiyun 	}
4015*4882a593Smuzhiyun 
4016*4882a593Smuzhiyun out:
4017*4882a593Smuzhiyun 	if (error)
4018*4882a593Smuzhiyun 		log_error(lkb->lkb_resource->res_ls,
4019*4882a593Smuzhiyun 			  "ignore invalid message %d from %d %x %x %x %d",
4020*4882a593Smuzhiyun 			  ms->m_type, from, lkb->lkb_id, lkb->lkb_remid,
4021*4882a593Smuzhiyun 			  lkb->lkb_flags, lkb->lkb_nodeid);
4022*4882a593Smuzhiyun 	return error;
4023*4882a593Smuzhiyun }
4024*4882a593Smuzhiyun 
4025*4882a593Smuzhiyun static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
4026*4882a593Smuzhiyun {
4027*4882a593Smuzhiyun 	char name[DLM_RESNAME_MAXLEN + 1];
4028*4882a593Smuzhiyun 	struct dlm_message *ms;
4029*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
4030*4882a593Smuzhiyun 	struct dlm_rsb *r;
4031*4882a593Smuzhiyun 	uint32_t hash, b;
4032*4882a593Smuzhiyun 	int rv, dir_nodeid;
4033*4882a593Smuzhiyun 
4034*4882a593Smuzhiyun 	memset(name, 0, sizeof(name));
4035*4882a593Smuzhiyun 	memcpy(name, ms_name, len);
4036*4882a593Smuzhiyun 
4037*4882a593Smuzhiyun 	hash = jhash(name, len, 0);
4038*4882a593Smuzhiyun 	b = hash & (ls->ls_rsbtbl_size - 1);
4039*4882a593Smuzhiyun 
4040*4882a593Smuzhiyun 	dir_nodeid = dlm_hash2nodeid(ls, hash);
4041*4882a593Smuzhiyun 
4042*4882a593Smuzhiyun 	log_error(ls, "send_repeat_remove dir %d %s", dir_nodeid, name);
4043*4882a593Smuzhiyun 
4044*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[b].lock);
4045*4882a593Smuzhiyun 	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
4046*4882a593Smuzhiyun 	if (!rv) {
4047*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4048*4882a593Smuzhiyun 		log_error(ls, "repeat_remove on keep %s", name);
4049*4882a593Smuzhiyun 		return;
4050*4882a593Smuzhiyun 	}
4051*4882a593Smuzhiyun 
4052*4882a593Smuzhiyun 	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
4053*4882a593Smuzhiyun 	if (!rv) {
4054*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4055*4882a593Smuzhiyun 		log_error(ls, "repeat_remove on toss %s", name);
4056*4882a593Smuzhiyun 		return;
4057*4882a593Smuzhiyun 	}
4058*4882a593Smuzhiyun 
4059*4882a593Smuzhiyun 	/* use ls->remove_name2 to avoid conflict with shrink? */
4060*4882a593Smuzhiyun 
4061*4882a593Smuzhiyun 	spin_lock(&ls->ls_remove_spin);
4062*4882a593Smuzhiyun 	ls->ls_remove_len = len;
4063*4882a593Smuzhiyun 	memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
4064*4882a593Smuzhiyun 	spin_unlock(&ls->ls_remove_spin);
4065*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[b].lock);
4066*4882a593Smuzhiyun 
4067*4882a593Smuzhiyun 	rv = _create_message(ls, sizeof(struct dlm_message) + len,
4068*4882a593Smuzhiyun 			     dir_nodeid, DLM_MSG_REMOVE, &ms, &mh);
4069*4882a593Smuzhiyun 	if (rv)
4070*4882a593Smuzhiyun 		goto out;
4071*4882a593Smuzhiyun 
4072*4882a593Smuzhiyun 	memcpy(ms->m_extra, name, len);
4073*4882a593Smuzhiyun 	ms->m_hash = hash;
4074*4882a593Smuzhiyun 
4075*4882a593Smuzhiyun 	send_message(mh, ms);
4076*4882a593Smuzhiyun 
4077*4882a593Smuzhiyun out:
4078*4882a593Smuzhiyun 	spin_lock(&ls->ls_remove_spin);
4079*4882a593Smuzhiyun 	ls->ls_remove_len = 0;
4080*4882a593Smuzhiyun 	memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
4081*4882a593Smuzhiyun 	spin_unlock(&ls->ls_remove_spin);
4082*4882a593Smuzhiyun }
4083*4882a593Smuzhiyun 
4084*4882a593Smuzhiyun static int receive_request(struct dlm_ls *ls, struct dlm_message *ms)
4085*4882a593Smuzhiyun {
4086*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4087*4882a593Smuzhiyun 	struct dlm_rsb *r;
4088*4882a593Smuzhiyun 	int from_nodeid;
4089*4882a593Smuzhiyun 	int error, namelen = 0;
4090*4882a593Smuzhiyun 
4091*4882a593Smuzhiyun 	from_nodeid = ms->m_header.h_nodeid;
4092*4882a593Smuzhiyun 
4093*4882a593Smuzhiyun 	error = create_lkb(ls, &lkb);
4094*4882a593Smuzhiyun 	if (error)
4095*4882a593Smuzhiyun 		goto fail;
4096*4882a593Smuzhiyun 
4097*4882a593Smuzhiyun 	receive_flags(lkb, ms);
4098*4882a593Smuzhiyun 	lkb->lkb_flags |= DLM_IFL_MSTCPY;
4099*4882a593Smuzhiyun 	error = receive_request_args(ls, lkb, ms);
4100*4882a593Smuzhiyun 	if (error) {
4101*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
4102*4882a593Smuzhiyun 		goto fail;
4103*4882a593Smuzhiyun 	}
4104*4882a593Smuzhiyun 
4105*4882a593Smuzhiyun 	/* The dir node is the authority on whether we are the master
4106*4882a593Smuzhiyun 	   for this rsb or not, so if the master sends us a request, we should
4107*4882a593Smuzhiyun 	   recreate the rsb if we've destroyed it.   This race happens when we
4108*4882a593Smuzhiyun 	   send a remove message to the dir node at the same time that the dir
4109*4882a593Smuzhiyun 	   node sends us a request for the rsb. */
4110*4882a593Smuzhiyun 
4111*4882a593Smuzhiyun 	namelen = receive_extralen(ms);
4112*4882a593Smuzhiyun 
4113*4882a593Smuzhiyun 	error = find_rsb(ls, ms->m_extra, namelen, from_nodeid,
4114*4882a593Smuzhiyun 			 R_RECEIVE_REQUEST, &r);
4115*4882a593Smuzhiyun 	if (error) {
4116*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
4117*4882a593Smuzhiyun 		goto fail;
4118*4882a593Smuzhiyun 	}
4119*4882a593Smuzhiyun 
4120*4882a593Smuzhiyun 	lock_rsb(r);
4121*4882a593Smuzhiyun 
4122*4882a593Smuzhiyun 	if (r->res_master_nodeid != dlm_our_nodeid()) {
4123*4882a593Smuzhiyun 		error = validate_master_nodeid(ls, r, from_nodeid);
4124*4882a593Smuzhiyun 		if (error) {
4125*4882a593Smuzhiyun 			unlock_rsb(r);
4126*4882a593Smuzhiyun 			put_rsb(r);
4127*4882a593Smuzhiyun 			__put_lkb(ls, lkb);
4128*4882a593Smuzhiyun 			goto fail;
4129*4882a593Smuzhiyun 		}
4130*4882a593Smuzhiyun 	}
4131*4882a593Smuzhiyun 
4132*4882a593Smuzhiyun 	attach_lkb(r, lkb);
4133*4882a593Smuzhiyun 	error = do_request(r, lkb);
4134*4882a593Smuzhiyun 	send_request_reply(r, lkb, error);
4135*4882a593Smuzhiyun 	do_request_effects(r, lkb, error);
4136*4882a593Smuzhiyun 
4137*4882a593Smuzhiyun 	unlock_rsb(r);
4138*4882a593Smuzhiyun 	put_rsb(r);
4139*4882a593Smuzhiyun 
4140*4882a593Smuzhiyun 	if (error == -EINPROGRESS)
4141*4882a593Smuzhiyun 		error = 0;
4142*4882a593Smuzhiyun 	if (error)
4143*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
4144*4882a593Smuzhiyun 	return 0;
4145*4882a593Smuzhiyun 
4146*4882a593Smuzhiyun  fail:
4147*4882a593Smuzhiyun 	/* TODO: instead of returning ENOTBLK, add the lkb to res_lookup
4148*4882a593Smuzhiyun 	   and do this receive_request again from process_lookup_list once
4149*4882a593Smuzhiyun 	   we get the lookup reply.  This would avoid many repeated
4150*4882a593Smuzhiyun 	   ENOTBLK request failures when the lookup reply designating us
4151*4882a593Smuzhiyun 	   as master is delayed. */
4152*4882a593Smuzhiyun 
4153*4882a593Smuzhiyun 	/* We could repeatedly return -EBADR here if our send_remove() is
4154*4882a593Smuzhiyun 	   delayed in being sent/arriving/being processed on the dir node.
4155*4882a593Smuzhiyun 	   Another node would repeatedly look up the master, and the dir
4156*4882a593Smuzhiyun 	   node would continue returning our nodeid until our send_remove
4157*4882a593Smuzhiyun 	   took effect.
4158*4882a593Smuzhiyun 
4159*4882a593Smuzhiyun 	   We send another remove message in case our previous send_remove
4160*4882a593Smuzhiyun 	   was lost/ignored/missed somehow. */
4161*4882a593Smuzhiyun 
4162*4882a593Smuzhiyun 	if (error != -ENOTBLK) {
4163*4882a593Smuzhiyun 		log_limit(ls, "receive_request %x from %d %d",
4164*4882a593Smuzhiyun 			  ms->m_lkid, from_nodeid, error);
4165*4882a593Smuzhiyun 	}
4166*4882a593Smuzhiyun 
4167*4882a593Smuzhiyun 	if (namelen && error == -EBADR) {
4168*4882a593Smuzhiyun 		send_repeat_remove(ls, ms->m_extra, namelen);
4169*4882a593Smuzhiyun 		msleep(1000);
4170*4882a593Smuzhiyun 	}
4171*4882a593Smuzhiyun 
4172*4882a593Smuzhiyun 	setup_stub_lkb(ls, ms);
4173*4882a593Smuzhiyun 	send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
4174*4882a593Smuzhiyun 	return error;
4175*4882a593Smuzhiyun }
4176*4882a593Smuzhiyun 
4177*4882a593Smuzhiyun static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
4178*4882a593Smuzhiyun {
4179*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4180*4882a593Smuzhiyun 	struct dlm_rsb *r;
4181*4882a593Smuzhiyun 	int error, reply = 1;
4182*4882a593Smuzhiyun 
4183*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4184*4882a593Smuzhiyun 	if (error)
4185*4882a593Smuzhiyun 		goto fail;
4186*4882a593Smuzhiyun 
4187*4882a593Smuzhiyun 	if (lkb->lkb_remid != ms->m_lkid) {
4188*4882a593Smuzhiyun 		log_error(ls, "receive_convert %x remid %x recover_seq %llu "
4189*4882a593Smuzhiyun 			  "remote %d %x", lkb->lkb_id, lkb->lkb_remid,
4190*4882a593Smuzhiyun 			  (unsigned long long)lkb->lkb_recover_seq,
4191*4882a593Smuzhiyun 			  ms->m_header.h_nodeid, ms->m_lkid);
4192*4882a593Smuzhiyun 		error = -ENOENT;
4193*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
4194*4882a593Smuzhiyun 		goto fail;
4195*4882a593Smuzhiyun 	}
4196*4882a593Smuzhiyun 
4197*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4198*4882a593Smuzhiyun 
4199*4882a593Smuzhiyun 	hold_rsb(r);
4200*4882a593Smuzhiyun 	lock_rsb(r);
4201*4882a593Smuzhiyun 
4202*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4203*4882a593Smuzhiyun 	if (error)
4204*4882a593Smuzhiyun 		goto out;
4205*4882a593Smuzhiyun 
4206*4882a593Smuzhiyun 	receive_flags(lkb, ms);
4207*4882a593Smuzhiyun 
4208*4882a593Smuzhiyun 	error = receive_convert_args(ls, lkb, ms);
4209*4882a593Smuzhiyun 	if (error) {
4210*4882a593Smuzhiyun 		send_convert_reply(r, lkb, error);
4211*4882a593Smuzhiyun 		goto out;
4212*4882a593Smuzhiyun 	}
4213*4882a593Smuzhiyun 
4214*4882a593Smuzhiyun 	reply = !down_conversion(lkb);
4215*4882a593Smuzhiyun 
4216*4882a593Smuzhiyun 	error = do_convert(r, lkb);
4217*4882a593Smuzhiyun 	if (reply)
4218*4882a593Smuzhiyun 		send_convert_reply(r, lkb, error);
4219*4882a593Smuzhiyun 	do_convert_effects(r, lkb, error);
4220*4882a593Smuzhiyun  out:
4221*4882a593Smuzhiyun 	unlock_rsb(r);
4222*4882a593Smuzhiyun 	put_rsb(r);
4223*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4224*4882a593Smuzhiyun 	return 0;
4225*4882a593Smuzhiyun 
4226*4882a593Smuzhiyun  fail:
4227*4882a593Smuzhiyun 	setup_stub_lkb(ls, ms);
4228*4882a593Smuzhiyun 	send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
4229*4882a593Smuzhiyun 	return error;
4230*4882a593Smuzhiyun }
4231*4882a593Smuzhiyun 
4232*4882a593Smuzhiyun static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
4233*4882a593Smuzhiyun {
4234*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4235*4882a593Smuzhiyun 	struct dlm_rsb *r;
4236*4882a593Smuzhiyun 	int error;
4237*4882a593Smuzhiyun 
4238*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4239*4882a593Smuzhiyun 	if (error)
4240*4882a593Smuzhiyun 		goto fail;
4241*4882a593Smuzhiyun 
4242*4882a593Smuzhiyun 	if (lkb->lkb_remid != ms->m_lkid) {
4243*4882a593Smuzhiyun 		log_error(ls, "receive_unlock %x remid %x remote %d %x",
4244*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_remid,
4245*4882a593Smuzhiyun 			  ms->m_header.h_nodeid, ms->m_lkid);
4246*4882a593Smuzhiyun 		error = -ENOENT;
4247*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
4248*4882a593Smuzhiyun 		goto fail;
4249*4882a593Smuzhiyun 	}
4250*4882a593Smuzhiyun 
4251*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4252*4882a593Smuzhiyun 
4253*4882a593Smuzhiyun 	hold_rsb(r);
4254*4882a593Smuzhiyun 	lock_rsb(r);
4255*4882a593Smuzhiyun 
4256*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4257*4882a593Smuzhiyun 	if (error)
4258*4882a593Smuzhiyun 		goto out;
4259*4882a593Smuzhiyun 
4260*4882a593Smuzhiyun 	receive_flags(lkb, ms);
4261*4882a593Smuzhiyun 
4262*4882a593Smuzhiyun 	error = receive_unlock_args(ls, lkb, ms);
4263*4882a593Smuzhiyun 	if (error) {
4264*4882a593Smuzhiyun 		send_unlock_reply(r, lkb, error);
4265*4882a593Smuzhiyun 		goto out;
4266*4882a593Smuzhiyun 	}
4267*4882a593Smuzhiyun 
4268*4882a593Smuzhiyun 	error = do_unlock(r, lkb);
4269*4882a593Smuzhiyun 	send_unlock_reply(r, lkb, error);
4270*4882a593Smuzhiyun 	do_unlock_effects(r, lkb, error);
4271*4882a593Smuzhiyun  out:
4272*4882a593Smuzhiyun 	unlock_rsb(r);
4273*4882a593Smuzhiyun 	put_rsb(r);
4274*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4275*4882a593Smuzhiyun 	return 0;
4276*4882a593Smuzhiyun 
4277*4882a593Smuzhiyun  fail:
4278*4882a593Smuzhiyun 	setup_stub_lkb(ls, ms);
4279*4882a593Smuzhiyun 	send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
4280*4882a593Smuzhiyun 	return error;
4281*4882a593Smuzhiyun }
4282*4882a593Smuzhiyun 
4283*4882a593Smuzhiyun static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
4284*4882a593Smuzhiyun {
4285*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4286*4882a593Smuzhiyun 	struct dlm_rsb *r;
4287*4882a593Smuzhiyun 	int error;
4288*4882a593Smuzhiyun 
4289*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4290*4882a593Smuzhiyun 	if (error)
4291*4882a593Smuzhiyun 		goto fail;
4292*4882a593Smuzhiyun 
4293*4882a593Smuzhiyun 	receive_flags(lkb, ms);
4294*4882a593Smuzhiyun 
4295*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4296*4882a593Smuzhiyun 
4297*4882a593Smuzhiyun 	hold_rsb(r);
4298*4882a593Smuzhiyun 	lock_rsb(r);
4299*4882a593Smuzhiyun 
4300*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4301*4882a593Smuzhiyun 	if (error)
4302*4882a593Smuzhiyun 		goto out;
4303*4882a593Smuzhiyun 
4304*4882a593Smuzhiyun 	error = do_cancel(r, lkb);
4305*4882a593Smuzhiyun 	send_cancel_reply(r, lkb, error);
4306*4882a593Smuzhiyun 	do_cancel_effects(r, lkb, error);
4307*4882a593Smuzhiyun  out:
4308*4882a593Smuzhiyun 	unlock_rsb(r);
4309*4882a593Smuzhiyun 	put_rsb(r);
4310*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4311*4882a593Smuzhiyun 	return 0;
4312*4882a593Smuzhiyun 
4313*4882a593Smuzhiyun  fail:
4314*4882a593Smuzhiyun 	setup_stub_lkb(ls, ms);
4315*4882a593Smuzhiyun 	send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
4316*4882a593Smuzhiyun 	return error;
4317*4882a593Smuzhiyun }
4318*4882a593Smuzhiyun 
4319*4882a593Smuzhiyun static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
4320*4882a593Smuzhiyun {
4321*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4322*4882a593Smuzhiyun 	struct dlm_rsb *r;
4323*4882a593Smuzhiyun 	int error;
4324*4882a593Smuzhiyun 
4325*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4326*4882a593Smuzhiyun 	if (error)
4327*4882a593Smuzhiyun 		return error;
4328*4882a593Smuzhiyun 
4329*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4330*4882a593Smuzhiyun 
4331*4882a593Smuzhiyun 	hold_rsb(r);
4332*4882a593Smuzhiyun 	lock_rsb(r);
4333*4882a593Smuzhiyun 
4334*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4335*4882a593Smuzhiyun 	if (error)
4336*4882a593Smuzhiyun 		goto out;
4337*4882a593Smuzhiyun 
4338*4882a593Smuzhiyun 	receive_flags_reply(lkb, ms);
4339*4882a593Smuzhiyun 	if (is_altmode(lkb))
4340*4882a593Smuzhiyun 		munge_altmode(lkb, ms);
4341*4882a593Smuzhiyun 	grant_lock_pc(r, lkb, ms);
4342*4882a593Smuzhiyun 	queue_cast(r, lkb, 0);
4343*4882a593Smuzhiyun  out:
4344*4882a593Smuzhiyun 	unlock_rsb(r);
4345*4882a593Smuzhiyun 	put_rsb(r);
4346*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4347*4882a593Smuzhiyun 	return 0;
4348*4882a593Smuzhiyun }
4349*4882a593Smuzhiyun 
4350*4882a593Smuzhiyun static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
4351*4882a593Smuzhiyun {
4352*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4353*4882a593Smuzhiyun 	struct dlm_rsb *r;
4354*4882a593Smuzhiyun 	int error;
4355*4882a593Smuzhiyun 
4356*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4357*4882a593Smuzhiyun 	if (error)
4358*4882a593Smuzhiyun 		return error;
4359*4882a593Smuzhiyun 
4360*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4361*4882a593Smuzhiyun 
4362*4882a593Smuzhiyun 	hold_rsb(r);
4363*4882a593Smuzhiyun 	lock_rsb(r);
4364*4882a593Smuzhiyun 
4365*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4366*4882a593Smuzhiyun 	if (error)
4367*4882a593Smuzhiyun 		goto out;
4368*4882a593Smuzhiyun 
4369*4882a593Smuzhiyun 	queue_bast(r, lkb, ms->m_bastmode);
4370*4882a593Smuzhiyun 	lkb->lkb_highbast = ms->m_bastmode;
4371*4882a593Smuzhiyun  out:
4372*4882a593Smuzhiyun 	unlock_rsb(r);
4373*4882a593Smuzhiyun 	put_rsb(r);
4374*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4375*4882a593Smuzhiyun 	return 0;
4376*4882a593Smuzhiyun }
4377*4882a593Smuzhiyun 
4378*4882a593Smuzhiyun static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms)
4379*4882a593Smuzhiyun {
4380*4882a593Smuzhiyun 	int len, error, ret_nodeid, from_nodeid, our_nodeid;
4381*4882a593Smuzhiyun 
4382*4882a593Smuzhiyun 	from_nodeid = ms->m_header.h_nodeid;
4383*4882a593Smuzhiyun 	our_nodeid = dlm_our_nodeid();
4384*4882a593Smuzhiyun 
4385*4882a593Smuzhiyun 	len = receive_extralen(ms);
4386*4882a593Smuzhiyun 
4387*4882a593Smuzhiyun 	error = dlm_master_lookup(ls, from_nodeid, ms->m_extra, len, 0,
4388*4882a593Smuzhiyun 				  &ret_nodeid, NULL);
4389*4882a593Smuzhiyun 
4390*4882a593Smuzhiyun 	/* Optimization: we're master so treat lookup as a request */
4391*4882a593Smuzhiyun 	if (!error && ret_nodeid == our_nodeid) {
4392*4882a593Smuzhiyun 		receive_request(ls, ms);
4393*4882a593Smuzhiyun 		return;
4394*4882a593Smuzhiyun 	}
4395*4882a593Smuzhiyun 	send_lookup_reply(ls, ms, ret_nodeid, error);
4396*4882a593Smuzhiyun }
4397*4882a593Smuzhiyun 
4398*4882a593Smuzhiyun static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
4399*4882a593Smuzhiyun {
4400*4882a593Smuzhiyun 	char name[DLM_RESNAME_MAXLEN+1];
4401*4882a593Smuzhiyun 	struct dlm_rsb *r;
4402*4882a593Smuzhiyun 	uint32_t hash, b;
4403*4882a593Smuzhiyun 	int rv, len, dir_nodeid, from_nodeid;
4404*4882a593Smuzhiyun 
4405*4882a593Smuzhiyun 	from_nodeid = ms->m_header.h_nodeid;
4406*4882a593Smuzhiyun 
4407*4882a593Smuzhiyun 	len = receive_extralen(ms);
4408*4882a593Smuzhiyun 
4409*4882a593Smuzhiyun 	if (len > DLM_RESNAME_MAXLEN) {
4410*4882a593Smuzhiyun 		log_error(ls, "receive_remove from %d bad len %d",
4411*4882a593Smuzhiyun 			  from_nodeid, len);
4412*4882a593Smuzhiyun 		return;
4413*4882a593Smuzhiyun 	}
4414*4882a593Smuzhiyun 
4415*4882a593Smuzhiyun 	dir_nodeid = dlm_hash2nodeid(ls, ms->m_hash);
4416*4882a593Smuzhiyun 	if (dir_nodeid != dlm_our_nodeid()) {
4417*4882a593Smuzhiyun 		log_error(ls, "receive_remove from %d bad nodeid %d",
4418*4882a593Smuzhiyun 			  from_nodeid, dir_nodeid);
4419*4882a593Smuzhiyun 		return;
4420*4882a593Smuzhiyun 	}
4421*4882a593Smuzhiyun 
4422*4882a593Smuzhiyun 	/* Look for the name on rsbtbl.toss; if it's there, kill it.
4423*4882a593Smuzhiyun 	   If it's on rsbtbl.keep, it's being used, and we should ignore this
4424*4882a593Smuzhiyun 	   message.  This is an expected race between the dir node sending a
4425*4882a593Smuzhiyun 	   request to the master node at the same time as the master node sends
4426*4882a593Smuzhiyun 	   a remove to the dir node.  The resolution to that race is for the
4427*4882a593Smuzhiyun 	   dir node to ignore the remove message, and the master node to
4428*4882a593Smuzhiyun 	   recreate the master rsb when it gets a request from the dir node for
4429*4882a593Smuzhiyun 	   an rsb it doesn't have. */
4430*4882a593Smuzhiyun 
4431*4882a593Smuzhiyun 	memset(name, 0, sizeof(name));
4432*4882a593Smuzhiyun 	memcpy(name, ms->m_extra, len);
4433*4882a593Smuzhiyun 
4434*4882a593Smuzhiyun 	hash = jhash(name, len, 0);
4435*4882a593Smuzhiyun 	b = hash & (ls->ls_rsbtbl_size - 1);
4436*4882a593Smuzhiyun 
4437*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[b].lock);
4438*4882a593Smuzhiyun 
4439*4882a593Smuzhiyun 	rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
4440*4882a593Smuzhiyun 	if (rv) {
4441*4882a593Smuzhiyun 		/* verify the rsb is on keep list per comment above */
4442*4882a593Smuzhiyun 		/* verify the rsb is on the keep list per the comment above */
4443*4882a593Smuzhiyun 		if (rv) {
4444*4882a593Smuzhiyun 			/* should not happen */
4445*4882a593Smuzhiyun 			log_error(ls, "receive_remove from %d not found %s",
4446*4882a593Smuzhiyun 				  from_nodeid, name);
4447*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
4448*4882a593Smuzhiyun 			return;
4449*4882a593Smuzhiyun 		}
4450*4882a593Smuzhiyun 		if (r->res_master_nodeid != from_nodeid) {
4451*4882a593Smuzhiyun 			/* should not happen */
4452*4882a593Smuzhiyun 			log_error(ls, "receive_remove keep from %d master %d",
4453*4882a593Smuzhiyun 				  from_nodeid, r->res_master_nodeid);
4454*4882a593Smuzhiyun 			dlm_print_rsb(r);
4455*4882a593Smuzhiyun 			spin_unlock(&ls->ls_rsbtbl[b].lock);
4456*4882a593Smuzhiyun 			return;
4457*4882a593Smuzhiyun 		}
4458*4882a593Smuzhiyun 
4459*4882a593Smuzhiyun 		log_debug(ls, "receive_remove from %d master %d first %x %s",
4460*4882a593Smuzhiyun 			  from_nodeid, r->res_master_nodeid, r->res_first_lkid,
4461*4882a593Smuzhiyun 			  name);
4462*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4463*4882a593Smuzhiyun 		return;
4464*4882a593Smuzhiyun 	}
4465*4882a593Smuzhiyun 
4466*4882a593Smuzhiyun 	if (r->res_master_nodeid != from_nodeid) {
4467*4882a593Smuzhiyun 		log_error(ls, "receive_remove toss from %d master %d",
4468*4882a593Smuzhiyun 			  from_nodeid, r->res_master_nodeid);
4469*4882a593Smuzhiyun 		dlm_print_rsb(r);
4470*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4471*4882a593Smuzhiyun 		return;
4472*4882a593Smuzhiyun 	}
4473*4882a593Smuzhiyun 
4474*4882a593Smuzhiyun 	if (kref_put(&r->res_ref, kill_rsb)) {
4475*4882a593Smuzhiyun 		rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
4476*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4477*4882a593Smuzhiyun 		dlm_free_rsb(r);
4478*4882a593Smuzhiyun 	} else {
4479*4882a593Smuzhiyun 		log_error(ls, "receive_remove from %d rsb ref error",
4480*4882a593Smuzhiyun 			  from_nodeid);
4481*4882a593Smuzhiyun 		dlm_print_rsb(r);
4482*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[b].lock);
4483*4882a593Smuzhiyun 	}
4484*4882a593Smuzhiyun }
4485*4882a593Smuzhiyun 
4486*4882a593Smuzhiyun static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms)
4487*4882a593Smuzhiyun {
4488*4882a593Smuzhiyun 	do_purge(ls, ms->m_nodeid, ms->m_pid);
4489*4882a593Smuzhiyun }
4490*4882a593Smuzhiyun 
4491*4882a593Smuzhiyun static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
4492*4882a593Smuzhiyun {
4493*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4494*4882a593Smuzhiyun 	struct dlm_rsb *r;
4495*4882a593Smuzhiyun 	int error, mstype, result;
4496*4882a593Smuzhiyun 	int from_nodeid = ms->m_header.h_nodeid;
4497*4882a593Smuzhiyun 
4498*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4499*4882a593Smuzhiyun 	if (error)
4500*4882a593Smuzhiyun 		return error;
4501*4882a593Smuzhiyun 
4502*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4503*4882a593Smuzhiyun 	hold_rsb(r);
4504*4882a593Smuzhiyun 	lock_rsb(r);
4505*4882a593Smuzhiyun 
4506*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4507*4882a593Smuzhiyun 	if (error)
4508*4882a593Smuzhiyun 		goto out;
4509*4882a593Smuzhiyun 
4510*4882a593Smuzhiyun 	mstype = lkb->lkb_wait_type;
4511*4882a593Smuzhiyun 	error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
4512*4882a593Smuzhiyun 	if (error) {
4513*4882a593Smuzhiyun 		log_error(ls, "receive_request_reply %x remote %d %x result %d",
4514*4882a593Smuzhiyun 			  lkb->lkb_id, from_nodeid, ms->m_lkid, ms->m_result);
4515*4882a593Smuzhiyun 		dlm_dump_rsb(r);
4516*4882a593Smuzhiyun 		goto out;
4517*4882a593Smuzhiyun 	}
4518*4882a593Smuzhiyun 
4519*4882a593Smuzhiyun 	/* Optimization: the dir node was also the master, so it took our
4520*4882a593Smuzhiyun 	   lookup as a request and sent request reply instead of lookup reply */
4521*4882a593Smuzhiyun 	if (mstype == DLM_MSG_LOOKUP) {
4522*4882a593Smuzhiyun 		r->res_master_nodeid = from_nodeid;
4523*4882a593Smuzhiyun 		r->res_nodeid = from_nodeid;
4524*4882a593Smuzhiyun 		lkb->lkb_nodeid = from_nodeid;
4525*4882a593Smuzhiyun 	}
4526*4882a593Smuzhiyun 
4527*4882a593Smuzhiyun 	/* this is the value returned from do_request() on the master */
4528*4882a593Smuzhiyun 	result = ms->m_result;
4529*4882a593Smuzhiyun 
4530*4882a593Smuzhiyun 	switch (result) {
4531*4882a593Smuzhiyun 	case -EAGAIN:
4532*4882a593Smuzhiyun 		/* request would block (be queued) on remote master */
4533*4882a593Smuzhiyun 		queue_cast(r, lkb, -EAGAIN);
4534*4882a593Smuzhiyun 		confirm_master(r, -EAGAIN);
4535*4882a593Smuzhiyun 		unhold_lkb(lkb); /* undoes create_lkb() */
4536*4882a593Smuzhiyun 		break;
4537*4882a593Smuzhiyun 
4538*4882a593Smuzhiyun 	case -EINPROGRESS:
4539*4882a593Smuzhiyun 	case 0:
4540*4882a593Smuzhiyun 		/* request was queued or granted on remote master */
4541*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4542*4882a593Smuzhiyun 		lkb->lkb_remid = ms->m_lkid;
4543*4882a593Smuzhiyun 		if (is_altmode(lkb))
4544*4882a593Smuzhiyun 			munge_altmode(lkb, ms);
4545*4882a593Smuzhiyun 		if (result) {
4546*4882a593Smuzhiyun 			add_lkb(r, lkb, DLM_LKSTS_WAITING);
4547*4882a593Smuzhiyun 			add_timeout(lkb);
4548*4882a593Smuzhiyun 		} else {
4549*4882a593Smuzhiyun 			grant_lock_pc(r, lkb, ms);
4550*4882a593Smuzhiyun 			queue_cast(r, lkb, 0);
4551*4882a593Smuzhiyun 		}
4552*4882a593Smuzhiyun 		confirm_master(r, result);
4553*4882a593Smuzhiyun 		break;
4554*4882a593Smuzhiyun 
4555*4882a593Smuzhiyun 	case -EBADR:
4556*4882a593Smuzhiyun 	case -ENOTBLK:
4557*4882a593Smuzhiyun 		/* find_rsb failed to find rsb or rsb wasn't master */
4558*4882a593Smuzhiyun 		log_limit(ls, "receive_request_reply %x from %d %d "
4559*4882a593Smuzhiyun 			  "master %d dir %d first %x %s", lkb->lkb_id,
4560*4882a593Smuzhiyun 			  from_nodeid, result, r->res_master_nodeid,
4561*4882a593Smuzhiyun 			  r->res_dir_nodeid, r->res_first_lkid, r->res_name);
4562*4882a593Smuzhiyun 
4563*4882a593Smuzhiyun 		if (r->res_dir_nodeid != dlm_our_nodeid() &&
4564*4882a593Smuzhiyun 		    r->res_master_nodeid != dlm_our_nodeid()) {
4565*4882a593Smuzhiyun 			/* cause _request_lock->set_master->send_lookup */
4566*4882a593Smuzhiyun 			r->res_master_nodeid = 0;
4567*4882a593Smuzhiyun 			r->res_nodeid = -1;
4568*4882a593Smuzhiyun 			lkb->lkb_nodeid = -1;
4569*4882a593Smuzhiyun 		}
4570*4882a593Smuzhiyun 
4571*4882a593Smuzhiyun 		if (is_overlap(lkb)) {
4572*4882a593Smuzhiyun 			/* we'll ignore error in cancel/unlock reply */
4573*4882a593Smuzhiyun 			queue_cast_overlap(r, lkb);
4574*4882a593Smuzhiyun 			confirm_master(r, result);
4575*4882a593Smuzhiyun 			unhold_lkb(lkb); /* undoes create_lkb() */
4576*4882a593Smuzhiyun 		} else {
4577*4882a593Smuzhiyun 			_request_lock(r, lkb);
4578*4882a593Smuzhiyun 
4579*4882a593Smuzhiyun 			if (r->res_master_nodeid == dlm_our_nodeid())
4580*4882a593Smuzhiyun 				confirm_master(r, 0);
4581*4882a593Smuzhiyun 		}
4582*4882a593Smuzhiyun 		break;
4583*4882a593Smuzhiyun 
4584*4882a593Smuzhiyun 	default:
4585*4882a593Smuzhiyun 		log_error(ls, "receive_request_reply %x error %d",
4586*4882a593Smuzhiyun 			  lkb->lkb_id, result);
4587*4882a593Smuzhiyun 	}
4588*4882a593Smuzhiyun 
4589*4882a593Smuzhiyun 	if (is_overlap_unlock(lkb) && (result == 0 || result == -EINPROGRESS)) {
4590*4882a593Smuzhiyun 		log_debug(ls, "receive_request_reply %x result %d unlock",
4591*4882a593Smuzhiyun 			  lkb->lkb_id, result);
4592*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4593*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4594*4882a593Smuzhiyun 		send_unlock(r, lkb);
4595*4882a593Smuzhiyun 	} else if (is_overlap_cancel(lkb) && (result == -EINPROGRESS)) {
4596*4882a593Smuzhiyun 		log_debug(ls, "receive_request_reply %x cancel", lkb->lkb_id);
4597*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4598*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4599*4882a593Smuzhiyun 		send_cancel(r, lkb);
4600*4882a593Smuzhiyun 	} else {
4601*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4602*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4603*4882a593Smuzhiyun 	}
4604*4882a593Smuzhiyun  out:
4605*4882a593Smuzhiyun 	unlock_rsb(r);
4606*4882a593Smuzhiyun 	put_rsb(r);
4607*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4608*4882a593Smuzhiyun 	return 0;
4609*4882a593Smuzhiyun }
4610*4882a593Smuzhiyun 
4611*4882a593Smuzhiyun static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
4612*4882a593Smuzhiyun 				    struct dlm_message *ms)
4613*4882a593Smuzhiyun {
4614*4882a593Smuzhiyun 	/* this is the value returned from do_convert() on the master */
4615*4882a593Smuzhiyun 	switch (ms->m_result) {
4616*4882a593Smuzhiyun 	case -EAGAIN:
4617*4882a593Smuzhiyun 		/* convert would block (be queued) on remote master */
4618*4882a593Smuzhiyun 		queue_cast(r, lkb, -EAGAIN);
4619*4882a593Smuzhiyun 		break;
4620*4882a593Smuzhiyun 
4621*4882a593Smuzhiyun 	case -EDEADLK:
4622*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4623*4882a593Smuzhiyun 		revert_lock_pc(r, lkb);
4624*4882a593Smuzhiyun 		queue_cast(r, lkb, -EDEADLK);
4625*4882a593Smuzhiyun 		break;
4626*4882a593Smuzhiyun 
4627*4882a593Smuzhiyun 	case -EINPROGRESS:
4628*4882a593Smuzhiyun 		/* convert was queued on remote master */
4629*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4630*4882a593Smuzhiyun 		if (is_demoted(lkb))
4631*4882a593Smuzhiyun 			munge_demoted(lkb);
4632*4882a593Smuzhiyun 		del_lkb(r, lkb);
4633*4882a593Smuzhiyun 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
4634*4882a593Smuzhiyun 		add_timeout(lkb);
4635*4882a593Smuzhiyun 		break;
4636*4882a593Smuzhiyun 
4637*4882a593Smuzhiyun 	case 0:
4638*4882a593Smuzhiyun 		/* convert was granted on remote master */
4639*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4640*4882a593Smuzhiyun 		if (is_demoted(lkb))
4641*4882a593Smuzhiyun 			munge_demoted(lkb);
4642*4882a593Smuzhiyun 		grant_lock_pc(r, lkb, ms);
4643*4882a593Smuzhiyun 		queue_cast(r, lkb, 0);
4644*4882a593Smuzhiyun 		break;
4645*4882a593Smuzhiyun 
4646*4882a593Smuzhiyun 	default:
4647*4882a593Smuzhiyun 		log_error(r->res_ls, "receive_convert_reply %x remote %d %x %d",
4648*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_header.h_nodeid, ms->m_lkid,
4649*4882a593Smuzhiyun 			  ms->m_result);
4650*4882a593Smuzhiyun 		dlm_print_rsb(r);
4651*4882a593Smuzhiyun 		dlm_print_lkb(lkb);
4652*4882a593Smuzhiyun 	}
4653*4882a593Smuzhiyun }
4654*4882a593Smuzhiyun 
4655*4882a593Smuzhiyun static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4656*4882a593Smuzhiyun {
4657*4882a593Smuzhiyun 	struct dlm_rsb *r = lkb->lkb_resource;
4658*4882a593Smuzhiyun 	int error;
4659*4882a593Smuzhiyun 
4660*4882a593Smuzhiyun 	hold_rsb(r);
4661*4882a593Smuzhiyun 	lock_rsb(r);
4662*4882a593Smuzhiyun 
4663*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4664*4882a593Smuzhiyun 	if (error)
4665*4882a593Smuzhiyun 		goto out;
4666*4882a593Smuzhiyun 
4667*4882a593Smuzhiyun 	/* stub reply can happen with waiters_mutex held */
4668*4882a593Smuzhiyun 	error = remove_from_waiters_ms(lkb, ms);
4669*4882a593Smuzhiyun 	if (error)
4670*4882a593Smuzhiyun 		goto out;
4671*4882a593Smuzhiyun 
4672*4882a593Smuzhiyun 	__receive_convert_reply(r, lkb, ms);
4673*4882a593Smuzhiyun  out:
4674*4882a593Smuzhiyun 	unlock_rsb(r);
4675*4882a593Smuzhiyun 	put_rsb(r);
4676*4882a593Smuzhiyun }
4677*4882a593Smuzhiyun 
4678*4882a593Smuzhiyun static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
4679*4882a593Smuzhiyun {
4680*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4681*4882a593Smuzhiyun 	int error;
4682*4882a593Smuzhiyun 
4683*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4684*4882a593Smuzhiyun 	if (error)
4685*4882a593Smuzhiyun 		return error;
4686*4882a593Smuzhiyun 
4687*4882a593Smuzhiyun 	_receive_convert_reply(lkb, ms);
4688*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4689*4882a593Smuzhiyun 	return 0;
4690*4882a593Smuzhiyun }
4691*4882a593Smuzhiyun 
4692*4882a593Smuzhiyun static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4693*4882a593Smuzhiyun {
4694*4882a593Smuzhiyun 	struct dlm_rsb *r = lkb->lkb_resource;
4695*4882a593Smuzhiyun 	int error;
4696*4882a593Smuzhiyun 
4697*4882a593Smuzhiyun 	hold_rsb(r);
4698*4882a593Smuzhiyun 	lock_rsb(r);
4699*4882a593Smuzhiyun 
4700*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4701*4882a593Smuzhiyun 	if (error)
4702*4882a593Smuzhiyun 		goto out;
4703*4882a593Smuzhiyun 
4704*4882a593Smuzhiyun 	/* stub reply can happen with waiters_mutex held */
4705*4882a593Smuzhiyun 	error = remove_from_waiters_ms(lkb, ms);
4706*4882a593Smuzhiyun 	if (error)
4707*4882a593Smuzhiyun 		goto out;
4708*4882a593Smuzhiyun 
4709*4882a593Smuzhiyun 	/* this is the value returned from do_unlock() on the master */
4710*4882a593Smuzhiyun 
4711*4882a593Smuzhiyun 	switch (ms->m_result) {
4712*4882a593Smuzhiyun 	case -DLM_EUNLOCK:
4713*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4714*4882a593Smuzhiyun 		remove_lock_pc(r, lkb);
4715*4882a593Smuzhiyun 		queue_cast(r, lkb, -DLM_EUNLOCK);
4716*4882a593Smuzhiyun 		break;
4717*4882a593Smuzhiyun 	case -ENOENT:
4718*4882a593Smuzhiyun 		break;
4719*4882a593Smuzhiyun 	default:
4720*4882a593Smuzhiyun 		log_error(r->res_ls, "receive_unlock_reply %x error %d",
4721*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_result);
4722*4882a593Smuzhiyun 	}
4723*4882a593Smuzhiyun  out:
4724*4882a593Smuzhiyun 	unlock_rsb(r);
4725*4882a593Smuzhiyun 	put_rsb(r);
4726*4882a593Smuzhiyun }
4727*4882a593Smuzhiyun 
4728*4882a593Smuzhiyun static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
4729*4882a593Smuzhiyun {
4730*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4731*4882a593Smuzhiyun 	int error;
4732*4882a593Smuzhiyun 
4733*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4734*4882a593Smuzhiyun 	if (error)
4735*4882a593Smuzhiyun 		return error;
4736*4882a593Smuzhiyun 
4737*4882a593Smuzhiyun 	_receive_unlock_reply(lkb, ms);
4738*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4739*4882a593Smuzhiyun 	return 0;
4740*4882a593Smuzhiyun }
4741*4882a593Smuzhiyun 
4742*4882a593Smuzhiyun static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4743*4882a593Smuzhiyun {
4744*4882a593Smuzhiyun 	struct dlm_rsb *r = lkb->lkb_resource;
4745*4882a593Smuzhiyun 	int error;
4746*4882a593Smuzhiyun 
4747*4882a593Smuzhiyun 	hold_rsb(r);
4748*4882a593Smuzhiyun 	lock_rsb(r);
4749*4882a593Smuzhiyun 
4750*4882a593Smuzhiyun 	error = validate_message(lkb, ms);
4751*4882a593Smuzhiyun 	if (error)
4752*4882a593Smuzhiyun 		goto out;
4753*4882a593Smuzhiyun 
4754*4882a593Smuzhiyun 	/* stub reply can happen with waiters_mutex held */
4755*4882a593Smuzhiyun 	error = remove_from_waiters_ms(lkb, ms);
4756*4882a593Smuzhiyun 	if (error)
4757*4882a593Smuzhiyun 		goto out;
4758*4882a593Smuzhiyun 
4759*4882a593Smuzhiyun 	/* this is the value returned from do_cancel() on the master */
4760*4882a593Smuzhiyun 
4761*4882a593Smuzhiyun 	switch (ms->m_result) {
4762*4882a593Smuzhiyun 	case -DLM_ECANCEL:
4763*4882a593Smuzhiyun 		receive_flags_reply(lkb, ms);
4764*4882a593Smuzhiyun 		revert_lock_pc(r, lkb);
4765*4882a593Smuzhiyun 		queue_cast(r, lkb, -DLM_ECANCEL);
4766*4882a593Smuzhiyun 		break;
4767*4882a593Smuzhiyun 	case 0:
4768*4882a593Smuzhiyun 		break;
4769*4882a593Smuzhiyun 	default:
4770*4882a593Smuzhiyun 		log_error(r->res_ls, "receive_cancel_reply %x error %d",
4771*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_result);
4772*4882a593Smuzhiyun 	}
4773*4882a593Smuzhiyun  out:
4774*4882a593Smuzhiyun 	unlock_rsb(r);
4775*4882a593Smuzhiyun 	put_rsb(r);
4776*4882a593Smuzhiyun }
4777*4882a593Smuzhiyun 
4778*4882a593Smuzhiyun static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
4779*4882a593Smuzhiyun {
4780*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4781*4882a593Smuzhiyun 	int error;
4782*4882a593Smuzhiyun 
4783*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_remid, &lkb);
4784*4882a593Smuzhiyun 	if (error)
4785*4882a593Smuzhiyun 		return error;
4786*4882a593Smuzhiyun 
4787*4882a593Smuzhiyun 	_receive_cancel_reply(lkb, ms);
4788*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4789*4882a593Smuzhiyun 	return 0;
4790*4882a593Smuzhiyun }
4791*4882a593Smuzhiyun 
4792*4882a593Smuzhiyun static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
4793*4882a593Smuzhiyun {
4794*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
4795*4882a593Smuzhiyun 	struct dlm_rsb *r;
4796*4882a593Smuzhiyun 	int error, ret_nodeid;
4797*4882a593Smuzhiyun 	int do_lookup_list = 0;
4798*4882a593Smuzhiyun 
4799*4882a593Smuzhiyun 	error = find_lkb(ls, ms->m_lkid, &lkb);
4800*4882a593Smuzhiyun 	if (error) {
4801*4882a593Smuzhiyun 		log_error(ls, "receive_lookup_reply no lkid %x", ms->m_lkid);
4802*4882a593Smuzhiyun 		return;
4803*4882a593Smuzhiyun 	}
4804*4882a593Smuzhiyun 
4805*4882a593Smuzhiyun 	/* ms->m_result is the value returned by dlm_master_lookup on dir node
4806*4882a593Smuzhiyun 	   FIXME: will a non-zero error ever be returned? */
4807*4882a593Smuzhiyun 
4808*4882a593Smuzhiyun 	r = lkb->lkb_resource;
4809*4882a593Smuzhiyun 	hold_rsb(r);
4810*4882a593Smuzhiyun 	lock_rsb(r);
4811*4882a593Smuzhiyun 
4812*4882a593Smuzhiyun 	error = remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
4813*4882a593Smuzhiyun 	if (error)
4814*4882a593Smuzhiyun 		goto out;
4815*4882a593Smuzhiyun 
4816*4882a593Smuzhiyun 	ret_nodeid = ms->m_nodeid;
4817*4882a593Smuzhiyun 
4818*4882a593Smuzhiyun 	/* We sometimes receive a request from the dir node for this
4819*4882a593Smuzhiyun 	   rsb before we've received the dir node's lookup_reply for it.
4820*4882a593Smuzhiyun 	   The request from the dir node implies we're the master, so we set
4821*4882a593Smuzhiyun 	   ourself as master in receive_request_reply, and verify here that
4822*4882a593Smuzhiyun 	   we are indeed the master. */
4823*4882a593Smuzhiyun 
4824*4882a593Smuzhiyun 	if (r->res_master_nodeid && (r->res_master_nodeid != ret_nodeid)) {
4825*4882a593Smuzhiyun 		/* This should never happen */
4826*4882a593Smuzhiyun 		log_error(ls, "receive_lookup_reply %x from %d ret %d "
4827*4882a593Smuzhiyun 			  "master %d dir %d our %d first %x %s",
4828*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_header.h_nodeid, ret_nodeid,
4829*4882a593Smuzhiyun 			  r->res_master_nodeid, r->res_dir_nodeid,
4830*4882a593Smuzhiyun 			  dlm_our_nodeid(), r->res_first_lkid, r->res_name);
4831*4882a593Smuzhiyun 	}
4832*4882a593Smuzhiyun 
4833*4882a593Smuzhiyun 	if (ret_nodeid == dlm_our_nodeid()) {
4834*4882a593Smuzhiyun 		r->res_master_nodeid = ret_nodeid;
4835*4882a593Smuzhiyun 		r->res_nodeid = 0;
4836*4882a593Smuzhiyun 		do_lookup_list = 1;
4837*4882a593Smuzhiyun 		r->res_first_lkid = 0;
4838*4882a593Smuzhiyun 	} else if (ret_nodeid == -1) {
4839*4882a593Smuzhiyun 		/* the remote node doesn't believe it's the dir node */
4840*4882a593Smuzhiyun 		log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid",
4841*4882a593Smuzhiyun 			  lkb->lkb_id, ms->m_header.h_nodeid);
4842*4882a593Smuzhiyun 		r->res_master_nodeid = 0;
4843*4882a593Smuzhiyun 		r->res_nodeid = -1;
4844*4882a593Smuzhiyun 		lkb->lkb_nodeid = -1;
4845*4882a593Smuzhiyun 	} else {
4846*4882a593Smuzhiyun 		/* set_master() will set lkb_nodeid from r */
4847*4882a593Smuzhiyun 		r->res_master_nodeid = ret_nodeid;
4848*4882a593Smuzhiyun 		r->res_nodeid = ret_nodeid;
4849*4882a593Smuzhiyun 	}
4850*4882a593Smuzhiyun 
4851*4882a593Smuzhiyun 	if (is_overlap(lkb)) {
4852*4882a593Smuzhiyun 		log_debug(ls, "receive_lookup_reply %x unlock %x",
4853*4882a593Smuzhiyun 			  lkb->lkb_id, lkb->lkb_flags);
4854*4882a593Smuzhiyun 		queue_cast_overlap(r, lkb);
4855*4882a593Smuzhiyun 		unhold_lkb(lkb); /* undoes create_lkb() */
4856*4882a593Smuzhiyun 		goto out_list;
4857*4882a593Smuzhiyun 	}
4858*4882a593Smuzhiyun 
4859*4882a593Smuzhiyun 	_request_lock(r, lkb);
4860*4882a593Smuzhiyun 
4861*4882a593Smuzhiyun  out_list:
4862*4882a593Smuzhiyun 	if (do_lookup_list)
4863*4882a593Smuzhiyun 		process_lookup_list(r);
4864*4882a593Smuzhiyun  out:
4865*4882a593Smuzhiyun 	unlock_rsb(r);
4866*4882a593Smuzhiyun 	put_rsb(r);
4867*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
4868*4882a593Smuzhiyun }
4869*4882a593Smuzhiyun 
4870*4882a593Smuzhiyun static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms,
4871*4882a593Smuzhiyun 			     uint32_t saved_seq)
4872*4882a593Smuzhiyun {
4873*4882a593Smuzhiyun 	int error = 0, noent = 0;
4874*4882a593Smuzhiyun 
4875*4882a593Smuzhiyun 	if (!dlm_is_member(ls, ms->m_header.h_nodeid)) {
4876*4882a593Smuzhiyun 		log_limit(ls, "receive %d from non-member %d %x %x %d",
4877*4882a593Smuzhiyun 			  ms->m_type, ms->m_header.h_nodeid, ms->m_lkid,
4878*4882a593Smuzhiyun 			  ms->m_remid, ms->m_result);
4879*4882a593Smuzhiyun 		return;
4880*4882a593Smuzhiyun 	}
4881*4882a593Smuzhiyun 
4882*4882a593Smuzhiyun 	switch (ms->m_type) {
4883*4882a593Smuzhiyun 
4884*4882a593Smuzhiyun 	/* messages sent to a master node */
4885*4882a593Smuzhiyun 
4886*4882a593Smuzhiyun 	case DLM_MSG_REQUEST:
4887*4882a593Smuzhiyun 		error = receive_request(ls, ms);
4888*4882a593Smuzhiyun 		break;
4889*4882a593Smuzhiyun 
4890*4882a593Smuzhiyun 	case DLM_MSG_CONVERT:
4891*4882a593Smuzhiyun 		error = receive_convert(ls, ms);
4892*4882a593Smuzhiyun 		break;
4893*4882a593Smuzhiyun 
4894*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK:
4895*4882a593Smuzhiyun 		error = receive_unlock(ls, ms);
4896*4882a593Smuzhiyun 		break;
4897*4882a593Smuzhiyun 
4898*4882a593Smuzhiyun 	case DLM_MSG_CANCEL:
4899*4882a593Smuzhiyun 		noent = 1;
4900*4882a593Smuzhiyun 		error = receive_cancel(ls, ms);
4901*4882a593Smuzhiyun 		break;
4902*4882a593Smuzhiyun 
4903*4882a593Smuzhiyun 	/* messages sent from a master node (replies to above) */
4904*4882a593Smuzhiyun 
4905*4882a593Smuzhiyun 	case DLM_MSG_REQUEST_REPLY:
4906*4882a593Smuzhiyun 		error = receive_request_reply(ls, ms);
4907*4882a593Smuzhiyun 		break;
4908*4882a593Smuzhiyun 
4909*4882a593Smuzhiyun 	case DLM_MSG_CONVERT_REPLY:
4910*4882a593Smuzhiyun 		error = receive_convert_reply(ls, ms);
4911*4882a593Smuzhiyun 		break;
4912*4882a593Smuzhiyun 
4913*4882a593Smuzhiyun 	case DLM_MSG_UNLOCK_REPLY:
4914*4882a593Smuzhiyun 		error = receive_unlock_reply(ls, ms);
4915*4882a593Smuzhiyun 		break;
4916*4882a593Smuzhiyun 
4917*4882a593Smuzhiyun 	case DLM_MSG_CANCEL_REPLY:
4918*4882a593Smuzhiyun 		error = receive_cancel_reply(ls, ms);
4919*4882a593Smuzhiyun 		break;
4920*4882a593Smuzhiyun 
4921*4882a593Smuzhiyun 	/* messages sent from a master node (only two types of async msg) */
4922*4882a593Smuzhiyun 
4923*4882a593Smuzhiyun 	case DLM_MSG_GRANT:
4924*4882a593Smuzhiyun 		noent = 1;
4925*4882a593Smuzhiyun 		error = receive_grant(ls, ms);
4926*4882a593Smuzhiyun 		break;
4927*4882a593Smuzhiyun 
4928*4882a593Smuzhiyun 	case DLM_MSG_BAST:
4929*4882a593Smuzhiyun 		noent = 1;
4930*4882a593Smuzhiyun 		error = receive_bast(ls, ms);
4931*4882a593Smuzhiyun 		break;
4932*4882a593Smuzhiyun 
4933*4882a593Smuzhiyun 	/* messages sent to a dir node */
4934*4882a593Smuzhiyun 
4935*4882a593Smuzhiyun 	case DLM_MSG_LOOKUP:
4936*4882a593Smuzhiyun 		receive_lookup(ls, ms);
4937*4882a593Smuzhiyun 		break;
4938*4882a593Smuzhiyun 
4939*4882a593Smuzhiyun 	case DLM_MSG_REMOVE:
4940*4882a593Smuzhiyun 		receive_remove(ls, ms);
4941*4882a593Smuzhiyun 		break;
4942*4882a593Smuzhiyun 
4943*4882a593Smuzhiyun 	/* messages sent from a dir node (remove has no reply) */
4944*4882a593Smuzhiyun 
4945*4882a593Smuzhiyun 	case DLM_MSG_LOOKUP_REPLY:
4946*4882a593Smuzhiyun 		receive_lookup_reply(ls, ms);
4947*4882a593Smuzhiyun 		break;
4948*4882a593Smuzhiyun 
4949*4882a593Smuzhiyun 	/* other messages */
4950*4882a593Smuzhiyun 
4951*4882a593Smuzhiyun 	case DLM_MSG_PURGE:
4952*4882a593Smuzhiyun 		receive_purge(ls, ms);
4953*4882a593Smuzhiyun 		break;
4954*4882a593Smuzhiyun 
4955*4882a593Smuzhiyun 	default:
4956*4882a593Smuzhiyun 		log_error(ls, "unknown message type %d", ms->m_type);
4957*4882a593Smuzhiyun 	}
4958*4882a593Smuzhiyun 
4959*4882a593Smuzhiyun 	/*
4960*4882a593Smuzhiyun 	 * When checking for ENOENT, we're checking the result of
4961*4882a593Smuzhiyun 	 * find_lkb(m_remid):
4962*4882a593Smuzhiyun 	 *
4963*4882a593Smuzhiyun 	 * The lock id referenced in the message wasn't found.  This may
4964*4882a593Smuzhiyun 	 * happen in normal usage for the async messages and cancel, so
4965*4882a593Smuzhiyun 	 * only use log_debug for them.
4966*4882a593Smuzhiyun 	 *
4967*4882a593Smuzhiyun 	 * Some errors are expected and normal.
4968*4882a593Smuzhiyun 	 */
4969*4882a593Smuzhiyun 
4970*4882a593Smuzhiyun 	if (error == -ENOENT && noent) {
4971*4882a593Smuzhiyun 		log_debug(ls, "receive %d no %x remote %d %x saved_seq %u",
4972*4882a593Smuzhiyun 			  ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
4973*4882a593Smuzhiyun 			  ms->m_lkid, saved_seq);
4974*4882a593Smuzhiyun 	} else if (error == -ENOENT) {
4975*4882a593Smuzhiyun 		log_error(ls, "receive %d no %x remote %d %x saved_seq %u",
4976*4882a593Smuzhiyun 			  ms->m_type, ms->m_remid, ms->m_header.h_nodeid,
4977*4882a593Smuzhiyun 			  ms->m_lkid, saved_seq);
4978*4882a593Smuzhiyun 
4979*4882a593Smuzhiyun 		if (ms->m_type == DLM_MSG_CONVERT)
4980*4882a593Smuzhiyun 			dlm_dump_rsb_hash(ls, ms->m_hash);
4981*4882a593Smuzhiyun 	}
4982*4882a593Smuzhiyun 
4983*4882a593Smuzhiyun 	if (error == -EINVAL) {
4984*4882a593Smuzhiyun 		log_error(ls, "receive %d inval from %d lkid %x remid %x "
4985*4882a593Smuzhiyun 			  "saved_seq %u",
4986*4882a593Smuzhiyun 			  ms->m_type, ms->m_header.h_nodeid,
4987*4882a593Smuzhiyun 			  ms->m_lkid, ms->m_remid, saved_seq);
4988*4882a593Smuzhiyun 	}
4989*4882a593Smuzhiyun }
4990*4882a593Smuzhiyun 
4991*4882a593Smuzhiyun /* If the lockspace is in recovery mode (locking stopped), then normal
4992*4882a593Smuzhiyun    messages are saved on the requestqueue for processing after recovery is
4993*4882a593Smuzhiyun    done.  When not in recovery mode, we wait for dlm_recoverd to drain saved
4994*4882a593Smuzhiyun    messages off the requestqueue before we process new ones. This occurs right
4995*4882a593Smuzhiyun    after recovery completes when we transition from saving all messages on
4996*4882a593Smuzhiyun    requestqueue, to processing all the saved messages, to processing new
4997*4882a593Smuzhiyun    messages as they arrive. */
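/* Illustrative timeline of the three phases above (a sketch of the
   expected behavior, not code from this file):

	recovery running:  dlm_locking_stopped() -> dlm_add_requestqueue()
	recovery finished: dlm_recoverd drains via dlm_process_requestqueue(),
			   feeding each saved ms to dlm_receive_message_saved()
	steady state:      dlm_wait_requestqueue(), then _receive_message()
*/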
4998*4882a593Smuzhiyun 
4999*4882a593Smuzhiyun static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
5000*4882a593Smuzhiyun 				int nodeid)
5001*4882a593Smuzhiyun {
5002*4882a593Smuzhiyun 	if (dlm_locking_stopped(ls)) {
5003*4882a593Smuzhiyun 		/* If we were a member of this lockspace, left, and rejoined,
5004*4882a593Smuzhiyun 		   other nodes may still be sending us messages from the
5005*4882a593Smuzhiyun 		   lockspace generation before we left. */
5006*4882a593Smuzhiyun 		if (!ls->ls_generation) {
5007*4882a593Smuzhiyun 			log_limit(ls, "receive %d from %d ignore old gen",
5008*4882a593Smuzhiyun 				  ms->m_type, nodeid);
5009*4882a593Smuzhiyun 			return;
5010*4882a593Smuzhiyun 		}
5011*4882a593Smuzhiyun 
5012*4882a593Smuzhiyun 		dlm_add_requestqueue(ls, nodeid, ms);
5013*4882a593Smuzhiyun 	} else {
5014*4882a593Smuzhiyun 		dlm_wait_requestqueue(ls);
5015*4882a593Smuzhiyun 		_receive_message(ls, ms, 0);
5016*4882a593Smuzhiyun 	}
5017*4882a593Smuzhiyun }
5018*4882a593Smuzhiyun 
5019*4882a593Smuzhiyun /* This is called by dlm_recoverd to process messages that were saved on
5020*4882a593Smuzhiyun    the requestqueue. */
5021*4882a593Smuzhiyun 
5022*4882a593Smuzhiyun void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms,
5023*4882a593Smuzhiyun 			       uint32_t saved_seq)
5024*4882a593Smuzhiyun {
5025*4882a593Smuzhiyun 	_receive_message(ls, ms, saved_seq);
5026*4882a593Smuzhiyun }
5027*4882a593Smuzhiyun 
5028*4882a593Smuzhiyun /* This is called by the midcomms layer when something is received for
5029*4882a593Smuzhiyun    the lockspace.  It could be either a MSG (normal message sent as part of
5030*4882a593Smuzhiyun    standard locking activity) or an RCOM (recovery message sent as part of
5031*4882a593Smuzhiyun    lockspace recovery). */
5032*4882a593Smuzhiyun 
5033*4882a593Smuzhiyun void dlm_receive_buffer(union dlm_packet *p, int nodeid)
5034*4882a593Smuzhiyun {
5035*4882a593Smuzhiyun 	struct dlm_header *hd = &p->header;
5036*4882a593Smuzhiyun 	struct dlm_ls *ls;
5037*4882a593Smuzhiyun 	int type = 0;
5038*4882a593Smuzhiyun 
5039*4882a593Smuzhiyun 	switch (hd->h_cmd) {
5040*4882a593Smuzhiyun 	case DLM_MSG:
5041*4882a593Smuzhiyun 		dlm_message_in(&p->message);
5042*4882a593Smuzhiyun 		type = p->message.m_type;
5043*4882a593Smuzhiyun 		break;
5044*4882a593Smuzhiyun 	case DLM_RCOM:
5045*4882a593Smuzhiyun 		dlm_rcom_in(&p->rcom);
5046*4882a593Smuzhiyun 		type = p->rcom.rc_type;
5047*4882a593Smuzhiyun 		break;
5048*4882a593Smuzhiyun 	default:
5049*4882a593Smuzhiyun 		log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
5050*4882a593Smuzhiyun 		return;
5051*4882a593Smuzhiyun 	}
5052*4882a593Smuzhiyun 
5053*4882a593Smuzhiyun 	if (hd->h_nodeid != nodeid) {
5054*4882a593Smuzhiyun 		log_print("invalid h_nodeid %d from %d lockspace %x",
5055*4882a593Smuzhiyun 			  hd->h_nodeid, nodeid, hd->h_lockspace);
5056*4882a593Smuzhiyun 		return;
5057*4882a593Smuzhiyun 	}
5058*4882a593Smuzhiyun 
5059*4882a593Smuzhiyun 	ls = dlm_find_lockspace_global(hd->h_lockspace);
5060*4882a593Smuzhiyun 	if (!ls) {
5061*4882a593Smuzhiyun 		if (dlm_config.ci_log_debug) {
5062*4882a593Smuzhiyun 			printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace "
5063*4882a593Smuzhiyun 				"%u from %d cmd %d type %d\n",
5064*4882a593Smuzhiyun 				hd->h_lockspace, nodeid, hd->h_cmd, type);
5065*4882a593Smuzhiyun 		}
5066*4882a593Smuzhiyun 
5067*4882a593Smuzhiyun 		if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
5068*4882a593Smuzhiyun 			dlm_send_ls_not_ready(nodeid, &p->rcom);
5069*4882a593Smuzhiyun 		return;
5070*4882a593Smuzhiyun 	}
5071*4882a593Smuzhiyun 
5072*4882a593Smuzhiyun 	/* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
5073*4882a593Smuzhiyun 	   be inactive (in this ls) before transitioning to recovery mode */
5074*4882a593Smuzhiyun 
5075*4882a593Smuzhiyun 	down_read(&ls->ls_recv_active);
5076*4882a593Smuzhiyun 	if (hd->h_cmd == DLM_MSG)
5077*4882a593Smuzhiyun 		dlm_receive_message(ls, &p->message, nodeid);
5078*4882a593Smuzhiyun 	else
5079*4882a593Smuzhiyun 		dlm_receive_rcom(ls, &p->rcom, nodeid);
5080*4882a593Smuzhiyun 	up_read(&ls->ls_recv_active);
5081*4882a593Smuzhiyun 
5082*4882a593Smuzhiyun 	dlm_put_lockspace(ls);
5083*4882a593Smuzhiyun }
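/* For reference, a minimal sketch of the writer side of ls_recv_active
   (based on dlm_ls_stop() in member.c; see that file for the real code):

	down_write(&ls->ls_recv_active);
	...mark locking stopped / bump ls_generation...
	up_write(&ls->ls_recv_active);

   Once the writer holds the rwsem, no dlm_recv thread is inside the
   read-side section above, so messages arriving after that point take
   the requestqueue path in dlm_receive_message(). */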
5084*4882a593Smuzhiyun 
5085*4882a593Smuzhiyun static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
5086*4882a593Smuzhiyun 				   struct dlm_message *ms_stub)
5087*4882a593Smuzhiyun {
5088*4882a593Smuzhiyun 	if (middle_conversion(lkb)) {
5089*4882a593Smuzhiyun 		hold_lkb(lkb);
5090*4882a593Smuzhiyun 		memset(ms_stub, 0, sizeof(struct dlm_message));
5091*4882a593Smuzhiyun 		ms_stub->m_flags = DLM_IFL_STUB_MS;
5092*4882a593Smuzhiyun 		ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
5093*4882a593Smuzhiyun 		ms_stub->m_result = -EINPROGRESS;
5094*4882a593Smuzhiyun 		ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
5095*4882a593Smuzhiyun 		_receive_convert_reply(lkb, ms_stub);
5096*4882a593Smuzhiyun 
5097*4882a593Smuzhiyun 		/* Same special case as in receive_rcom_lock_args() */
5098*4882a593Smuzhiyun 		lkb->lkb_grmode = DLM_LOCK_IV;
5099*4882a593Smuzhiyun 		rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
5100*4882a593Smuzhiyun 		unhold_lkb(lkb);
5101*4882a593Smuzhiyun 
5102*4882a593Smuzhiyun 	} else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
5103*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_RESEND;
5104*4882a593Smuzhiyun 	}
5105*4882a593Smuzhiyun 
5106*4882a593Smuzhiyun 	/* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down
5107*4882a593Smuzhiyun 	   conversions are async; there's no reply from the remote master */
5108*4882a593Smuzhiyun }
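/* Note: DLM_IFL_STUB_MS marks ms_stub as a locally fabricated reply,
   which is how remove_from_waiters_ms() knows ls_waiters_mutex is
   already held; this is what the "stub reply" comments in the
   _receive_*_reply paths above refer to. */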
5109*4882a593Smuzhiyun 
5110*4882a593Smuzhiyun /* A waiting lkb needs recovery if the master node has failed, or
5111*4882a593Smuzhiyun    the master node is changing (only when no directory is used) */
5112*4882a593Smuzhiyun 
5113*4882a593Smuzhiyun static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb,
5114*4882a593Smuzhiyun 				 int dir_nodeid)
5115*4882a593Smuzhiyun {
5116*4882a593Smuzhiyun 	if (dlm_no_directory(ls))
5117*4882a593Smuzhiyun 		return 1;
5118*4882a593Smuzhiyun 
5119*4882a593Smuzhiyun 	if (dlm_is_removed(ls, lkb->lkb_wait_nodeid))
5120*4882a593Smuzhiyun 		return 1;
5121*4882a593Smuzhiyun 
5122*4882a593Smuzhiyun 	return 0;
5123*4882a593Smuzhiyun }
5124*4882a593Smuzhiyun 
5125*4882a593Smuzhiyun /* Recovery for locks that are waiting for replies from nodes that are now
5126*4882a593Smuzhiyun    gone.  We can just complete unlocks and cancels by faking a reply from the
5127*4882a593Smuzhiyun    dead node.  Requests and up-conversions we flag to be resent after
5128*4882a593Smuzhiyun    recovery.  Down-conversions can just be completed with a fake reply like
5129*4882a593Smuzhiyun    unlocks.  Conversions between PR and CW need special attention. */
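/* Summarizing the switch below (assuming the common non-overlap case;
   the overlap handling adjusts the stub results):

	wait_type         action
	DLM_MSG_LOOKUP    flag DLM_IFL_RESEND, resend after recovery
	DLM_MSG_REQUEST   flag DLM_IFL_RESEND, resend after recovery
	DLM_MSG_CONVERT   recover_convert_waiter(): stub -EINPROGRESS or resend
	DLM_MSG_UNLOCK    stub reply with -DLM_EUNLOCK
	DLM_MSG_CANCEL    stub reply with -DLM_ECANCEL
*/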
5130*4882a593Smuzhiyun 
5131*4882a593Smuzhiyun void dlm_recover_waiters_pre(struct dlm_ls *ls)
5132*4882a593Smuzhiyun {
5133*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
5134*4882a593Smuzhiyun 	struct dlm_message *ms_stub;
5135*4882a593Smuzhiyun 	int wait_type, stub_unlock_result, stub_cancel_result;
5136*4882a593Smuzhiyun 	int dir_nodeid;
5137*4882a593Smuzhiyun 
5138*4882a593Smuzhiyun 	ms_stub = kmalloc(sizeof(*ms_stub), GFP_KERNEL);
5139*4882a593Smuzhiyun 	if (!ms_stub)
5140*4882a593Smuzhiyun 		return;
5141*4882a593Smuzhiyun 
5142*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
5143*4882a593Smuzhiyun 
5144*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
5145*4882a593Smuzhiyun 
5146*4882a593Smuzhiyun 		dir_nodeid = dlm_dir_nodeid(lkb->lkb_resource);
5147*4882a593Smuzhiyun 
5148*4882a593Smuzhiyun 		/* exclude debug messages about unlocks because there can be so
5149*4882a593Smuzhiyun 		   many and they aren't very interesting */
5150*4882a593Smuzhiyun 
5151*4882a593Smuzhiyun 		if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
5152*4882a593Smuzhiyun 			log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d "
5153*4882a593Smuzhiyun 				  "lkb_nodeid %d wait_nodeid %d dir_nodeid %d",
5154*4882a593Smuzhiyun 				  lkb->lkb_id,
5155*4882a593Smuzhiyun 				  lkb->lkb_remid,
5156*4882a593Smuzhiyun 				  lkb->lkb_wait_type,
5157*4882a593Smuzhiyun 				  lkb->lkb_resource->res_nodeid,
5158*4882a593Smuzhiyun 				  lkb->lkb_nodeid,
5159*4882a593Smuzhiyun 				  lkb->lkb_wait_nodeid,
5160*4882a593Smuzhiyun 				  dir_nodeid);
5161*4882a593Smuzhiyun 		}
5162*4882a593Smuzhiyun 
5163*4882a593Smuzhiyun 		/* all outstanding lookups, regardless of destination, will be
5164*4882a593Smuzhiyun 		   resent after recovery is done */
5165*4882a593Smuzhiyun 
5166*4882a593Smuzhiyun 		if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) {
5167*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_RESEND;
5168*4882a593Smuzhiyun 			continue;
5169*4882a593Smuzhiyun 		}
5170*4882a593Smuzhiyun 
5171*4882a593Smuzhiyun 		if (!waiter_needs_recovery(ls, lkb, dir_nodeid))
5172*4882a593Smuzhiyun 			continue;
5173*4882a593Smuzhiyun 
5174*4882a593Smuzhiyun 		wait_type = lkb->lkb_wait_type;
5175*4882a593Smuzhiyun 		stub_unlock_result = -DLM_EUNLOCK;
5176*4882a593Smuzhiyun 		stub_cancel_result = -DLM_ECANCEL;
5177*4882a593Smuzhiyun 
5178*4882a593Smuzhiyun 		/* Main reply may have been received leaving a zero wait_type,
5179*4882a593Smuzhiyun 		   but a reply for the overlapping op may not have been
5180*4882a593Smuzhiyun 		   received.  In that case we need to fake the appropriate
5181*4882a593Smuzhiyun 		   reply for the overlap op. */
5182*4882a593Smuzhiyun 
5183*4882a593Smuzhiyun 		if (!wait_type) {
5184*4882a593Smuzhiyun 			if (is_overlap_cancel(lkb)) {
5185*4882a593Smuzhiyun 				wait_type = DLM_MSG_CANCEL;
5186*4882a593Smuzhiyun 				if (lkb->lkb_grmode == DLM_LOCK_IV)
5187*4882a593Smuzhiyun 					stub_cancel_result = 0;
5188*4882a593Smuzhiyun 			}
5189*4882a593Smuzhiyun 			if (is_overlap_unlock(lkb)) {
5190*4882a593Smuzhiyun 				wait_type = DLM_MSG_UNLOCK;
5191*4882a593Smuzhiyun 				if (lkb->lkb_grmode == DLM_LOCK_IV)
5192*4882a593Smuzhiyun 					stub_unlock_result = -ENOENT;
5193*4882a593Smuzhiyun 			}
5194*4882a593Smuzhiyun 
5195*4882a593Smuzhiyun 			log_debug(ls, "rwpre overlap %x %x %d %d %d",
5196*4882a593Smuzhiyun 				  lkb->lkb_id, lkb->lkb_flags, wait_type,
5197*4882a593Smuzhiyun 				  stub_cancel_result, stub_unlock_result);
5198*4882a593Smuzhiyun 		}
5199*4882a593Smuzhiyun 
5200*4882a593Smuzhiyun 		switch (wait_type) {
5201*4882a593Smuzhiyun 
5202*4882a593Smuzhiyun 		case DLM_MSG_REQUEST:
5203*4882a593Smuzhiyun 			lkb->lkb_flags |= DLM_IFL_RESEND;
5204*4882a593Smuzhiyun 			break;
5205*4882a593Smuzhiyun 
5206*4882a593Smuzhiyun 		case DLM_MSG_CONVERT:
5207*4882a593Smuzhiyun 			recover_convert_waiter(ls, lkb, ms_stub);
5208*4882a593Smuzhiyun 			break;
5209*4882a593Smuzhiyun 
5210*4882a593Smuzhiyun 		case DLM_MSG_UNLOCK:
5211*4882a593Smuzhiyun 			hold_lkb(lkb);
5212*4882a593Smuzhiyun 			memset(ms_stub, 0, sizeof(struct dlm_message));
5213*4882a593Smuzhiyun 			ms_stub->m_flags = DLM_IFL_STUB_MS;
5214*4882a593Smuzhiyun 			ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
5215*4882a593Smuzhiyun 			ms_stub->m_result = stub_unlock_result;
5216*4882a593Smuzhiyun 			ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
5217*4882a593Smuzhiyun 			_receive_unlock_reply(lkb, ms_stub);
5218*4882a593Smuzhiyun 			dlm_put_lkb(lkb);
5219*4882a593Smuzhiyun 			break;
5220*4882a593Smuzhiyun 
5221*4882a593Smuzhiyun 		case DLM_MSG_CANCEL:
5222*4882a593Smuzhiyun 			hold_lkb(lkb);
5223*4882a593Smuzhiyun 			memset(ms_stub, 0, sizeof(struct dlm_message));
5224*4882a593Smuzhiyun 			ms_stub->m_flags = DLM_IFL_STUB_MS;
5225*4882a593Smuzhiyun 			ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
5226*4882a593Smuzhiyun 			ms_stub->m_result = stub_cancel_result;
5227*4882a593Smuzhiyun 			ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
5228*4882a593Smuzhiyun 			_receive_cancel_reply(lkb, ms_stub);
5229*4882a593Smuzhiyun 			dlm_put_lkb(lkb);
5230*4882a593Smuzhiyun 			break;
5231*4882a593Smuzhiyun 
5232*4882a593Smuzhiyun 		default:
5233*4882a593Smuzhiyun 			log_error(ls, "invalid lkb wait_type %d %d",
5234*4882a593Smuzhiyun 				  lkb->lkb_wait_type, wait_type);
5235*4882a593Smuzhiyun 		}
5236*4882a593Smuzhiyun 		schedule();
5237*4882a593Smuzhiyun 	}
5238*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
5239*4882a593Smuzhiyun 	kfree(ms_stub);
5240*4882a593Smuzhiyun }
5241*4882a593Smuzhiyun 
5242*4882a593Smuzhiyun static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
5243*4882a593Smuzhiyun {
5244*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5245*4882a593Smuzhiyun 	int found = 0;
5246*4882a593Smuzhiyun 
5247*4882a593Smuzhiyun 	mutex_lock(&ls->ls_waiters_mutex);
5248*4882a593Smuzhiyun 	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
5249*4882a593Smuzhiyun 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
5250*4882a593Smuzhiyun 			hold_lkb(lkb);
5251*4882a593Smuzhiyun 			found = 1;
5252*4882a593Smuzhiyun 			break;
5253*4882a593Smuzhiyun 		}
5254*4882a593Smuzhiyun 	}
5255*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_waiters_mutex);
5256*4882a593Smuzhiyun 
5257*4882a593Smuzhiyun 	if (!found)
5258*4882a593Smuzhiyun 		lkb = NULL;
5259*4882a593Smuzhiyun 	return lkb;
5260*4882a593Smuzhiyun }
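/* Typical use (as in dlm_recover_waiters_post() below): loop until no
   RESEND-flagged lkb remains; the lkb is returned with a reference held,
   which the caller drops with dlm_put_lkb():

	while ((lkb = find_resend_waiter(ls))) {
		...process lkb->lkb_resource...
		dlm_put_lkb(lkb);
	}
*/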
5261*4882a593Smuzhiyun 
5262*4882a593Smuzhiyun /* Deal with lookups and lkb's marked RESEND from _pre.  We may now be the
5263*4882a593Smuzhiyun    master or dir-node for r.  Processing the lkb may result in it being placed
5264*4882a593Smuzhiyun    back on waiters. */
5265*4882a593Smuzhiyun 
5266*4882a593Smuzhiyun /* We do this after normal locking has been enabled and any saved messages
5267*4882a593Smuzhiyun    (in requestqueue) have been processed.  We should be confident that at
5268*4882a593Smuzhiyun    this point we won't get or process a reply to any of these waiting
5269*4882a593Smuzhiyun    operations.  But, new ops may be coming in on the rsbs/locks here from
5270*4882a593Smuzhiyun    userspace or remotely. */
5271*4882a593Smuzhiyun 
5272*4882a593Smuzhiyun /* there may have been an overlap unlock/cancel prior to recovery or after
5273*4882a593Smuzhiyun    recovery.  if before, the lkb may still have a positive wait_count; if
5274*4882a593Smuzhiyun    after, the overlap flag would just have been set and nothing new sent.  we
5275*4882a593Smuzhiyun    can be confident here that any replies to either the initial op or overlap
5276*4882a593Smuzhiyun    ops prior to recovery have been received. */
5277*4882a593Smuzhiyun 
5278*4882a593Smuzhiyun int dlm_recover_waiters_post(struct dlm_ls *ls)
5279*4882a593Smuzhiyun {
5280*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5281*4882a593Smuzhiyun 	struct dlm_rsb *r;
5282*4882a593Smuzhiyun 	int error = 0, mstype, err, oc, ou;
5283*4882a593Smuzhiyun 
5284*4882a593Smuzhiyun 	while (1) {
5285*4882a593Smuzhiyun 		if (dlm_locking_stopped(ls)) {
5286*4882a593Smuzhiyun 			log_debug(ls, "recover_waiters_post aborted");
5287*4882a593Smuzhiyun 			error = -EINTR;
5288*4882a593Smuzhiyun 			break;
5289*4882a593Smuzhiyun 		}
5290*4882a593Smuzhiyun 
5291*4882a593Smuzhiyun 		lkb = find_resend_waiter(ls);
5292*4882a593Smuzhiyun 		if (!lkb)
5293*4882a593Smuzhiyun 			break;
5294*4882a593Smuzhiyun 
5295*4882a593Smuzhiyun 		r = lkb->lkb_resource;
5296*4882a593Smuzhiyun 		hold_rsb(r);
5297*4882a593Smuzhiyun 		lock_rsb(r);
5298*4882a593Smuzhiyun 
5299*4882a593Smuzhiyun 		mstype = lkb->lkb_wait_type;
5300*4882a593Smuzhiyun 		oc = is_overlap_cancel(lkb);
5301*4882a593Smuzhiyun 		ou = is_overlap_unlock(lkb);
5302*4882a593Smuzhiyun 		err = 0;
5303*4882a593Smuzhiyun 
5304*4882a593Smuzhiyun 		log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d "
5305*4882a593Smuzhiyun 			  "lkb_nodeid %d wait_nodeid %d dir_nodeid %d "
5306*4882a593Smuzhiyun 			  "overlap %d %d", lkb->lkb_id, lkb->lkb_remid, mstype,
5307*4882a593Smuzhiyun 			  r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
5308*4882a593Smuzhiyun 			  dlm_dir_nodeid(r), oc, ou);
5309*4882a593Smuzhiyun 
5310*4882a593Smuzhiyun 		/* At this point we assume that we won't get a reply to any
5311*4882a593Smuzhiyun 		   previous op or overlap op on this lock.  First, do a big
5312*4882a593Smuzhiyun 		   remove_from_waiters() for all previous ops. */
5313*4882a593Smuzhiyun 
5314*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_RESEND;
5315*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
5316*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
5317*4882a593Smuzhiyun 		lkb->lkb_wait_type = 0;
5318*4882a593Smuzhiyun 		/* drop all wait_count references; we still
5319*4882a593Smuzhiyun 		 * hold a reference for this iteration.
5320*4882a593Smuzhiyun 		 */
5321*4882a593Smuzhiyun 		while (lkb->lkb_wait_count) {
5322*4882a593Smuzhiyun 			lkb->lkb_wait_count--;
5323*4882a593Smuzhiyun 			unhold_lkb(lkb);
5324*4882a593Smuzhiyun 		}
5325*4882a593Smuzhiyun 		mutex_lock(&ls->ls_waiters_mutex);
5326*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_wait_reply);
5327*4882a593Smuzhiyun 		mutex_unlock(&ls->ls_waiters_mutex);
5328*4882a593Smuzhiyun 
5329*4882a593Smuzhiyun 		if (oc || ou) {
5330*4882a593Smuzhiyun 			/* do an unlock or cancel instead of resending */
5331*4882a593Smuzhiyun 			switch (mstype) {
5332*4882a593Smuzhiyun 			case DLM_MSG_LOOKUP:
5333*4882a593Smuzhiyun 			case DLM_MSG_REQUEST:
5334*4882a593Smuzhiyun 				queue_cast(r, lkb, ou ? -DLM_EUNLOCK :
5335*4882a593Smuzhiyun 							-DLM_ECANCEL);
5336*4882a593Smuzhiyun 				unhold_lkb(lkb); /* undoes create_lkb() */
5337*4882a593Smuzhiyun 				break;
5338*4882a593Smuzhiyun 			case DLM_MSG_CONVERT:
5339*4882a593Smuzhiyun 				if (oc) {
5340*4882a593Smuzhiyun 					queue_cast(r, lkb, -DLM_ECANCEL);
5341*4882a593Smuzhiyun 				} else {
5342*4882a593Smuzhiyun 					lkb->lkb_exflags |= DLM_LKF_FORCEUNLOCK;
5343*4882a593Smuzhiyun 					_unlock_lock(r, lkb);
5344*4882a593Smuzhiyun 				}
5345*4882a593Smuzhiyun 				break;
5346*4882a593Smuzhiyun 			default:
5347*4882a593Smuzhiyun 				err = 1;
5348*4882a593Smuzhiyun 			}
5349*4882a593Smuzhiyun 		} else {
5350*4882a593Smuzhiyun 			switch (mstype) {
5351*4882a593Smuzhiyun 			case DLM_MSG_LOOKUP:
5352*4882a593Smuzhiyun 			case DLM_MSG_REQUEST:
5353*4882a593Smuzhiyun 				_request_lock(r, lkb);
5354*4882a593Smuzhiyun 				if (is_master(r))
5355*4882a593Smuzhiyun 					confirm_master(r, 0);
5356*4882a593Smuzhiyun 				break;
5357*4882a593Smuzhiyun 			case DLM_MSG_CONVERT:
5358*4882a593Smuzhiyun 				_convert_lock(r, lkb);
5359*4882a593Smuzhiyun 				break;
5360*4882a593Smuzhiyun 			default:
5361*4882a593Smuzhiyun 				err = 1;
5362*4882a593Smuzhiyun 			}
5363*4882a593Smuzhiyun 		}
5364*4882a593Smuzhiyun 
5365*4882a593Smuzhiyun 		if (err) {
5366*4882a593Smuzhiyun 			log_error(ls, "waiter %x msg %d r_nodeid %d "
5367*4882a593Smuzhiyun 				  "dir_nodeid %d overlap %d %d",
5368*4882a593Smuzhiyun 				  lkb->lkb_id, mstype, r->res_nodeid,
5369*4882a593Smuzhiyun 				  dlm_dir_nodeid(r), oc, ou);
5370*4882a593Smuzhiyun 		}
5371*4882a593Smuzhiyun 		unlock_rsb(r);
5372*4882a593Smuzhiyun 		put_rsb(r);
5373*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
5374*4882a593Smuzhiyun 	}
5375*4882a593Smuzhiyun 
5376*4882a593Smuzhiyun 	return error;
5377*4882a593Smuzhiyun }
5378*4882a593Smuzhiyun 
5379*4882a593Smuzhiyun static void purge_mstcpy_list(struct dlm_ls *ls, struct dlm_rsb *r,
5380*4882a593Smuzhiyun 			      struct list_head *list)
5381*4882a593Smuzhiyun {
5382*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
5383*4882a593Smuzhiyun 
5384*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
5385*4882a593Smuzhiyun 		if (!is_master_copy(lkb))
5386*4882a593Smuzhiyun 			continue;
5387*4882a593Smuzhiyun 
5388*4882a593Smuzhiyun 		/* don't purge lkbs we've added in recover_master_copy for
5389*4882a593Smuzhiyun 		   the current recovery seq */
5390*4882a593Smuzhiyun 
5391*4882a593Smuzhiyun 		if (lkb->lkb_recover_seq == ls->ls_recover_seq)
5392*4882a593Smuzhiyun 			continue;
5393*4882a593Smuzhiyun 
5394*4882a593Smuzhiyun 		del_lkb(r, lkb);
5395*4882a593Smuzhiyun 
5396*4882a593Smuzhiyun 		/* this put should free the lkb */
5397*4882a593Smuzhiyun 		if (!dlm_put_lkb(lkb))
5398*4882a593Smuzhiyun 			log_error(ls, "purged mstcpy lkb not released");
5399*4882a593Smuzhiyun 	}
5400*4882a593Smuzhiyun }
5401*4882a593Smuzhiyun 
5402*4882a593Smuzhiyun void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
5403*4882a593Smuzhiyun {
5404*4882a593Smuzhiyun 	struct dlm_ls *ls = r->res_ls;
5405*4882a593Smuzhiyun 
5406*4882a593Smuzhiyun 	purge_mstcpy_list(ls, r, &r->res_grantqueue);
5407*4882a593Smuzhiyun 	purge_mstcpy_list(ls, r, &r->res_convertqueue);
5408*4882a593Smuzhiyun 	purge_mstcpy_list(ls, r, &r->res_waitqueue);
5409*4882a593Smuzhiyun }
5410*4882a593Smuzhiyun 
5411*4882a593Smuzhiyun static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r,
5412*4882a593Smuzhiyun 			    struct list_head *list,
5413*4882a593Smuzhiyun 			    int nodeid_gone, unsigned int *count)
5414*4882a593Smuzhiyun {
5415*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
5416*4882a593Smuzhiyun 
5417*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
5418*4882a593Smuzhiyun 		if (!is_master_copy(lkb))
5419*4882a593Smuzhiyun 			continue;
5420*4882a593Smuzhiyun 
5421*4882a593Smuzhiyun 		if ((lkb->lkb_nodeid == nodeid_gone) ||
5422*4882a593Smuzhiyun 		    dlm_is_removed(ls, lkb->lkb_nodeid)) {
5423*4882a593Smuzhiyun 
5424*4882a593Smuzhiyun 			/* tell recover_lvb to invalidate the lvb
5425*4882a593Smuzhiyun 			   because a node holding EX/PW failed */
5426*4882a593Smuzhiyun 			if ((lkb->lkb_exflags & DLM_LKF_VALBLK) &&
5427*4882a593Smuzhiyun 			    (lkb->lkb_grmode >= DLM_LOCK_PW)) {
5428*4882a593Smuzhiyun 				rsb_set_flag(r, RSB_RECOVER_LVB_INVAL);
5429*4882a593Smuzhiyun 			}
5430*4882a593Smuzhiyun 
5431*4882a593Smuzhiyun 			del_lkb(r, lkb);
5432*4882a593Smuzhiyun 
5433*4882a593Smuzhiyun 			/* this put should free the lkb */
5434*4882a593Smuzhiyun 			if (!dlm_put_lkb(lkb))
5435*4882a593Smuzhiyun 				log_error(ls, "purged dead lkb not released");
5436*4882a593Smuzhiyun 
5437*4882a593Smuzhiyun 			rsb_set_flag(r, RSB_RECOVER_GRANT);
5438*4882a593Smuzhiyun 
5439*4882a593Smuzhiyun 			(*count)++;
5440*4882a593Smuzhiyun 		}
5441*4882a593Smuzhiyun 	}
5442*4882a593Smuzhiyun }
5443*4882a593Smuzhiyun 
5444*4882a593Smuzhiyun /* Get rid of locks held by nodes that are gone. */
5445*4882a593Smuzhiyun 
5446*4882a593Smuzhiyun void dlm_recover_purge(struct dlm_ls *ls)
5447*4882a593Smuzhiyun {
5448*4882a593Smuzhiyun 	struct dlm_rsb *r;
5449*4882a593Smuzhiyun 	struct dlm_member *memb;
5450*4882a593Smuzhiyun 	int nodes_count = 0;
5451*4882a593Smuzhiyun 	int nodeid_gone = 0;
5452*4882a593Smuzhiyun 	unsigned int lkb_count = 0;
5453*4882a593Smuzhiyun 
5454*4882a593Smuzhiyun 	/* cache one removed nodeid to optimize the common
5455*4882a593Smuzhiyun 	   case of a single node removed */
5456*4882a593Smuzhiyun 
5457*4882a593Smuzhiyun 	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
5458*4882a593Smuzhiyun 		nodes_count++;
5459*4882a593Smuzhiyun 		nodeid_gone = memb->nodeid;
5460*4882a593Smuzhiyun 	}
5461*4882a593Smuzhiyun 
5462*4882a593Smuzhiyun 	if (!nodes_count)
5463*4882a593Smuzhiyun 		return;
5464*4882a593Smuzhiyun 
5465*4882a593Smuzhiyun 	down_write(&ls->ls_root_sem);
5466*4882a593Smuzhiyun 	list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
5467*4882a593Smuzhiyun 		hold_rsb(r);
5468*4882a593Smuzhiyun 		lock_rsb(r);
5469*4882a593Smuzhiyun 		if (is_master(r)) {
5470*4882a593Smuzhiyun 			purge_dead_list(ls, r, &r->res_grantqueue,
5471*4882a593Smuzhiyun 					nodeid_gone, &lkb_count);
5472*4882a593Smuzhiyun 			purge_dead_list(ls, r, &r->res_convertqueue,
5473*4882a593Smuzhiyun 					nodeid_gone, &lkb_count);
5474*4882a593Smuzhiyun 			purge_dead_list(ls, r, &r->res_waitqueue,
5475*4882a593Smuzhiyun 					nodeid_gone, &lkb_count);
5476*4882a593Smuzhiyun 		}
5477*4882a593Smuzhiyun 		unlock_rsb(r);
5478*4882a593Smuzhiyun 		unhold_rsb(r);
5479*4882a593Smuzhiyun 		cond_resched();
5480*4882a593Smuzhiyun 	}
5481*4882a593Smuzhiyun 	up_write(&ls->ls_root_sem);
5482*4882a593Smuzhiyun 
5483*4882a593Smuzhiyun 	if (lkb_count)
5484*4882a593Smuzhiyun 		log_rinfo(ls, "dlm_recover_purge %u locks for %u nodes",
5485*4882a593Smuzhiyun 			  lkb_count, nodes_count);
5486*4882a593Smuzhiyun }
5487*4882a593Smuzhiyun 
5488*4882a593Smuzhiyun static struct dlm_rsb *find_grant_rsb(struct dlm_ls *ls, int bucket)
5489*4882a593Smuzhiyun {
5490*4882a593Smuzhiyun 	struct rb_node *n;
5491*4882a593Smuzhiyun 	struct dlm_rsb *r;
5492*4882a593Smuzhiyun 
5493*4882a593Smuzhiyun 	spin_lock(&ls->ls_rsbtbl[bucket].lock);
5494*4882a593Smuzhiyun 	for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) {
5495*4882a593Smuzhiyun 		r = rb_entry(n, struct dlm_rsb, res_hashnode);
5496*4882a593Smuzhiyun 
5497*4882a593Smuzhiyun 		if (!rsb_flag(r, RSB_RECOVER_GRANT))
5498*4882a593Smuzhiyun 			continue;
5499*4882a593Smuzhiyun 		if (!is_master(r)) {
5500*4882a593Smuzhiyun 			rsb_clear_flag(r, RSB_RECOVER_GRANT);
5501*4882a593Smuzhiyun 			continue;
5502*4882a593Smuzhiyun 		}
5503*4882a593Smuzhiyun 		hold_rsb(r);
5504*4882a593Smuzhiyun 		spin_unlock(&ls->ls_rsbtbl[bucket].lock);
5505*4882a593Smuzhiyun 		return r;
5506*4882a593Smuzhiyun 	}
5507*4882a593Smuzhiyun 	spin_unlock(&ls->ls_rsbtbl[bucket].lock);
5508*4882a593Smuzhiyun 	return NULL;
5509*4882a593Smuzhiyun }
5510*4882a593Smuzhiyun 
5511*4882a593Smuzhiyun /*
5512*4882a593Smuzhiyun  * Attempt to grant locks on resources that we are the master of.
5513*4882a593Smuzhiyun  * Locks may have become grantable during recovery because locks
5514*4882a593Smuzhiyun  * from departed nodes have been purged (or not rebuilt), allowing
5515*4882a593Smuzhiyun  * previously blocked locks to now be granted.  The subset of rsb's
5516*4882a593Smuzhiyun  * we are interested in are those with lkb's on either the convert or
5517*4882a593Smuzhiyun  * waiting queues.
5518*4882a593Smuzhiyun  *
5519*4882a593Smuzhiyun  * Simplest would be to go through each master rsb and check for non-empty
5520*4882a593Smuzhiyun  * convert or waiting queues, and attempt to grant on those rsbs.
5521*4882a593Smuzhiyun  * Checking the queues requires lock_rsb, though, for which we'd need
5522*4882a593Smuzhiyun  * to release the rsbtbl lock.  This would make iterating through all
5523*4882a593Smuzhiyun  * rsb's very inefficient.  So, we rely on earlier recovery routines
5524*4882a593Smuzhiyun  * to set RECOVER_GRANT on any rsb's that we should attempt to grant
5525*4882a593Smuzhiyun  * locks for.
5526*4882a593Smuzhiyun  */
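/* For example, purge_dead_list() above marks an rsb this way after
   removing a dead node's lkb:

	rsb_set_flag(r, RSB_RECOVER_GRANT);
*/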
5527*4882a593Smuzhiyun 
5528*4882a593Smuzhiyun void dlm_recover_grant(struct dlm_ls *ls)
5529*4882a593Smuzhiyun {
5530*4882a593Smuzhiyun 	struct dlm_rsb *r;
5531*4882a593Smuzhiyun 	int bucket = 0;
5532*4882a593Smuzhiyun 	unsigned int count = 0;
5533*4882a593Smuzhiyun 	unsigned int rsb_count = 0;
5534*4882a593Smuzhiyun 	unsigned int lkb_count = 0;
5535*4882a593Smuzhiyun 
5536*4882a593Smuzhiyun 	while (1) {
5537*4882a593Smuzhiyun 		r = find_grant_rsb(ls, bucket);
5538*4882a593Smuzhiyun 		if (!r) {
5539*4882a593Smuzhiyun 			if (bucket == ls->ls_rsbtbl_size - 1)
5540*4882a593Smuzhiyun 				break;
5541*4882a593Smuzhiyun 			bucket++;
5542*4882a593Smuzhiyun 			continue;
5543*4882a593Smuzhiyun 		}
5544*4882a593Smuzhiyun 		rsb_count++;
5545*4882a593Smuzhiyun 		count = 0;
5546*4882a593Smuzhiyun 		lock_rsb(r);
5547*4882a593Smuzhiyun 		/* the RECOVER_GRANT flag is checked in the grant path */
5548*4882a593Smuzhiyun 		grant_pending_locks(r, &count);
5549*4882a593Smuzhiyun 		rsb_clear_flag(r, RSB_RECOVER_GRANT);
5550*4882a593Smuzhiyun 		lkb_count += count;
5551*4882a593Smuzhiyun 		confirm_master(r, 0);
5552*4882a593Smuzhiyun 		unlock_rsb(r);
5553*4882a593Smuzhiyun 		put_rsb(r);
5554*4882a593Smuzhiyun 		cond_resched();
5555*4882a593Smuzhiyun 	}
5556*4882a593Smuzhiyun 
5557*4882a593Smuzhiyun 	if (lkb_count)
5558*4882a593Smuzhiyun 		log_rinfo(ls, "dlm_recover_grant %u locks on %u resources",
5559*4882a593Smuzhiyun 			  lkb_count, rsb_count);
5560*4882a593Smuzhiyun }
5561*4882a593Smuzhiyun 
5562*4882a593Smuzhiyun static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
5563*4882a593Smuzhiyun 					 uint32_t remid)
5564*4882a593Smuzhiyun {
5565*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5566*4882a593Smuzhiyun 
5567*4882a593Smuzhiyun 	list_for_each_entry(lkb, head, lkb_statequeue) {
5568*4882a593Smuzhiyun 		if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid)
5569*4882a593Smuzhiyun 			return lkb;
5570*4882a593Smuzhiyun 	}
5571*4882a593Smuzhiyun 	return NULL;
5572*4882a593Smuzhiyun }
5573*4882a593Smuzhiyun 
5574*4882a593Smuzhiyun static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid,
5575*4882a593Smuzhiyun 				    uint32_t remid)
5576*4882a593Smuzhiyun {
5577*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5578*4882a593Smuzhiyun 
5579*4882a593Smuzhiyun 	lkb = search_remid_list(&r->res_grantqueue, nodeid, remid);
5580*4882a593Smuzhiyun 	if (lkb)
5581*4882a593Smuzhiyun 		return lkb;
5582*4882a593Smuzhiyun 	lkb = search_remid_list(&r->res_convertqueue, nodeid, remid);
5583*4882a593Smuzhiyun 	if (lkb)
5584*4882a593Smuzhiyun 		return lkb;
5585*4882a593Smuzhiyun 	lkb = search_remid_list(&r->res_waitqueue, nodeid, remid);
5586*4882a593Smuzhiyun 	if (lkb)
5587*4882a593Smuzhiyun 		return lkb;
5588*4882a593Smuzhiyun 	return NULL;
5589*4882a593Smuzhiyun }
5590*4882a593Smuzhiyun 
5591*4882a593Smuzhiyun /* needs at least dlm_rcom + rcom_lock */
5592*4882a593Smuzhiyun static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
5593*4882a593Smuzhiyun 				  struct dlm_rsb *r, struct dlm_rcom *rc)
5594*4882a593Smuzhiyun {
5595*4882a593Smuzhiyun 	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
5596*4882a593Smuzhiyun 
5597*4882a593Smuzhiyun 	lkb->lkb_nodeid = rc->rc_header.h_nodeid;
5598*4882a593Smuzhiyun 	lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid);
5599*4882a593Smuzhiyun 	lkb->lkb_remid = le32_to_cpu(rl->rl_lkid);
5600*4882a593Smuzhiyun 	lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags);
5601*4882a593Smuzhiyun 	lkb->lkb_flags = le32_to_cpu(rl->rl_flags) & 0x0000FFFF;
5602*4882a593Smuzhiyun 	lkb->lkb_flags |= DLM_IFL_MSTCPY;
5603*4882a593Smuzhiyun 	lkb->lkb_lvbseq = le32_to_cpu(rl->rl_lvbseq);
5604*4882a593Smuzhiyun 	lkb->lkb_rqmode = rl->rl_rqmode;
5605*4882a593Smuzhiyun 	lkb->lkb_grmode = rl->rl_grmode;
5606*4882a593Smuzhiyun 	/* don't set lkb_status because add_lkb wants to itself */
5607*4882a593Smuzhiyun 
5608*4882a593Smuzhiyun 	lkb->lkb_bastfn = (rl->rl_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
5609*4882a593Smuzhiyun 	lkb->lkb_astfn = (rl->rl_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
5610*4882a593Smuzhiyun 
5611*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
5612*4882a593Smuzhiyun 		int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
5613*4882a593Smuzhiyun 			 sizeof(struct rcom_lock);
5614*4882a593Smuzhiyun 		if (lvblen > ls->ls_lvblen)
5615*4882a593Smuzhiyun 			return -EINVAL;
5616*4882a593Smuzhiyun 		lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
5617*4882a593Smuzhiyun 		if (!lkb->lkb_lvbptr)
5618*4882a593Smuzhiyun 			return -ENOMEM;
5619*4882a593Smuzhiyun 		memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
5620*4882a593Smuzhiyun 	}
5621*4882a593Smuzhiyun 
5622*4882a593Smuzhiyun 	/* Conversions between PR and CW (middle modes) need special handling.
5623*4882a593Smuzhiyun 	   The real granted mode of these converting locks cannot be determined
5624*4882a593Smuzhiyun 	   until all locks have been rebuilt on the rsb (recover_conversion) */
5625*4882a593Smuzhiyun 
5626*4882a593Smuzhiyun 	if (rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT) &&
5627*4882a593Smuzhiyun 	    middle_conversion(lkb)) {
5628*4882a593Smuzhiyun 		rl->rl_status = DLM_LKSTS_CONVERT;
5629*4882a593Smuzhiyun 		lkb->lkb_grmode = DLM_LOCK_IV;
5630*4882a593Smuzhiyun 		rsb_set_flag(r, RSB_RECOVER_CONVERT);
5631*4882a593Smuzhiyun 	}
5632*4882a593Smuzhiyun 
5633*4882a593Smuzhiyun 	return 0;
5634*4882a593Smuzhiyun }
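
/* Editorial sketch (not part of this file's revision): two details of
 * receive_rcom_lock_args() spelled out.
 *
 * 1) LVB length: the lvb bytes ride at the end of the rcom buffer, so for a
 *    lockspace with ls_lvblen = 32, a full-LVB message has
 *    h_length = sizeof(struct dlm_rcom) + sizeof(struct rcom_lock) + 32;
 *    anything implying lvblen > ls_lvblen is rejected with -EINVAL.
 *
 * 2) middle_conversion() (defined earlier in this file) is roughly the check
 *    below: PR and CW are not ordered with respect to each other in the mode
 *    lattice, so a conversion between them cannot be resolved by simple mode
 *    comparison and must wait for recover_conversion().
 */
#if 0	/* illustrative only */
static int middle_conversion_sketch(struct dlm_lkb *lkb)
{
	if ((lkb->lkb_grmode == DLM_LOCK_PR && lkb->lkb_rqmode == DLM_LOCK_CW) ||
	    (lkb->lkb_rqmode == DLM_LOCK_PR && lkb->lkb_grmode == DLM_LOCK_CW))
		return 1;
	return 0;
}
#endif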
5635*4882a593Smuzhiyun 
5636*4882a593Smuzhiyun /* This lkb may have been recovered in a previous aborted recovery so we need
5637*4882a593Smuzhiyun    to check if the rsb already has an lkb with the given remote nodeid/lkid.
5638*4882a593Smuzhiyun    If so we just send back a standard reply.  If not, we create a new lkb with
5639*4882a593Smuzhiyun    the given values and send back our lkid.  We send back our lkid by sending
5640*4882a593Smuzhiyun    back the rcom_lock struct we got but with the remid field filled in. */
5641*4882a593Smuzhiyun 
5642*4882a593Smuzhiyun /* needs at least dlm_rcom + rcom_lock */
5643*4882a593Smuzhiyun int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
5644*4882a593Smuzhiyun {
5645*4882a593Smuzhiyun 	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
5646*4882a593Smuzhiyun 	struct dlm_rsb *r;
5647*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5648*4882a593Smuzhiyun 	uint32_t remid = 0;
5649*4882a593Smuzhiyun 	int from_nodeid = rc->rc_header.h_nodeid;
5650*4882a593Smuzhiyun 	int error;
5651*4882a593Smuzhiyun 
5652*4882a593Smuzhiyun 	if (rl->rl_parent_lkid) {
5653*4882a593Smuzhiyun 		error = -EOPNOTSUPP;
5654*4882a593Smuzhiyun 		goto out;
5655*4882a593Smuzhiyun 	}
5656*4882a593Smuzhiyun 
5657*4882a593Smuzhiyun 	remid = le32_to_cpu(rl->rl_lkid);
5658*4882a593Smuzhiyun 
5659*4882a593Smuzhiyun 	/* In general we expect the rsb returned to be R_MASTER, but we don't
5660*4882a593Smuzhiyun 	   have to require it.  Recovery of masters on one node can overlap
5661*4882a593Smuzhiyun 	   recovery of locks on another node, so one node can send us MSTCPY
5662*4882a593Smuzhiyun 	   locks before we've made ourselves master of this rsb.  We can still
5663*4882a593Smuzhiyun 	   add new MSTCPY locks that we receive here without any harm; when
5664*4882a593Smuzhiyun 	   we make ourselves master, dlm_recover_masters() won't touch the
5665*4882a593Smuzhiyun 	   MSTCPY locks we've received early. */
5666*4882a593Smuzhiyun 
5667*4882a593Smuzhiyun 	error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen),
5668*4882a593Smuzhiyun 			 from_nodeid, R_RECEIVE_RECOVER, &r);
5669*4882a593Smuzhiyun 	if (error)
5670*4882a593Smuzhiyun 		goto out;
5671*4882a593Smuzhiyun 
5672*4882a593Smuzhiyun 	lock_rsb(r);
5673*4882a593Smuzhiyun 
5674*4882a593Smuzhiyun 	if (dlm_no_directory(ls) && (dlm_dir_nodeid(r) != dlm_our_nodeid())) {
5675*4882a593Smuzhiyun 		log_error(ls, "dlm_recover_master_copy remote %d %x not dir",
5676*4882a593Smuzhiyun 			  from_nodeid, remid);
5677*4882a593Smuzhiyun 		error = -EBADR;
5678*4882a593Smuzhiyun 		goto out_unlock;
5679*4882a593Smuzhiyun 	}
5680*4882a593Smuzhiyun 
5681*4882a593Smuzhiyun 	lkb = search_remid(r, from_nodeid, remid);
5682*4882a593Smuzhiyun 	if (lkb) {
5683*4882a593Smuzhiyun 		error = -EEXIST;
5684*4882a593Smuzhiyun 		goto out_remid;
5685*4882a593Smuzhiyun 	}
5686*4882a593Smuzhiyun 
5687*4882a593Smuzhiyun 	error = create_lkb(ls, &lkb);
5688*4882a593Smuzhiyun 	if (error)
5689*4882a593Smuzhiyun 		goto out_unlock;
5690*4882a593Smuzhiyun 
5691*4882a593Smuzhiyun 	error = receive_rcom_lock_args(ls, lkb, r, rc);
5692*4882a593Smuzhiyun 	if (error) {
5693*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
5694*4882a593Smuzhiyun 		goto out_unlock;
5695*4882a593Smuzhiyun 	}
5696*4882a593Smuzhiyun 
5697*4882a593Smuzhiyun 	attach_lkb(r, lkb);
5698*4882a593Smuzhiyun 	add_lkb(r, lkb, rl->rl_status);
5699*4882a593Smuzhiyun 	error = 0;
5700*4882a593Smuzhiyun 	ls->ls_recover_locks_in++;
5701*4882a593Smuzhiyun 
5702*4882a593Smuzhiyun 	if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
5703*4882a593Smuzhiyun 		rsb_set_flag(r, RSB_RECOVER_GRANT);
5704*4882a593Smuzhiyun 
5705*4882a593Smuzhiyun  out_remid:
5706*4882a593Smuzhiyun 	/* this is the new value returned to the lock holder for
5707*4882a593Smuzhiyun 	   saving in its process-copy lkb */
5708*4882a593Smuzhiyun 	rl->rl_remid = cpu_to_le32(lkb->lkb_id);
5709*4882a593Smuzhiyun 
5710*4882a593Smuzhiyun 	lkb->lkb_recover_seq = ls->ls_recover_seq;
5711*4882a593Smuzhiyun 
5712*4882a593Smuzhiyun  out_unlock:
5713*4882a593Smuzhiyun 	unlock_rsb(r);
5714*4882a593Smuzhiyun 	put_rsb(r);
5715*4882a593Smuzhiyun  out:
5716*4882a593Smuzhiyun 	if (error && error != -EEXIST)
5717*4882a593Smuzhiyun 		log_rinfo(ls, "dlm_recover_master_copy remote %d %x error %d",
5718*4882a593Smuzhiyun 			  from_nodeid, remid, error);
5719*4882a593Smuzhiyun 	rl->rl_result = cpu_to_le32(error);
5720*4882a593Smuzhiyun 	return error;
5721*4882a593Smuzhiyun }
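
/* Editorial sketch: the lkid/remid exchange performed above, with
 * hypothetical id values.  The lock holder sends its own lkid in rl_lkid;
 * the new master answers by filling in rl_remid:
 *
 *   holder -> master:  rl_lkid = 0x10001, rl_remid = 0
 *   master -> holder:  rl_lkid = 0x10001, rl_remid = 0x2a003
 *
 * dlm_recover_process_copy() below then saves 0x2a003 as lkb_remid in the
 * holder's process-copy lkb, re-linking the two halves of the lock.
 */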
5722*4882a593Smuzhiyun 
5723*4882a593Smuzhiyun /* needs at least dlm_rcom + rcom_lock */
5724*4882a593Smuzhiyun int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
5725*4882a593Smuzhiyun {
5726*4882a593Smuzhiyun 	struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
5727*4882a593Smuzhiyun 	struct dlm_rsb *r;
5728*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5729*4882a593Smuzhiyun 	uint32_t lkid, remid;
5730*4882a593Smuzhiyun 	int error, result;
5731*4882a593Smuzhiyun 
5732*4882a593Smuzhiyun 	lkid = le32_to_cpu(rl->rl_lkid);
5733*4882a593Smuzhiyun 	remid = le32_to_cpu(rl->rl_remid);
5734*4882a593Smuzhiyun 	result = le32_to_cpu(rl->rl_result);
5735*4882a593Smuzhiyun 
5736*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
5737*4882a593Smuzhiyun 	if (error) {
5738*4882a593Smuzhiyun 		log_error(ls, "dlm_recover_process_copy no %x remote %d %x %d",
5739*4882a593Smuzhiyun 			  lkid, rc->rc_header.h_nodeid, remid, result);
5740*4882a593Smuzhiyun 		return error;
5741*4882a593Smuzhiyun 	}
5742*4882a593Smuzhiyun 
5743*4882a593Smuzhiyun 	r = lkb->lkb_resource;
5744*4882a593Smuzhiyun 	hold_rsb(r);
5745*4882a593Smuzhiyun 	lock_rsb(r);
5746*4882a593Smuzhiyun 
5747*4882a593Smuzhiyun 	if (!is_process_copy(lkb)) {
5748*4882a593Smuzhiyun 		log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d",
5749*4882a593Smuzhiyun 			  lkid, rc->rc_header.h_nodeid, remid, result);
5750*4882a593Smuzhiyun 		dlm_dump_rsb(r);
5751*4882a593Smuzhiyun 		unlock_rsb(r);
5752*4882a593Smuzhiyun 		put_rsb(r);
5753*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
5754*4882a593Smuzhiyun 		return -EINVAL;
5755*4882a593Smuzhiyun 	}
5756*4882a593Smuzhiyun 
5757*4882a593Smuzhiyun 	switch (result) {
5758*4882a593Smuzhiyun 	case -EBADR:
5759*4882a593Smuzhiyun 		/* There's a chance the new master received our lock before
5760*4882a593Smuzhiyun 		   dlm_recover_master_reply(); this wouldn't happen if we did
5761*4882a593Smuzhiyun 		   a barrier between recover_masters and recover_locks. */
5762*4882a593Smuzhiyun 
5763*4882a593Smuzhiyun 		log_debug(ls, "dlm_recover_process_copy %x remote %d %x %d",
5764*4882a593Smuzhiyun 			  lkid, rc->rc_header.h_nodeid, remid, result);
5765*4882a593Smuzhiyun 
5766*4882a593Smuzhiyun 		dlm_send_rcom_lock(r, lkb);
5767*4882a593Smuzhiyun 		goto out;
5768*4882a593Smuzhiyun 	case -EEXIST:
5769*4882a593Smuzhiyun 	case 0:
5770*4882a593Smuzhiyun 		lkb->lkb_remid = remid;
5771*4882a593Smuzhiyun 		break;
5772*4882a593Smuzhiyun 	default:
5773*4882a593Smuzhiyun 		log_error(ls, "dlm_recover_process_copy %x remote %d %x %d unk",
5774*4882a593Smuzhiyun 			  lkid, rc->rc_header.h_nodeid, remid, result);
5775*4882a593Smuzhiyun 	}
5776*4882a593Smuzhiyun 
5777*4882a593Smuzhiyun 	/* an ack for dlm_recover_locks() which waits for replies from
5778*4882a593Smuzhiyun 	   all the locks it sends to new masters */
5779*4882a593Smuzhiyun 	dlm_recovered_lock(r);
5780*4882a593Smuzhiyun  out:
5781*4882a593Smuzhiyun 	unlock_rsb(r);
5782*4882a593Smuzhiyun 	put_rsb(r);
5783*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
5784*4882a593Smuzhiyun 
5785*4882a593Smuzhiyun 	return 0;
5786*4882a593Smuzhiyun }
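
/* Editorial summary of the result handling above:
 *
 *   -EBADR  : the prospective master didn't consider itself master yet; the
 *             lock is resent with dlm_send_rcom_lock() and the
 *             dlm_recovered_lock() ack is deferred to the retried reply.
 *   -EEXIST : the master already held this lock from an earlier, aborted
 *             recovery; treated like success, rl_remid is recorded.
 *   0       : a new master copy was created; rl_remid is recorded.
 *   default : unexpected result; it is logged, but the lock is still acked
 *             so dlm_recover_locks() can finish waiting.
 */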
5787*4882a593Smuzhiyun 
5788*4882a593Smuzhiyun int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
5789*4882a593Smuzhiyun 		     int mode, uint32_t flags, void *name, unsigned int namelen,
5790*4882a593Smuzhiyun 		     unsigned long timeout_cs)
5791*4882a593Smuzhiyun {
5792*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5793*4882a593Smuzhiyun 	struct dlm_args args;
5794*4882a593Smuzhiyun 	int error;
5795*4882a593Smuzhiyun 
5796*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
5797*4882a593Smuzhiyun 
5798*4882a593Smuzhiyun 	error = create_lkb(ls, &lkb);
5799*4882a593Smuzhiyun 	if (error) {
5800*4882a593Smuzhiyun 		kfree(ua);
5801*4882a593Smuzhiyun 		goto out;
5802*4882a593Smuzhiyun 	}
5803*4882a593Smuzhiyun 
5804*4882a593Smuzhiyun 	if (flags & DLM_LKF_VALBLK) {
5805*4882a593Smuzhiyun 		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
5806*4882a593Smuzhiyun 		if (!ua->lksb.sb_lvbptr) {
5807*4882a593Smuzhiyun 			kfree(ua);
5808*4882a593Smuzhiyun 			__put_lkb(ls, lkb);
5809*4882a593Smuzhiyun 			error = -ENOMEM;
5810*4882a593Smuzhiyun 			goto out;
5811*4882a593Smuzhiyun 		}
5812*4882a593Smuzhiyun 	}
5813*4882a593Smuzhiyun 	error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
5814*4882a593Smuzhiyun 			      fake_astfn, ua, fake_bastfn, &args);
5815*4882a593Smuzhiyun 	if (error) {
5816*4882a593Smuzhiyun 		kfree(ua->lksb.sb_lvbptr);
5817*4882a593Smuzhiyun 		ua->lksb.sb_lvbptr = NULL;
5818*4882a593Smuzhiyun 		kfree(ua);
5819*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
5820*4882a593Smuzhiyun 		goto out;
5821*4882a593Smuzhiyun 	}
5822*4882a593Smuzhiyun 
5823*4882a593Smuzhiyun 	/* After ua is attached to lkb it will be freed by dlm_free_lkb().
5824*4882a593Smuzhiyun 	   When DLM_IFL_USER is set, the dlm knows that this is a userspace
5825*4882a593Smuzhiyun 	   lock and that lkb_astparam is the dlm_user_args structure. */
5826*4882a593Smuzhiyun 	lkb->lkb_flags |= DLM_IFL_USER;
5827*4882a593Smuzhiyun 	error = request_lock(ls, lkb, name, namelen, &args);
5828*4882a593Smuzhiyun 
5829*4882a593Smuzhiyun 	switch (error) {
5830*4882a593Smuzhiyun 	case 0:
5831*4882a593Smuzhiyun 		break;
5832*4882a593Smuzhiyun 	case -EINPROGRESS:
5833*4882a593Smuzhiyun 		error = 0;
5834*4882a593Smuzhiyun 		break;
5835*4882a593Smuzhiyun 	case -EAGAIN:
5836*4882a593Smuzhiyun 		error = 0;
5837*4882a593Smuzhiyun 		fallthrough;
5838*4882a593Smuzhiyun 	default:
5839*4882a593Smuzhiyun 		__put_lkb(ls, lkb);
5840*4882a593Smuzhiyun 		goto out;
5841*4882a593Smuzhiyun 	}
5842*4882a593Smuzhiyun 
5843*4882a593Smuzhiyun 	/* add this new lkb to the per-process list of locks */
5844*4882a593Smuzhiyun 	spin_lock(&ua->proc->locks_spin);
5845*4882a593Smuzhiyun 	hold_lkb(lkb);
5846*4882a593Smuzhiyun 	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
5847*4882a593Smuzhiyun 	spin_unlock(&ua->proc->locks_spin);
5848*4882a593Smuzhiyun  out:
5849*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
5850*4882a593Smuzhiyun 	return error;
5851*4882a593Smuzhiyun }
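
/* Editorial sketch (assumption: illustrative helper, not kernel API): the
 * error mapping in the switch above, written out.  From userspace's point of
 * view the request "succeeded" whenever a completion AST will report the
 * real outcome; only hard failures propagate, and for those (and -EAGAIN)
 * the lkb is released before returning. */
#if 0	/* illustrative only */
static int user_request_status(int error)
{
	switch (error) {
	case 0:			/* granted; lkb stays on the proc list */
	case -EINPROGRESS:	/* queued; result will arrive via the AST */
		return 0;
	case -EAGAIN:		/* NOQUEUE refusal; AST queued, lkb freed */
		return 0;
	default:		/* hard failure; lkb freed, error returned */
		return error;
	}
}
#endif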
5852*4882a593Smuzhiyun 
5853*4882a593Smuzhiyun int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
5854*4882a593Smuzhiyun 		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
5855*4882a593Smuzhiyun 		     unsigned long timeout_cs)
5856*4882a593Smuzhiyun {
5857*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5858*4882a593Smuzhiyun 	struct dlm_args args;
5859*4882a593Smuzhiyun 	struct dlm_user_args *ua;
5860*4882a593Smuzhiyun 	int error;
5861*4882a593Smuzhiyun 
5862*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
5863*4882a593Smuzhiyun 
5864*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
5865*4882a593Smuzhiyun 	if (error)
5866*4882a593Smuzhiyun 		goto out;
5867*4882a593Smuzhiyun 
5868*4882a593Smuzhiyun 	/* user can change the params on its lock when it converts it, or
5869*4882a593Smuzhiyun 	   add an lvb that didn't exist before */
5870*4882a593Smuzhiyun 
5871*4882a593Smuzhiyun 	ua = lkb->lkb_ua;
5872*4882a593Smuzhiyun 
5873*4882a593Smuzhiyun 	if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
5874*4882a593Smuzhiyun 		ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
5875*4882a593Smuzhiyun 		if (!ua->lksb.sb_lvbptr) {
5876*4882a593Smuzhiyun 			error = -ENOMEM;
5877*4882a593Smuzhiyun 			goto out_put;
5878*4882a593Smuzhiyun 		}
5879*4882a593Smuzhiyun 	}
5880*4882a593Smuzhiyun 	if (lvb_in && ua->lksb.sb_lvbptr)
5881*4882a593Smuzhiyun 		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
5882*4882a593Smuzhiyun 
5883*4882a593Smuzhiyun 	ua->xid = ua_tmp->xid;
5884*4882a593Smuzhiyun 	ua->castparam = ua_tmp->castparam;
5885*4882a593Smuzhiyun 	ua->castaddr = ua_tmp->castaddr;
5886*4882a593Smuzhiyun 	ua->bastparam = ua_tmp->bastparam;
5887*4882a593Smuzhiyun 	ua->bastaddr = ua_tmp->bastaddr;
5888*4882a593Smuzhiyun 	ua->user_lksb = ua_tmp->user_lksb;
5889*4882a593Smuzhiyun 
5890*4882a593Smuzhiyun 	error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
5891*4882a593Smuzhiyun 			      fake_astfn, ua, fake_bastfn, &args);
5892*4882a593Smuzhiyun 	if (error)
5893*4882a593Smuzhiyun 		goto out_put;
5894*4882a593Smuzhiyun 
5895*4882a593Smuzhiyun 	error = convert_lock(ls, lkb, &args);
5896*4882a593Smuzhiyun 
5897*4882a593Smuzhiyun 	if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
5898*4882a593Smuzhiyun 		error = 0;
5899*4882a593Smuzhiyun  out_put:
5900*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
5901*4882a593Smuzhiyun  out:
5902*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
5903*4882a593Smuzhiyun 	kfree(ua_tmp);
5904*4882a593Smuzhiyun 	return error;
5905*4882a593Smuzhiyun }
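
/* Editorial example (hypothetical scenario): a lock originally taken without
 * DLM_LKF_VALBLK can gain an LVB at convert time.  Passing DLM_LKF_VALBLK
 * here makes the code above allocate a DLM_USER_LVB_LEN-byte sb_lvbptr on
 * the fly and copy lvb_in into it before convert_lock() runs.  The
 * -EINPROGRESS/-EAGAIN/-EDEADLK results map to 0, presumably because each of
 * those outcomes is delivered to userspace through the completion AST. */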
5906*4882a593Smuzhiyun 
5907*4882a593Smuzhiyun /*
5908*4882a593Smuzhiyun  * The caller asks for an orphan lock on a given resource with a given mode.
5909*4882a593Smuzhiyun  * If a matching lock exists, it's moved to the owner's list of locks and
5910*4882a593Smuzhiyun  * the lkid is returned.
5911*4882a593Smuzhiyun  */
5912*4882a593Smuzhiyun 
5913*4882a593Smuzhiyun int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
5914*4882a593Smuzhiyun 		     int mode, uint32_t flags, void *name, unsigned int namelen,
5915*4882a593Smuzhiyun 		     unsigned long timeout_cs, uint32_t *lkid)
5916*4882a593Smuzhiyun {
5917*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5918*4882a593Smuzhiyun 	struct dlm_user_args *ua;
5919*4882a593Smuzhiyun 	int found_other_mode = 0;
5920*4882a593Smuzhiyun 	int found = 0;
5921*4882a593Smuzhiyun 	int rv = 0;
5922*4882a593Smuzhiyun 
5923*4882a593Smuzhiyun 	mutex_lock(&ls->ls_orphans_mutex);
5924*4882a593Smuzhiyun 	list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
5925*4882a593Smuzhiyun 		if (lkb->lkb_resource->res_length != namelen)
5926*4882a593Smuzhiyun 			continue;
5927*4882a593Smuzhiyun 		if (memcmp(lkb->lkb_resource->res_name, name, namelen))
5928*4882a593Smuzhiyun 			continue;
5929*4882a593Smuzhiyun 		if (lkb->lkb_grmode != mode) {
5930*4882a593Smuzhiyun 			found_other_mode = 1;
5931*4882a593Smuzhiyun 			continue;
5932*4882a593Smuzhiyun 		}
5933*4882a593Smuzhiyun 
5934*4882a593Smuzhiyun 		found = 1;
5935*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_ownqueue);
5936*4882a593Smuzhiyun 		lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
5937*4882a593Smuzhiyun 		*lkid = lkb->lkb_id;
5938*4882a593Smuzhiyun 		break;
5939*4882a593Smuzhiyun 	}
5940*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_orphans_mutex);
5941*4882a593Smuzhiyun 
5942*4882a593Smuzhiyun 	if (!found && found_other_mode) {
5943*4882a593Smuzhiyun 		rv = -EAGAIN;
5944*4882a593Smuzhiyun 		goto out;
5945*4882a593Smuzhiyun 	}
5946*4882a593Smuzhiyun 
5947*4882a593Smuzhiyun 	if (!found) {
5948*4882a593Smuzhiyun 		rv = -ENOENT;
5949*4882a593Smuzhiyun 		goto out;
5950*4882a593Smuzhiyun 	}
5951*4882a593Smuzhiyun 
5952*4882a593Smuzhiyun 	lkb->lkb_exflags = flags;
5953*4882a593Smuzhiyun 	lkb->lkb_ownpid = (int) current->pid;
5954*4882a593Smuzhiyun 
5955*4882a593Smuzhiyun 	ua = lkb->lkb_ua;
5956*4882a593Smuzhiyun 
5957*4882a593Smuzhiyun 	ua->proc = ua_tmp->proc;
5958*4882a593Smuzhiyun 	ua->xid = ua_tmp->xid;
5959*4882a593Smuzhiyun 	ua->castparam = ua_tmp->castparam;
5960*4882a593Smuzhiyun 	ua->castaddr = ua_tmp->castaddr;
5961*4882a593Smuzhiyun 	ua->bastparam = ua_tmp->bastparam;
5962*4882a593Smuzhiyun 	ua->bastaddr = ua_tmp->bastaddr;
5963*4882a593Smuzhiyun 	ua->user_lksb = ua_tmp->user_lksb;
5964*4882a593Smuzhiyun 
5965*4882a593Smuzhiyun 	/*
5966*4882a593Smuzhiyun 	 * The lkb reference from the ls_orphans list was not
5967*4882a593Smuzhiyun 	 * removed above, and is now considered the reference
5968*4882a593Smuzhiyun 	 * for the proc locks list.
5969*4882a593Smuzhiyun 	 */
5970*4882a593Smuzhiyun 
5971*4882a593Smuzhiyun 	spin_lock(&ua->proc->locks_spin);
5972*4882a593Smuzhiyun 	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
5973*4882a593Smuzhiyun 	spin_unlock(&ua->proc->locks_spin);
5974*4882a593Smuzhiyun  out:
5975*4882a593Smuzhiyun 	kfree(ua_tmp);
5976*4882a593Smuzhiyun 	return rv;
5977*4882a593Smuzhiyun }
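
/* Editorial summary of the adoption outcomes above:
 *
 *   name+mode match          : lock moves from ls_orphans to the caller's
 *                              proc->locks list, its lkid is returned, rv = 0
 *   name matches, other mode : rv = -EAGAIN (a retry with that mode could
 *                              succeed)
 *   no match at all          : rv = -ENOENT
 *
 * Note the reference juggling: the ls_orphans list reference is not dropped;
 * it is re-purposed as the proc->locks list reference.
 */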
5978*4882a593Smuzhiyun 
5979*4882a593Smuzhiyun int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
5980*4882a593Smuzhiyun 		    uint32_t flags, uint32_t lkid, char *lvb_in)
5981*4882a593Smuzhiyun {
5982*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
5983*4882a593Smuzhiyun 	struct dlm_args args;
5984*4882a593Smuzhiyun 	struct dlm_user_args *ua;
5985*4882a593Smuzhiyun 	int error;
5986*4882a593Smuzhiyun 
5987*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
5988*4882a593Smuzhiyun 
5989*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
5990*4882a593Smuzhiyun 	if (error)
5991*4882a593Smuzhiyun 		goto out;
5992*4882a593Smuzhiyun 
5993*4882a593Smuzhiyun 	ua = lkb->lkb_ua;
5994*4882a593Smuzhiyun 
5995*4882a593Smuzhiyun 	if (lvb_in && ua->lksb.sb_lvbptr)
5996*4882a593Smuzhiyun 		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
5997*4882a593Smuzhiyun 	if (ua_tmp->castparam)
5998*4882a593Smuzhiyun 		ua->castparam = ua_tmp->castparam;
5999*4882a593Smuzhiyun 	ua->user_lksb = ua_tmp->user_lksb;
6000*4882a593Smuzhiyun 
6001*4882a593Smuzhiyun 	error = set_unlock_args(flags, ua, &args);
6002*4882a593Smuzhiyun 	if (error)
6003*4882a593Smuzhiyun 		goto out_put;
6004*4882a593Smuzhiyun 
6005*4882a593Smuzhiyun 	error = unlock_lock(ls, lkb, &args);
6006*4882a593Smuzhiyun 
6007*4882a593Smuzhiyun 	if (error == -DLM_EUNLOCK)
6008*4882a593Smuzhiyun 		error = 0;
6009*4882a593Smuzhiyun 	/* from validate_unlock_args() */
6010*4882a593Smuzhiyun 	if (error == -EBUSY && (flags & DLM_LKF_FORCEUNLOCK))
6011*4882a593Smuzhiyun 		error = 0;
6012*4882a593Smuzhiyun 	if (error)
6013*4882a593Smuzhiyun 		goto out_put;
6014*4882a593Smuzhiyun 
6015*4882a593Smuzhiyun 	spin_lock(&ua->proc->locks_spin);
6016*4882a593Smuzhiyun 	/* dlm_user_add_cb() may have already taken lkb off the proc list */
6017*4882a593Smuzhiyun 	if (!list_empty(&lkb->lkb_ownqueue))
6018*4882a593Smuzhiyun 		list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
6019*4882a593Smuzhiyun 	spin_unlock(&ua->proc->locks_spin);
6020*4882a593Smuzhiyun  out_put:
6021*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
6022*4882a593Smuzhiyun  out:
6023*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
6024*4882a593Smuzhiyun 	kfree(ua_tmp);
6025*4882a593Smuzhiyun 	return error;
6026*4882a593Smuzhiyun }
6027*4882a593Smuzhiyun 
6028*4882a593Smuzhiyun int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
6029*4882a593Smuzhiyun 		    uint32_t flags, uint32_t lkid)
6030*4882a593Smuzhiyun {
6031*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
6032*4882a593Smuzhiyun 	struct dlm_args args;
6033*4882a593Smuzhiyun 	struct dlm_user_args *ua;
6034*4882a593Smuzhiyun 	int error;
6035*4882a593Smuzhiyun 
6036*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
6037*4882a593Smuzhiyun 
6038*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
6039*4882a593Smuzhiyun 	if (error)
6040*4882a593Smuzhiyun 		goto out;
6041*4882a593Smuzhiyun 
6042*4882a593Smuzhiyun 	ua = lkb->lkb_ua;
6043*4882a593Smuzhiyun 	if (ua_tmp->castparam)
6044*4882a593Smuzhiyun 		ua->castparam = ua_tmp->castparam;
6045*4882a593Smuzhiyun 	ua->user_lksb = ua_tmp->user_lksb;
6046*4882a593Smuzhiyun 
6047*4882a593Smuzhiyun 	error = set_unlock_args(flags, ua, &args);
6048*4882a593Smuzhiyun 	if (error)
6049*4882a593Smuzhiyun 		goto out_put;
6050*4882a593Smuzhiyun 
6051*4882a593Smuzhiyun 	error = cancel_lock(ls, lkb, &args);
6052*4882a593Smuzhiyun 
6053*4882a593Smuzhiyun 	if (error == -DLM_ECANCEL)
6054*4882a593Smuzhiyun 		error = 0;
6055*4882a593Smuzhiyun 	/* from validate_unlock_args() */
6056*4882a593Smuzhiyun 	if (error == -EBUSY)
6057*4882a593Smuzhiyun 		error = 0;
6058*4882a593Smuzhiyun  out_put:
6059*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
6060*4882a593Smuzhiyun  out:
6061*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
6062*4882a593Smuzhiyun 	kfree(ua_tmp);
6063*4882a593Smuzhiyun 	return error;
6064*4882a593Smuzhiyun }
6065*4882a593Smuzhiyun 
6066*4882a593Smuzhiyun int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
6067*4882a593Smuzhiyun {
6068*4882a593Smuzhiyun 	struct dlm_lkb *lkb;
6069*4882a593Smuzhiyun 	struct dlm_args args;
6070*4882a593Smuzhiyun 	struct dlm_user_args *ua;
6071*4882a593Smuzhiyun 	struct dlm_rsb *r;
6072*4882a593Smuzhiyun 	int error;
6073*4882a593Smuzhiyun 
6074*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
6075*4882a593Smuzhiyun 
6076*4882a593Smuzhiyun 	error = find_lkb(ls, lkid, &lkb);
6077*4882a593Smuzhiyun 	if (error)
6078*4882a593Smuzhiyun 		goto out;
6079*4882a593Smuzhiyun 
6080*4882a593Smuzhiyun 	ua = lkb->lkb_ua;
6081*4882a593Smuzhiyun 
6082*4882a593Smuzhiyun 	error = set_unlock_args(flags, ua, &args);
6083*4882a593Smuzhiyun 	if (error)
6084*4882a593Smuzhiyun 		goto out_put;
6085*4882a593Smuzhiyun 
6086*4882a593Smuzhiyun 	/* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
6087*4882a593Smuzhiyun 
6088*4882a593Smuzhiyun 	r = lkb->lkb_resource;
6089*4882a593Smuzhiyun 	hold_rsb(r);
6090*4882a593Smuzhiyun 	lock_rsb(r);
6091*4882a593Smuzhiyun 
6092*4882a593Smuzhiyun 	error = validate_unlock_args(lkb, &args);
6093*4882a593Smuzhiyun 	if (error)
6094*4882a593Smuzhiyun 		goto out_r;
6095*4882a593Smuzhiyun 	lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
6096*4882a593Smuzhiyun 
6097*4882a593Smuzhiyun 	error = _cancel_lock(r, lkb);
6098*4882a593Smuzhiyun  out_r:
6099*4882a593Smuzhiyun 	unlock_rsb(r);
6100*4882a593Smuzhiyun 	put_rsb(r);
6101*4882a593Smuzhiyun 
6102*4882a593Smuzhiyun 	if (error == -DLM_ECANCEL)
6103*4882a593Smuzhiyun 		error = 0;
6104*4882a593Smuzhiyun 	/* from validate_unlock_args() */
6105*4882a593Smuzhiyun 	if (error == -EBUSY)
6106*4882a593Smuzhiyun 		error = 0;
6107*4882a593Smuzhiyun  out_put:
6108*4882a593Smuzhiyun 	dlm_put_lkb(lkb);
6109*4882a593Smuzhiyun  out:
6110*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
6111*4882a593Smuzhiyun 	return error;
6112*4882a593Smuzhiyun }
6113*4882a593Smuzhiyun 
6114*4882a593Smuzhiyun /* lkb's that are removed from the waiters list by revert are just left on the
6115*4882a593Smuzhiyun    orphans list with the granted orphan locks, to be freed by purge */
6116*4882a593Smuzhiyun 
6117*4882a593Smuzhiyun static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
6118*4882a593Smuzhiyun {
6119*4882a593Smuzhiyun 	struct dlm_args args;
6120*4882a593Smuzhiyun 	int error;
6121*4882a593Smuzhiyun 
6122*4882a593Smuzhiyun 	hold_lkb(lkb); /* reference for the ls_orphans list */
6123*4882a593Smuzhiyun 	mutex_lock(&ls->ls_orphans_mutex);
6124*4882a593Smuzhiyun 	list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
6125*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_orphans_mutex);
6126*4882a593Smuzhiyun 
6127*4882a593Smuzhiyun 	set_unlock_args(0, lkb->lkb_ua, &args);
6128*4882a593Smuzhiyun 
6129*4882a593Smuzhiyun 	error = cancel_lock(ls, lkb, &args);
6130*4882a593Smuzhiyun 	if (error == -DLM_ECANCEL)
6131*4882a593Smuzhiyun 		error = 0;
6132*4882a593Smuzhiyun 	return error;
6133*4882a593Smuzhiyun }
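
/* Editorial note: the cancel above reverts any in-flight request or
 * conversion (per the comment before orphan_proc_lock), while the ls_orphans
 * reference taken first keeps the lkb alive until dlm_user_adopt_orphan() or
 * do_purge() claims it. */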
6134*4882a593Smuzhiyun 
6135*4882a593Smuzhiyun /* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't
6136*4882a593Smuzhiyun    granted.  Regardless of what rsb queue the lock is on, it's removed and
6137*4882a593Smuzhiyun    freed.  The IVVALBLK flag causes the lvb on the resource to be invalidated
6138*4882a593Smuzhiyun    if our lock is PW/EX (it's ignored if our granted mode is smaller.) */
6139*4882a593Smuzhiyun 
6140*4882a593Smuzhiyun static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
6141*4882a593Smuzhiyun {
6142*4882a593Smuzhiyun 	struct dlm_args args;
6143*4882a593Smuzhiyun 	int error;
6144*4882a593Smuzhiyun 
6145*4882a593Smuzhiyun 	set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK,
6146*4882a593Smuzhiyun 			lkb->lkb_ua, &args);
6147*4882a593Smuzhiyun 
6148*4882a593Smuzhiyun 	error = unlock_lock(ls, lkb, &args);
6149*4882a593Smuzhiyun 	if (error == -DLM_EUNLOCK)
6150*4882a593Smuzhiyun 		error = 0;
6151*4882a593Smuzhiyun 	return error;
6152*4882a593Smuzhiyun }
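
/* Editorial sketch (assumption: illustrative predicate, not from this file):
 * when DLM_LKF_IVVALBLK can actually invalidate the resource LVB.  Per the
 * comment above, only a dying PW or EX holder could have written the LVB, so
 * lower granted modes are ignored. */
#if 0	/* illustrative only */
static int can_invalidate_lvb(int grmode)
{
	return grmode == DLM_LOCK_PW || grmode == DLM_LOCK_EX;
}
#endif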
6153*4882a593Smuzhiyun 
6154*4882a593Smuzhiyun /* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
6155*4882a593Smuzhiyun    (which does lock_rsb) due to deadlock with receiving a message that does
6156*4882a593Smuzhiyun    lock_rsb followed by dlm_user_add_cb() */
6157*4882a593Smuzhiyun 
6158*4882a593Smuzhiyun static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
6159*4882a593Smuzhiyun 				     struct dlm_user_proc *proc)
6160*4882a593Smuzhiyun {
6161*4882a593Smuzhiyun 	struct dlm_lkb *lkb = NULL;
6162*4882a593Smuzhiyun 
6163*4882a593Smuzhiyun 	mutex_lock(&ls->ls_clear_proc_locks);
6164*4882a593Smuzhiyun 	if (list_empty(&proc->locks))
6165*4882a593Smuzhiyun 		goto out;
6166*4882a593Smuzhiyun 
6167*4882a593Smuzhiyun 	lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue);
6168*4882a593Smuzhiyun 	list_del_init(&lkb->lkb_ownqueue);
6169*4882a593Smuzhiyun 
6170*4882a593Smuzhiyun 	if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
6171*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_ORPHAN;
6172*4882a593Smuzhiyun 	else
6173*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_DEAD;
6174*4882a593Smuzhiyun  out:
6175*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_clear_proc_locks);
6176*4882a593Smuzhiyun 	return lkb;
6177*4882a593Smuzhiyun }
6178*4882a593Smuzhiyun 
6179*4882a593Smuzhiyun /* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
6180*4882a593Smuzhiyun    1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
6181*4882a593Smuzhiyun    which we clear here. */
6182*4882a593Smuzhiyun 
6183*4882a593Smuzhiyun /* proc CLOSING flag is set so no more device_reads should look at proc->asts
6184*4882a593Smuzhiyun    list, and no more device_writes should add lkb's to proc->locks list; so we
6185*4882a593Smuzhiyun    shouldn't need to take asts_spin or locks_spin here.  This assumes that
6186*4882a593Smuzhiyun    device reads/writes/closes are serialized -- FIXME: we may need to serialize
6187*4882a593Smuzhiyun    them ourselves. */
6188*4882a593Smuzhiyun 
6189*4882a593Smuzhiyun void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
6190*4882a593Smuzhiyun {
6191*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
6192*4882a593Smuzhiyun 
6193*4882a593Smuzhiyun 	dlm_lock_recovery(ls);
6194*4882a593Smuzhiyun 
6195*4882a593Smuzhiyun 	while (1) {
6196*4882a593Smuzhiyun 		lkb = del_proc_lock(ls, proc);
6197*4882a593Smuzhiyun 		if (!lkb)
6198*4882a593Smuzhiyun 			break;
6199*4882a593Smuzhiyun 		del_timeout(lkb);
6200*4882a593Smuzhiyun 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
6201*4882a593Smuzhiyun 			orphan_proc_lock(ls, lkb);
6202*4882a593Smuzhiyun 		else
6203*4882a593Smuzhiyun 			unlock_proc_lock(ls, lkb);
6204*4882a593Smuzhiyun 
6205*4882a593Smuzhiyun 		/* this removes the reference for the proc->locks list
6206*4882a593Smuzhiyun 		   added by dlm_user_request, it may result in the lkb
6207*4882a593Smuzhiyun 		   being freed */
6208*4882a593Smuzhiyun 
6209*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6210*4882a593Smuzhiyun 	}
6211*4882a593Smuzhiyun 
6212*4882a593Smuzhiyun 	mutex_lock(&ls->ls_clear_proc_locks);
6213*4882a593Smuzhiyun 
6214*4882a593Smuzhiyun 	/* in-progress unlocks */
6215*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
6216*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_ownqueue);
6217*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_DEAD;
6218*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6219*4882a593Smuzhiyun 	}
6220*4882a593Smuzhiyun 
6221*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
6222*4882a593Smuzhiyun 		memset(&lkb->lkb_callbacks, 0,
6223*4882a593Smuzhiyun 		       sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
6224*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_cb_list);
6225*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6226*4882a593Smuzhiyun 	}
6227*4882a593Smuzhiyun 
6228*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_clear_proc_locks);
6229*4882a593Smuzhiyun 	dlm_unlock_recovery(ls);
6230*4882a593Smuzhiyun }
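
/* Editorial summary: the teardown above runs in three phases.
 *
 *   1) Pop locks off proc->locks one at a time under ls_clear_proc_locks
 *      (del_proc_lock), then orphan or force-unlock each one with the mutex
 *      dropped, per the deadlock note before del_proc_lock().
 *   2) Mark any in-progress unlocks on proc->unlocking DEAD and drop their
 *      references.
 *   3) Clear queued callbacks on proc->asts so no stale ast reaches a
 *      closing device.
 */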
6231*4882a593Smuzhiyun 
6232*4882a593Smuzhiyun static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
6233*4882a593Smuzhiyun {
6234*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
6235*4882a593Smuzhiyun 
6236*4882a593Smuzhiyun 	while (1) {
6237*4882a593Smuzhiyun 		lkb = NULL;
6238*4882a593Smuzhiyun 		spin_lock(&proc->locks_spin);
6239*4882a593Smuzhiyun 		if (!list_empty(&proc->locks)) {
6240*4882a593Smuzhiyun 			lkb = list_entry(proc->locks.next, struct dlm_lkb,
6241*4882a593Smuzhiyun 					 lkb_ownqueue);
6242*4882a593Smuzhiyun 			list_del_init(&lkb->lkb_ownqueue);
6243*4882a593Smuzhiyun 		}
6244*4882a593Smuzhiyun 		spin_unlock(&proc->locks_spin);
6245*4882a593Smuzhiyun 
6246*4882a593Smuzhiyun 		if (!lkb)
6247*4882a593Smuzhiyun 			break;
6248*4882a593Smuzhiyun 
6249*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_DEAD;
6250*4882a593Smuzhiyun 		unlock_proc_lock(ls, lkb);
6251*4882a593Smuzhiyun 		dlm_put_lkb(lkb); /* ref from proc->locks list */
6252*4882a593Smuzhiyun 	}
6253*4882a593Smuzhiyun 
6254*4882a593Smuzhiyun 	spin_lock(&proc->locks_spin);
6255*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
6256*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_ownqueue);
6257*4882a593Smuzhiyun 		lkb->lkb_flags |= DLM_IFL_DEAD;
6258*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6259*4882a593Smuzhiyun 	}
6260*4882a593Smuzhiyun 	spin_unlock(&proc->locks_spin);
6261*4882a593Smuzhiyun 
6262*4882a593Smuzhiyun 	spin_lock(&proc->asts_spin);
6263*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
6264*4882a593Smuzhiyun 		memset(&lkb->lkb_callbacks, 0,
6265*4882a593Smuzhiyun 		       sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
6266*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_cb_list);
6267*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6268*4882a593Smuzhiyun 	}
6269*4882a593Smuzhiyun 	spin_unlock(&proc->asts_spin);
6270*4882a593Smuzhiyun }
6271*4882a593Smuzhiyun 
6272*4882a593Smuzhiyun /* pid of 0 means purge all orphans */
6273*4882a593Smuzhiyun 
6274*4882a593Smuzhiyun static void do_purge(struct dlm_ls *ls, int nodeid, int pid)
6275*4882a593Smuzhiyun {
6276*4882a593Smuzhiyun 	struct dlm_lkb *lkb, *safe;
6277*4882a593Smuzhiyun 
6278*4882a593Smuzhiyun 	mutex_lock(&ls->ls_orphans_mutex);
6279*4882a593Smuzhiyun 	list_for_each_entry_safe(lkb, safe, &ls->ls_orphans, lkb_ownqueue) {
6280*4882a593Smuzhiyun 		if (pid && lkb->lkb_ownpid != pid)
6281*4882a593Smuzhiyun 			continue;
6282*4882a593Smuzhiyun 		unlock_proc_lock(ls, lkb);
6283*4882a593Smuzhiyun 		list_del_init(&lkb->lkb_ownqueue);
6284*4882a593Smuzhiyun 		dlm_put_lkb(lkb);
6285*4882a593Smuzhiyun 	}
6286*4882a593Smuzhiyun 	mutex_unlock(&ls->ls_orphans_mutex);
6287*4882a593Smuzhiyun }
6288*4882a593Smuzhiyun 
6289*4882a593Smuzhiyun static int send_purge(struct dlm_ls *ls, int nodeid, int pid)
6290*4882a593Smuzhiyun {
6291*4882a593Smuzhiyun 	struct dlm_message *ms;
6292*4882a593Smuzhiyun 	struct dlm_mhandle *mh;
6293*4882a593Smuzhiyun 	int error;
6294*4882a593Smuzhiyun 
6295*4882a593Smuzhiyun 	error = _create_message(ls, sizeof(struct dlm_message), nodeid,
6296*4882a593Smuzhiyun 				DLM_MSG_PURGE, &ms, &mh);
6297*4882a593Smuzhiyun 	if (error)
6298*4882a593Smuzhiyun 		return error;
6299*4882a593Smuzhiyun 	ms->m_nodeid = nodeid;
6300*4882a593Smuzhiyun 	ms->m_pid = pid;
6301*4882a593Smuzhiyun 
6302*4882a593Smuzhiyun 	return send_message(mh, ms);
6303*4882a593Smuzhiyun }
6304*4882a593Smuzhiyun 
6305*4882a593Smuzhiyun int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
6306*4882a593Smuzhiyun 		   int nodeid, int pid)
6307*4882a593Smuzhiyun {
6308*4882a593Smuzhiyun 	int error = 0;
6309*4882a593Smuzhiyun 
6310*4882a593Smuzhiyun 	if (nodeid && (nodeid != dlm_our_nodeid())) {
6311*4882a593Smuzhiyun 		error = send_purge(ls, nodeid, pid);
6312*4882a593Smuzhiyun 	} else {
6313*4882a593Smuzhiyun 		dlm_lock_recovery(ls);
6314*4882a593Smuzhiyun 		if (pid == current->pid)
6315*4882a593Smuzhiyun 			purge_proc_locks(ls, proc);
6316*4882a593Smuzhiyun 		else
6317*4882a593Smuzhiyun 			do_purge(ls, nodeid, pid);
6318*4882a593Smuzhiyun 		dlm_unlock_recovery(ls);
6319*4882a593Smuzhiyun 	}
6320*4882a593Smuzhiyun 	return error;
6321*4882a593Smuzhiyun }
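
/* Editorial sketch of the purge routing above (receive_purge() is the remote
 * handler for DLM_MSG_PURGE, defined earlier in this file):
 *
 *   nodeid set, not us         -> send_purge() -> DLM_MSG_PURGE
 *                                 -> remote receive_purge() -> do_purge()
 *   local, pid == current->pid -> purge_proc_locks(): this process's own
 *                                 locks, taken via proc->locks_spin
 *   local, other pid (0 = all) -> do_purge(): matching ls_orphans entries
 */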
6322*4882a593Smuzhiyun 
6323