// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"


/* If the start for which we're re-enabling locking (seq) has been superseded
   by a newer stop (ls_recover_seq), we need to leave locking disabled.

   We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
   locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
   enables locking and clears the requestqueue between a and b. */

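/* The race, in time order (one hypothetical interleaving, for
 * illustration):
 *
 *	dlm_recv				dlm_recoverd
 *	a) sees locking stopped
 *						enables locking (LSFL_RUNNING)
 *						clears the requestqueue
 *	b) adds message to requestqueue
 *
 * The message added at (b) would never be processed.  Holding
 * ls_recv_active for write across the seq check below closes the window
 * between a and b. */
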
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int error = -EINTR;

	down_write(&ls->ls_recv_active);

	spin_lock(&ls->ls_recover_lock);
	if (ls->ls_recover_seq == seq) {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		error = 0;
	}
	spin_unlock(&ls->ls_recover_lock);

	up_write(&ls->ls_recv_active);
	return error;
}

static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
	unsigned long start;
	int error, neg = 0;

	log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);

	mutex_lock(&ls->ls_recoverd_active);

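	/* Callbacks are queued but not delivered while recovery runs;
	   dlm_callback_resume() below replays whatever accumulated. */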
	dlm_callback_suspend(ls);

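	/* Free rsb's sitting on the toss (unused) list; they would be
	   stale after the directory rebuild below anyway. */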
	dlm_clear_toss(ls);

	/*
	 * This list of root rsb's will be the basis of most of the recovery
	 * routines.
	 */

	dlm_create_root_list(ls);

	/*
	 * Add or remove nodes from the lockspace's ls_nodes list.
	 */

	error = dlm_recover_members(ls, rv, &neg);
	if (error) {
		log_rinfo(ls, "dlm_recover_members error %d", error);
		goto fail;
	}

	dlm_recover_dir_nodeid(ls);

	ls->ls_recover_dir_sent_res = 0;
	ls->ls_recover_dir_sent_msg = 0;
	ls->ls_recover_locks_in = 0;

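	/* Each dlm_set_recover_status()/..._wait() pair below acts as a
	   cluster-wide barrier: we publish our progress (DLM_RS_*) and the
	   wait call blocks until every other member reports the same step,
	   or until recovery is aborted by a newer stop. */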
	dlm_set_recover_status(ls, DLM_RS_NODES);

	error = dlm_recover_members_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_members_wait error %d", error);
		goto fail;
	}

	start = jiffies;

	/*
	 * Rebuild our own share of the directory by collecting from all other
	 * nodes their master rsb names that hash to us.
	 */

	error = dlm_recover_directory(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory error %d", error);
		goto fail;
	}

	dlm_set_recover_status(ls, DLM_RS_DIR);

	error = dlm_recover_directory_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
		goto fail;
	}

	log_rinfo(ls, "dlm_recover_directory %u out %u messages",
		  ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);

	/*
	 * We may have outstanding operations that are waiting for a reply from
	 * a failed node.  Mark these to be resent after recovery.  Unlock and
	 * cancel ops can just be completed.
	 */

	dlm_recover_waiters_pre(ls);

	error = dlm_recovery_stopped(ls);
	if (error)
		goto fail;

	if (neg || dlm_no_directory(ls)) {
		/*
		 * Clear lkb's for departed nodes.
		 */

		dlm_recover_purge(ls);

		/*
		 * Get new master nodeid's for rsb's that were mastered on
		 * departed nodes.
		 */

		error = dlm_recover_masters(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_masters error %d", error);
			goto fail;
		}

		/*
		 * Send our locks on remastered rsb's to the new masters.
		 */

		error = dlm_recover_locks(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks error %d", error);
			goto fail;
		}

		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}

		log_rinfo(ls, "dlm_recover_locks %u in",
			  ls->ls_recover_locks_in);

		/*
		 * Finalize state in master rsb's now that all locks can be
		 * checked.  This includes conversion resolution and lvb
		 * settings.
		 */

		dlm_recover_rsbs(ls);
	} else {
		/*
		 * Other lockspace members may be going through the "neg" steps
		 * while also adding us to the lockspace, in which case they'll
		 * be doing the recover_locks (RS_LOCKS) barrier.
		 */
		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail;
		}
	}

	dlm_release_root_list(ls);

	/*
	 * Purge directory-related requests that are saved in requestqueue.
	 * All dir requests from before recovery are invalid now due to the dir
	 * rebuild and will be resent by the requesting nodes.
	 */

	dlm_purge_requestqueue(ls);

	dlm_set_recover_status(ls, DLM_RS_DONE);

	error = dlm_recover_done_wait(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_done_wait error %d", error);
		goto fail;
	}

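	/* Every member has reached DLM_RS_DONE, so nothing refers to the
	   departed nodes any longer and their member structs can go. */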
	dlm_clear_members_gone(ls);

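	/* Presumably shifts lkb timestamps forward by the time recovery
	   took, so waiting lock requests aren't charged for it. */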
	dlm_adjust_timeouts(ls);

	dlm_callback_resume(ls);

	error = enable_locking(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "enable_locking error %d", error);
		goto fail;
	}

	error = dlm_process_requestqueue(ls);
	if (error) {
		log_rinfo(ls, "dlm_process_requestqueue error %d", error);
		goto fail;
	}

	error = dlm_recover_waiters_post(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
		goto fail;
	}

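	/* With lock state rebuilt and resent requests processed, grant
	   anything that has become grantable. */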
	dlm_recover_grant(ls);

	log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
		  (unsigned long long)rv->seq, ls->ls_generation,
		  jiffies_to_msecs(jiffies - start));
	mutex_unlock(&ls->ls_recoverd_active);

	dlm_lsop_recover_done(ls);
	return 0;

 fail:
	dlm_release_root_list(ls);
	log_rinfo(ls, "dlm_recover %llu error %d",
		  (unsigned long long)rv->seq, error);
	mutex_unlock(&ls->ls_recoverd_active);
	return error;
}

/* The dlm_ls_start() that created the rv we take here may already have been
   stopped via dlm_ls_stop(); in that case we need to leave the
   LSFL_RECOVER_STOP flag set. */

static void do_ls_recovery(struct dlm_ls *ls)
{
	struct dlm_recover *rv = NULL;

	spin_lock(&ls->ls_recover_lock);
	rv = ls->ls_recover_args;
	ls->ls_recover_args = NULL;
	if (rv && ls->ls_recover_seq == rv->seq)
		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	spin_unlock(&ls->ls_recover_lock);

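	/* ls_recover() does not free rv; we own the allocation made by
	   dlm_ls_start(), so release the node array and rv here. */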
	if (rv) {
		ls_recover(ls, rv);
		kfree(rv->nodes);
		kfree(rv);
	}
}

static int dlm_recoverd(void *arg)
{
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(arg);
	if (!ls) {
		log_print("dlm_recoverd: no lockspace %p", arg);
		return -1;
	}

	down_write(&ls->ls_in_recovery);
	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
	wake_up(&ls->ls_recover_lock_wait);

	while (1) {
		/*
		 * Check kthread_should_stop() only after setting
		 * TASK_INTERRUPTIBLE: if kthread_stop() is called just
		 * before set_current_state(), we still see it here and
		 * don't sleep through the wakeup.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			break;
		}
		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			if (kthread_should_stop())
				break;
			schedule();
		}
		set_current_state(TASK_RUNNING);

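		/* Two kinds of work, set by dlm_ls_stop() and dlm_ls_start()
		   respectively: LSFL_RECOVER_DOWN asks us to block locking
		   (take ls_in_recovery) until the next start;
		   LSFL_RECOVER_WORK asks us to run recovery with the args
		   queued in ls_recover_args. */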
		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			down_write(&ls->ls_in_recovery);
			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
			wake_up(&ls->ls_recover_lock_wait);
		}

		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
			do_ls_recovery(ls);
	}

	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
		up_write(&ls->ls_in_recovery);

	dlm_put_lockspace(ls);
	return 0;
}

int dlm_recoverd_start(struct dlm_ls *ls)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		ls->ls_recoverd_task = p;
	return error;
}

void dlm_recoverd_stop(struct dlm_ls *ls)
{
	kthread_stop(ls->ls_recoverd_task);
}

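/* Keep dlm_recoverd out of ls_recover() for a while: waking
   ls_wait_general is (presumably) meant to kick recovery out of any wait
   loop so that ls_recoverd_active can be taken without a long stall. */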
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
	wake_up(&ls->ls_wait_general);
	mutex_lock(&ls->ls_recoverd_active);
}

void dlm_recoverd_resume(struct dlm_ls *ls)
{
	mutex_unlock(&ls->ls_recoverd_active);
}