// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * userdlm.c
 *
 * Code which implements the kernel side of a minimal userspace
 * interface to our DLM.
 *
 * Many of the functions here are pared down versions of dlmglue.c
 * functions.
 *
 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
 */

#include <linux/signal.h>
#include <linux/sched/signal.h>

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/crc32.h>

#include "../ocfs2_lockingver.h"
#include "../stackglue.h"
#include "userdlm.h"

#define MLOG_MASK_PREFIX ML_DLMFS
#include "../cluster/masklog.h"


static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
{
        return container_of(lksb, struct user_lock_res, l_lksb);
}

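/* Sample a flag under l_lock so that wait_event() callers test a
 * consistent snapshot of l_flags. */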
static inline int user_check_wait_flag(struct user_lock_res *lockres,
                                       int flag)
{
        int ret;

        spin_lock(&lockres->l_lock);
        ret = lockres->l_flags & flag;
        spin_unlock(&lockres->l_lock);

        return ret;
}

static inline void user_wait_on_busy_lock(struct user_lock_res *lockres)
{
        wait_event(lockres->l_event,
                   !user_check_wait_flag(lockres, USER_LOCK_BUSY));
}

static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres)
{
        wait_event(lockres->l_event,
                   !user_check_wait_flag(lockres, USER_LOCK_BLOCKED));
}

/* I heart container_of... */
static inline struct ocfs2_cluster_connection *
cluster_connection_from_user_lockres(struct user_lock_res *lockres)
{
        struct dlmfs_inode_private *ip;

        ip = container_of(lockres,
                          struct dlmfs_inode_private,
                          ip_lockres);
        return ip->ip_conn;
}

static struct inode *
user_dlm_inode_from_user_lockres(struct user_lock_res *lockres)
{
        struct dlmfs_inode_private *ip;

        ip = container_of(lockres,
                          struct dlmfs_inode_private,
                          ip_lockres);
        return &ip->ip_vfs_inode;
}

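/* A dlm call failed before any AST could fire; drop the busy flag so
 * waiters in user_wait_on_busy_lock() can make progress again. */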
static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
{
        spin_lock(&lockres->l_lock);
        lockres->l_flags &= ~USER_LOCK_BUSY;
        spin_unlock(&lockres->l_lock);
}

#define user_log_dlm_error(_func, _stat, _lockres) do {                 \
        mlog(ML_ERROR, "Dlm error %d while calling %s on "              \
                "resource %.*s\n", _stat, _func,                        \
                _lockres->l_namelen, _lockres->l_name);                 \
} while (0)

/* WARNING: This function lives in a world where the only three lock
 * levels are EX, PR, and NL. It *will* have to be adjusted when more
 * lock types are added. */
static inline int user_highest_compat_lock_level(int level)
{
        int new_level = DLM_LOCK_EX;

        if (level == DLM_LOCK_EX)
                new_level = DLM_LOCK_NL;
        else if (level == DLM_LOCK_PR)
                new_level = DLM_LOCK_PR;
        return new_level;
}

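/* Locking AST: the dlm has granted l_requested. Fold the new level into
 * the lockres state and, if a downconvert now satisfies the blocked
 * request, clear USER_LOCK_BLOCKED. */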
static void user_ast(struct ocfs2_dlm_lksb *lksb)
{
        struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
        int status;

        mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n",
             lockres->l_namelen, lockres->l_name, lockres->l_level,
             lockres->l_requested);

        spin_lock(&lockres->l_lock);

        status = ocfs2_dlm_lock_status(&lockres->l_lksb);
        if (status) {
                mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
                     status, lockres->l_namelen, lockres->l_name);
                spin_unlock(&lockres->l_lock);
                return;
        }

        mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV,
                        "Lockres %.*s, requested ivmode. flags 0x%x\n",
                        lockres->l_namelen, lockres->l_name, lockres->l_flags);

        /* we're downconverting. */
        if (lockres->l_requested < lockres->l_level) {
                if (lockres->l_requested <=
                    user_highest_compat_lock_level(lockres->l_blocking)) {
                        lockres->l_blocking = DLM_LOCK_NL;
                        lockres->l_flags &= ~USER_LOCK_BLOCKED;
                }
        }

        lockres->l_level = lockres->l_requested;
        lockres->l_requested = DLM_LOCK_IV;
        lockres->l_flags |= USER_LOCK_ATTACHED;
        lockres->l_flags &= ~USER_LOCK_BUSY;

        spin_unlock(&lockres->l_lock);

        wake_up(&lockres->l_event);
}

static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
{
        struct inode *inode;
        inode = user_dlm_inode_from_user_lockres(lockres);
        if (!igrab(inode))
                BUG();
}

static void user_dlm_unblock_lock(struct work_struct *work);

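/* Queue the lockres for the unblock worker. USER_LOCK_QUEUED keeps at
 * most one work item pending, and the inode reference taken here pins
 * the lockres until the worker drops it in user_dlm_drop_inode_ref().
 * Caller must hold l_lock. */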
static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
{
        if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
                user_dlm_grab_inode_ref(lockres);

                INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);

                queue_work(user_dlm_worker, &lockres->l_work);
                lockres->l_flags |= USER_LOCK_QUEUED;
        }
}

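/* Queue the lockres only if a blocked request has become compatible
 * with the holders that remain. Caller must hold l_lock. */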
static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres)
{
        int queue = 0;

        if (!(lockres->l_flags & USER_LOCK_BLOCKED))
                return;

        switch (lockres->l_blocking) {
        case DLM_LOCK_EX:
                if (!lockres->l_ex_holders && !lockres->l_ro_holders)
                        queue = 1;
                break;
        case DLM_LOCK_PR:
                if (!lockres->l_ex_holders)
                        queue = 1;
                break;
        default:
                BUG();
        }

        if (queue)
                __user_dlm_queue_lockres(lockres);
}

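/* Blocking AST: another node wants this lock at 'level'. Record the
 * highest conflicting level and kick the unblock worker, which will
 * downconvert once local holders allow it. */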
static void user_bast(struct ocfs2_dlm_lksb *lksb, int level)
{
        struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);

        mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n",
             lockres->l_namelen, lockres->l_name, level, lockres->l_level);

        spin_lock(&lockres->l_lock);
        lockres->l_flags |= USER_LOCK_BLOCKED;
        if (level > lockres->l_blocking)
                lockres->l_blocking = level;

        __user_dlm_queue_lockres(lockres);
        spin_unlock(&lockres->l_lock);

        wake_up(&lockres->l_event);
}

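/* Unlock AST: fires when an unlock or a convert-cancel completes. Three
 * cases matter: a plain unlock during teardown, a cancel that raced
 * with a grant (DLM_CANCELGRANT), and a cancel that succeeded. */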
static void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status)
{
        struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);

        mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n",
             lockres->l_namelen, lockres->l_name, lockres->l_flags);

        if (status)
                mlog(ML_ERROR, "dlm returns status %d\n", status);

        spin_lock(&lockres->l_lock);
        /* The teardown flag gets set early during the unlock process,
         * so test the cancel flag to make sure that this ast isn't
         * for a concurrent cancel. */
        if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
            && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
                lockres->l_level = DLM_LOCK_IV;
        } else if (status == DLM_CANCELGRANT) {
                /* We tried to cancel a convert request, but it was
                 * already granted. Don't clear the busy flag - the
                 * ast should've done this already. */
                BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
                lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
                goto out_noclear;
        } else {
                BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
                /* Cancel succeeded, we want to re-queue */
                lockres->l_requested = DLM_LOCK_IV; /* cancel an
                                                     * upconvert
                                                     * request. */
                lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
                /* we want the unblock thread to look at it again
                 * now. */
                if (lockres->l_flags & USER_LOCK_BLOCKED)
                        __user_dlm_queue_lockres(lockres);
        }

        lockres->l_flags &= ~USER_LOCK_BUSY;
out_noclear:
        spin_unlock(&lockres->l_lock);

        wake_up(&lockres->l_event);
}

/*
 * This is the userdlmfs locking protocol version.
 *
 * See fs/ocfs2/dlmglue.c for more details on locking versions.
 */
static struct ocfs2_locking_protocol user_dlm_lproto = {
        .lp_max_version = {
                .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
                .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
        },
        .lp_lock_ast            = user_ast,
        .lp_blocking_ast        = user_bast,
        .lp_unlock_ast          = user_unlock_ast,
};

static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
{
        struct inode *inode;
        inode = user_dlm_inode_from_user_lockres(lockres);
        iput(inode);
}

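/* Worker for the user_dlm_worker workqueue: try to downconvert a
 * blocked lock. Bails out if the lock is no longer blocked, is being
 * torn down, or still has incompatible local holders; if a convert is
 * in flight, a cancel is issued instead. */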
static void user_dlm_unblock_lock(struct work_struct *work)
{
        int new_level, status;
        struct user_lock_res *lockres =
                container_of(work, struct user_lock_res, l_work);
        struct ocfs2_cluster_connection *conn =
                cluster_connection_from_user_lockres(lockres);

        mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);

        spin_lock(&lockres->l_lock);

        mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
                        "Lockres %.*s, flags 0x%x\n",
                        lockres->l_namelen, lockres->l_name, lockres->l_flags);

        /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
         * set, we want user_ast to clear it. */
        lockres->l_flags &= ~USER_LOCK_QUEUED;

        /* It's valid to get here and no longer be blocked - if we get
         * several basts in a row, we might be queued by the first
         * one, the unblock thread might run and clear the queued
         * flag, and finally we might get another bast which re-queues
         * us before our ast for the downconvert is called. */
        if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
                mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n",
                     lockres->l_namelen, lockres->l_name);
                spin_unlock(&lockres->l_lock);
                goto drop_ref;
        }

        if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
                mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n",
                     lockres->l_namelen, lockres->l_name);
                spin_unlock(&lockres->l_lock);
                goto drop_ref;
        }

        if (lockres->l_flags & USER_LOCK_BUSY) {
                if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
                        mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n",
                             lockres->l_namelen, lockres->l_name);
                        spin_unlock(&lockres->l_lock);
                        goto drop_ref;
                }

                lockres->l_flags |= USER_LOCK_IN_CANCEL;
                spin_unlock(&lockres->l_lock);

                status = ocfs2_dlm_unlock(conn, &lockres->l_lksb,
                                          DLM_LKF_CANCEL);
                if (status)
                        user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
                goto drop_ref;
        }

        /* If there are still incompat holders, we can exit safely
         * without worrying about re-queueing this lock as that will
         * happen on the last call to user_dlm_cluster_unlock. */
        if ((lockres->l_blocking == DLM_LOCK_EX)
            && (lockres->l_ex_holders || lockres->l_ro_holders)) {
                spin_unlock(&lockres->l_lock);
                mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n",
                     lockres->l_namelen, lockres->l_name,
                     lockres->l_ex_holders, lockres->l_ro_holders);
                goto drop_ref;
        }

        if ((lockres->l_blocking == DLM_LOCK_PR)
            && lockres->l_ex_holders) {
                spin_unlock(&lockres->l_lock);
                mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n",
                     lockres->l_namelen, lockres->l_name,
                     lockres->l_ex_holders);
                goto drop_ref;
        }

        /* yay, we can downconvert now. */
        new_level = user_highest_compat_lock_level(lockres->l_blocking);
        lockres->l_requested = new_level;
        lockres->l_flags |= USER_LOCK_BUSY;
        mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n",
             lockres->l_namelen, lockres->l_name, lockres->l_level, new_level);
        spin_unlock(&lockres->l_lock);

        /* need lock downconvert request now... */
        status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb,
                                DLM_LKF_CONVERT|DLM_LKF_VALBLK,
                                lockres->l_name,
                                lockres->l_namelen);
        if (status) {
                user_log_dlm_error("ocfs2_dlm_lock", status, lockres);
                user_recover_from_dlm_error(lockres);
        }

drop_ref:
        user_dlm_drop_inode_ref(lockres);
}

static inline void user_dlm_inc_holders(struct user_lock_res *lockres,
                                        int level)
{
        switch(level) {
        case DLM_LOCK_EX:
                lockres->l_ex_holders++;
                break;
        case DLM_LOCK_PR:
                lockres->l_ro_holders++;
                break;
        default:
                BUG();
        }
}

/* predict what lock level we'll be dropping down to on behalf
 * of another node, and return true if the currently wanted
 * level will be compatible with it. */
static inline int
user_may_continue_on_blocked_lock(struct user_lock_res *lockres,
                                  int wanted)
{
        BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));

        return wanted <= user_highest_compat_lock_level(lockres->l_blocking);
}

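/* Take a cluster lock at PR or EX for a userspace holder, looping until
 * the request is granted. Returns 0 on success, -EINVAL for a bad
 * level, -ERESTARTSYS if interrupted by a signal, -EAGAIN if the
 * lockres is being torn down, or an error from ocfs2_dlm_lock(). */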
int user_dlm_cluster_lock(struct user_lock_res *lockres,
                          int level,
                          int lkm_flags)
{
        int status, local_flags;
        struct ocfs2_cluster_connection *conn =
                cluster_connection_from_user_lockres(lockres);

        if (level != DLM_LOCK_EX &&
            level != DLM_LOCK_PR) {
                mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
                     lockres->l_namelen, lockres->l_name);
                status = -EINVAL;
                goto bail;
        }

        mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n",
             lockres->l_namelen, lockres->l_name, level, lkm_flags);

again:
        if (signal_pending(current)) {
                status = -ERESTARTSYS;
                goto bail;
        }

        spin_lock(&lockres->l_lock);
        if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
                spin_unlock(&lockres->l_lock);
                status = -EAGAIN;
                goto bail;
        }

        /* We only compare against the currently granted level
         * here. If the lock is blocked waiting on a downconvert,
         * we'll get caught below. */
        if ((lockres->l_flags & USER_LOCK_BUSY) &&
            (level > lockres->l_level)) {
                /* is someone sitting in dlm_lock? If so, wait on
                 * them. */
                spin_unlock(&lockres->l_lock);

                user_wait_on_busy_lock(lockres);
                goto again;
        }

        if ((lockres->l_flags & USER_LOCK_BLOCKED) &&
            (!user_may_continue_on_blocked_lock(lockres, level))) {
                /* the lock is currently blocked on behalf of
                 * another node. */
                spin_unlock(&lockres->l_lock);

                user_wait_on_blocked_lock(lockres);
                goto again;
        }

        if (level > lockres->l_level) {
                local_flags = lkm_flags | DLM_LKF_VALBLK;
                if (lockres->l_level != DLM_LOCK_IV)
                        local_flags |= DLM_LKF_CONVERT;

                lockres->l_requested = level;
                lockres->l_flags |= USER_LOCK_BUSY;
                spin_unlock(&lockres->l_lock);

                BUG_ON(level == DLM_LOCK_IV);
                BUG_ON(level == DLM_LOCK_NL);

                /* call dlm_lock to upgrade lock now */
                status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb,
                                        local_flags, lockres->l_name,
                                        lockres->l_namelen);
                if (status) {
                        if ((lkm_flags & DLM_LKF_NOQUEUE) &&
                            (status != -EAGAIN))
                                user_log_dlm_error("ocfs2_dlm_lock",
                                                   status, lockres);
                        user_recover_from_dlm_error(lockres);
                        goto bail;
                }

                user_wait_on_busy_lock(lockres);
                goto again;
        }

        user_dlm_inc_holders(lockres, level);
        spin_unlock(&lockres->l_lock);

        status = 0;
bail:
        return status;
}

static inline void user_dlm_dec_holders(struct user_lock_res *lockres,
                                        int level)
{
        switch(level) {
        case DLM_LOCK_EX:
                BUG_ON(!lockres->l_ex_holders);
                lockres->l_ex_holders--;
                break;
        case DLM_LOCK_PR:
                BUG_ON(!lockres->l_ro_holders);
                lockres->l_ro_holders--;
                break;
        default:
                BUG();
        }
}

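/* Release one holder reference. If a blocked request is now compatible
 * with whatever holders remain, queue the lockres for downconvert. */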
void user_dlm_cluster_unlock(struct user_lock_res *lockres,
                             int level)
{
        if (level != DLM_LOCK_EX &&
            level != DLM_LOCK_PR) {
                mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
                     lockres->l_namelen, lockres->l_name);
                return;
        }

        spin_lock(&lockres->l_lock);
        user_dlm_dec_holders(lockres, level);
        __user_dlm_cond_queue_lockres(lockres);
        spin_unlock(&lockres->l_lock);
}

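/* Copy a new value into the lock value block. The caller must hold the
 * lock at EX (enforced by the BUG_ON below); the value is handed back
 * to the dlm when the lock is later downconverted or unlocked with
 * DLM_LKF_VALBLK. */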
void user_dlm_write_lvb(struct inode *inode,
                        const char *val,
                        unsigned int len)
{
        struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
        char *lvb;

        BUG_ON(len > DLM_LVB_LEN);

        spin_lock(&lockres->l_lock);

        BUG_ON(lockres->l_level < DLM_LOCK_EX);
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
        memcpy(lvb, val, len);

        spin_unlock(&lockres->l_lock);
}

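/* Read the lock value block into 'val'. The caller must hold the lock
 * at PR or better; returns false if the dlm reports that the LVB
 * contents are no longer valid. */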
bool user_dlm_read_lvb(struct inode *inode, char *val)
{
        struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
        char *lvb;
        bool ret = true;

        spin_lock(&lockres->l_lock);

        BUG_ON(lockres->l_level < DLM_LOCK_PR);
        if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) {
                lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
                memcpy(val, lvb, DLM_LVB_LEN);
        } else
                ret = false;

        spin_unlock(&lockres->l_lock);
        return ret;
}

void user_dlm_lock_res_init(struct user_lock_res *lockres,
                            struct dentry *dentry)
{
        memset(lockres, 0, sizeof(*lockres));

        spin_lock_init(&lockres->l_lock);
        init_waitqueue_head(&lockres->l_event);
        lockres->l_level = DLM_LOCK_IV;
        lockres->l_requested = DLM_LOCK_IV;
        lockres->l_blocking = DLM_LOCK_IV;

        /* should have been checked before getting here. */
        BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);

        memcpy(lockres->l_name,
               dentry->d_name.name,
               dentry->d_name.len);
        lockres->l_namelen = dentry->d_name.len;
}

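/* Tear down the dlm lock backing this lockres. Returns -EBUSY if a
 * teardown is already in flight or if holders remain, 0 otherwise.
 * On success, USER_LOCK_IN_TEARDOWN stays set so that no new lock
 * request can race in. */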
int user_dlm_destroy_lock(struct user_lock_res *lockres)
{
        int status = -EBUSY;
        struct ocfs2_cluster_connection *conn =
                cluster_connection_from_user_lockres(lockres);

        mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);

        spin_lock(&lockres->l_lock);
        if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
                spin_unlock(&lockres->l_lock);
                goto bail;
        }

        lockres->l_flags |= USER_LOCK_IN_TEARDOWN;

        while (lockres->l_flags & USER_LOCK_BUSY) {
                spin_unlock(&lockres->l_lock);

                user_wait_on_busy_lock(lockres);

                spin_lock(&lockres->l_lock);
        }

        if (lockres->l_ro_holders || lockres->l_ex_holders) {
                lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
                spin_unlock(&lockres->l_lock);
                goto bail;
        }

        status = 0;
        if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
                /*
                 * The lock was never requested; leave USER_LOCK_IN_TEARDOWN
                 * set to keep new lock requests from coming in.
                 */
                spin_unlock(&lockres->l_lock);
                goto bail;
        }

        lockres->l_flags &= ~USER_LOCK_ATTACHED;
        lockres->l_flags |= USER_LOCK_BUSY;
        spin_unlock(&lockres->l_lock);

        status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
        if (status) {
                spin_lock(&lockres->l_lock);
                lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
                lockres->l_flags &= ~USER_LOCK_BUSY;
                spin_unlock(&lockres->l_lock);
                user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
                goto bail;
        }

        user_wait_on_busy_lock(lockres);

        status = 0;
bail:
        return status;
}

static void user_dlm_recovery_handler_noop(int node_num,
                                           void *recovery_data)
{
        /* We ignore recovery events */
        return;
}

void user_dlm_set_locking_protocol(void)
{
        ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version);
}

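/* Bring up a stack-agnostic cluster connection for the dlmfs domain
 * 'name'. Returns the connection, or an ERR_PTR on failure. */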
struct ocfs2_cluster_connection *user_dlm_register(const struct qstr *name)
{
        int rc;
        struct ocfs2_cluster_connection *conn;

        rc = ocfs2_cluster_connect_agnostic(name->name, name->len,
                                            &user_dlm_lproto,
                                            user_dlm_recovery_handler_noop,
                                            NULL, &conn);
        if (rc)
                mlog_errno(rc);

        return rc ? ERR_PTR(rc) : conn;
}

void user_dlm_unregister(struct ocfs2_cluster_connection *conn)
{
        ocfs2_cluster_disconnect(conn, 0);
}