xref: /OK3568_Linux_fs/kernel/fs/ceph/locks.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/file.h>
#include <linux/namei.h>
#include <linux/random.h>

#include "super.h"
#include "mds_client.h"
#include <linux/ceph/pagelist.h>

static u64 lock_secret;
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
					 struct ceph_mds_request *req);

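/*
 * Obfuscate the lock owner pointer before it is sent to the MDS:
 * XOR it with lock_secret, a random value generated once at init by
 * ceph_flock_init(), so raw kernel addresses never leave the client.
 */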
static inline u64 secure_addr(void *addr)
{
	u64 v = lock_secret ^ (u64)(unsigned long)addr;
	/*
	 * Set the most significant bit, so that MDS knows the 'owner'
	 * is sufficient to identify the owner of lock. (old code uses
	 * both 'owner' and 'pid')
	 */
	v |= (1ULL << 63);
	return v;
}

void __init ceph_flock_init(void)
{
	get_random_bytes(&lock_secret, sizeof(lock_secret));
}

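/*
 * fl_copy_lock callback: the VFS is duplicating a ceph-owned file_lock,
 * so take an extra reference on both the per-inode and per-file lock
 * counters.
 */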
static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
	struct ceph_file_info *fi = dst->fl_file->private_data;
	struct inode *inode = file_inode(dst->fl_file);
	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
	atomic_inc(&fi->num_locks);
}

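/*
 * fl_release_private callback: drop the references taken above and,
 * once the last lock on the inode is gone, clear the sticky
 * CEPH_I_ERROR_FILELOCK flag.
 */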
static void ceph_fl_release_lock(struct file_lock *fl)
{
	struct ceph_file_info *fi = fl->fl_file->private_data;
	struct inode *inode = file_inode(fl->fl_file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	atomic_dec(&fi->num_locks);
	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
		/* clear error when all locks are released */
		spin_lock(&ci->i_ceph_lock);
		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
		spin_unlock(&ci->i_ceph_lock);
	}
}

static const struct file_lock_operations ceph_fl_lock_ops = {
	.fl_copy_lock = ceph_fl_copy_lock,
	.fl_release_private = ceph_fl_release_lock,
};

/**
 * Implement fcntl and flock locking functions.
 */
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
			     int cmd, u8 wait, struct file_lock *fl)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
	struct ceph_mds_request *req;
	int err;
	u64 length = 0;
	u64 owner;

	if (operation == CEPH_MDS_OP_SETFILELOCK) {
		/*
		 * increasing i_filelock_ref closes race window between
		 * handling request reply and adding file_lock struct to
		 * inode. Otherwise, auth caps may get trimmed in the
		 * window. Caller function will decrease the counter.
		 */
		fl->fl_ops = &ceph_fl_lock_ops;
		fl->fl_ops->fl_copy_lock(fl, NULL);
	}

	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
		wait = 0;

	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->r_inode = inode;
	ihold(inode);
	req->r_num_caps = 1;

	/* mds requires start and length rather than start and end */
	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	owner = secure_addr(fl->fl_owner);

	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
	     "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
	     wait, fl->fl_type);

	req->r_args.filelock_change.rule = lock_type;
	req->r_args.filelock_change.type = cmd;
	req->r_args.filelock_change.owner = cpu_to_le64(owner);
	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
	req->r_args.filelock_change.length = cpu_to_le64(length);
	req->r_args.filelock_change.wait = wait;

	if (wait)
		req->r_wait_for_completion = ceph_lock_wait_for_completion;

	err = ceph_mdsc_do_request(mdsc, inode, req);
	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
		fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_RDLCK;
		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_WRLCK;
		else
			fl->fl_type = F_UNLCK;

		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
			 le64_to_cpu(req->r_reply_info.filelock_reply->length);
		if (length >= 1)
			fl->fl_end = length - 1;
		else
			fl->fl_end = 0;

	}
	ceph_mdsc_put_request(req);
	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
	     "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
	     length, wait, fl->fl_type, err);
	return err;
}

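/*
 * Wait helper installed via req->r_wait_for_completion for blocking
 * SETFILELOCK requests; handles the case where the wait is interrupted
 * by a signal.
 */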
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
                                         struct ceph_mds_request *req)
{
	struct ceph_mds_request *intr_req;
	struct inode *inode = req->r_inode;
	int err, lock_type;

	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
		lock_type = CEPH_LOCK_FCNTL_INTR;
	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
		lock_type = CEPH_LOCK_FLOCK_INTR;
	else
		BUG_ON(1);
	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);

	err = wait_for_completion_interruptible(&req->r_completion);
	if (!err)
		return 0;

	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
	     req->r_tid);

	mutex_lock(&mdsc->mutex);
	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
		err = 0;
	} else {
		/*
		 * ensure we aren't running concurrently with
		 * ceph_fill_trace or ceph_readdir_prepopulate, which
		 * rely on locks (dir mutex) held by our caller.
		 */
		mutex_lock(&req->r_fill_mutex);
		req->r_err = err;
		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
		mutex_unlock(&req->r_fill_mutex);

		if (!req->r_session) {
			// haven't sent the request
			err = 0;
		}
	}
	mutex_unlock(&mdsc->mutex);
	if (!err)
		return 0;

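	/*
	 * Tell the MDS to give up on the blocked lock: send a matching
	 * request with the *_INTR rule and an UNLOCK type, then wait
	 * for the original request to become safe.
	 */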
	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
					    USE_AUTH_MDS);
	if (IS_ERR(intr_req))
		return PTR_ERR(intr_req);

	intr_req->r_inode = inode;
	ihold(inode);
	intr_req->r_num_caps = 1;

	intr_req->r_args.filelock_change = req->r_args.filelock_change;
	intr_req->r_args.filelock_change.rule = lock_type;
	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;

	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
	ceph_mdsc_put_request(intr_req);

	if (err && err != -ERESTARTSYS)
		return err;

	wait_for_completion_killable(&req->r_safe_completion);
	return 0;
}

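/*
 * Try to drop the lock from the local lock lists first.  Returns 1 if
 * a local lock was removed and the MDS still needs to be told about
 * the unlock, 0 if no matching lock existed (the unlock is a no-op),
 * or a negative error code.
 */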
static int try_unlock_file(struct file *file, struct file_lock *fl)
{
	int err;
	unsigned int orig_flags = fl->fl_flags;
	fl->fl_flags |= FL_EXISTS;
	err = locks_lock_file_wait(file, fl);
	fl->fl_flags = orig_flags;
	if (err == -ENOENT) {
		if (!(orig_flags & FL_EXISTS))
			err = 0;
		return err;
	}
	return 1;
}

/**
 * Attempt to set an fcntl lock.
 * For now, this just goes away to the server. Later it may be more awesome.
 */
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	int err = 0;
	u16 op = CEPH_MDS_OP_SETFILELOCK;
	u8 wait = 0;
	u8 lock_cmd;

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
	/* No mandatory locks */
	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);

	/* set wait bit as appropriate, then make command as Ceph expects it */
	if (IS_GETLK(cmd))
		op = CEPH_MDS_OP_GETFILELOCK;
	else if (IS_SETLKW(cmd))
		wait = 1;

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
		err = -EIO;
	}
	spin_unlock(&ci->i_ceph_lock);
	if (err < 0) {
		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
			posix_lock_file(file, fl, NULL);
		return err;
	}

	if (F_RDLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_SHARED;
	else if (F_WRLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_EXCL;
	else
		lock_cmd = CEPH_LOCK_UNLOCK;

	if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) {
		err = try_unlock_file(file, fl);
		if (err <= 0)
			return err;
	}

	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
	if (!err) {
		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) {
			dout("mds locked, locking locally\n");
			err = posix_lock_file(file, fl, NULL);
			if (err) {
				/* undo! This should only happen if
				 * the kernel detects local
				 * deadlock. */
				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
						  CEPH_LOCK_UNLOCK, 0, fl);
				dout("got %d on posix_lock_file, undid lock\n",
				     err);
			}
		}
	}
	return err;
}

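/**
 * Attempt to set a flock (BSD-style) lock.
 * Mirrors ceph_lock() but uses the CEPH_LOCK_FLOCK rule and the local
 * flock list instead of the POSIX one.
 */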
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	int err = 0;
	u8 wait = 0;
	u8 lock_cmd;

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;
	/* No mandatory locks */
	if (fl->fl_type & LOCK_MAND)
		return -EOPNOTSUPP;

	dout("ceph_flock, fl_file: %p\n", fl->fl_file);

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
		err = -EIO;
	}
	spin_unlock(&ci->i_ceph_lock);
	if (err < 0) {
		if (F_UNLCK == fl->fl_type)
			locks_lock_file_wait(file, fl);
		return err;
	}

	if (IS_SETLKW(cmd))
		wait = 1;

	if (F_RDLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_SHARED;
	else if (F_WRLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_EXCL;
	else
		lock_cmd = CEPH_LOCK_UNLOCK;

	if (F_UNLCK == fl->fl_type) {
		err = try_unlock_file(file, fl);
		if (err <= 0)
			return err;
	}

	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
				inode, lock_cmd, wait, fl);
	if (!err && F_UNLCK != fl->fl_type) {
		err = locks_lock_file_wait(file, fl);
		if (err) {
			ceph_lock_message(CEPH_LOCK_FLOCK,
					  CEPH_MDS_OP_SETFILELOCK,
					  inode, CEPH_LOCK_UNLOCK, 0, fl);
			dout("got %d on locks_lock_file_wait, undid lock\n", err);
		}
	}
	return err;
}

/*
 * Fills in the passed counter variables, so you can prepare pagelist metadata
 * before calling ceph_encode_locks.
 */
void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
{
	struct file_lock *lock;
	struct file_lock_context *ctx;

	*fcntl_count = 0;
	*flock_count = 0;

	ctx = inode->i_flctx;
	if (ctx) {
		spin_lock(&ctx->flc_lock);
		list_for_each_entry(lock, &ctx->flc_posix, fl_list)
			++(*fcntl_count);
		list_for_each_entry(lock, &ctx->flc_flock, fl_list)
			++(*flock_count);
		spin_unlock(&ctx->flc_lock);
	}
	dout("counted %d flock locks and %d fcntl locks\n",
	     *flock_count, *fcntl_count);
}

/*
 * Given a pointer to a lock, convert it to a ceph filelock
 */
static int lock_to_ceph_filelock(struct file_lock *lock,
				 struct ceph_filelock *cephlock)
{
	int err = 0;
	cephlock->start = cpu_to_le64(lock->fl_start);
	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
	cephlock->client = cpu_to_le64(0);
	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));

	switch (lock->fl_type) {
	case F_RDLCK:
		cephlock->type = CEPH_LOCK_SHARED;
		break;
	case F_WRLCK:
		cephlock->type = CEPH_LOCK_EXCL;
		break;
	case F_UNLCK:
		cephlock->type = CEPH_LOCK_UNLOCK;
		break;
	default:
		dout("Have unknown lock type %d\n", lock->fl_type);
		err = -EINVAL;
	}

	return err;
}

/**
 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
 * array. Must be called with inode->i_lock already held.
 * If we encounter more of a specific lock type than expected, return -ENOSPC.
 */
int ceph_encode_locks_to_buffer(struct inode *inode,
				struct ceph_filelock *flocks,
				int num_fcntl_locks, int num_flock_locks)
{
	struct file_lock *lock;
	struct file_lock_context *ctx = inode->i_flctx;
	int err = 0;
	int seen_fcntl = 0;
	int seen_flock = 0;
	int l = 0;

	dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
	     num_fcntl_locks);

	if (!ctx)
		return 0;

	spin_lock(&ctx->flc_lock);
	list_for_each_entry(lock, &ctx->flc_posix, fl_list) {
		++seen_fcntl;
		if (seen_fcntl > num_fcntl_locks) {
			err = -ENOSPC;
			goto fail;
		}
		err = lock_to_ceph_filelock(lock, &flocks[l]);
		if (err)
			goto fail;
		++l;
	}
	list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
		++seen_flock;
		if (seen_flock > num_flock_locks) {
			err = -ENOSPC;
			goto fail;
		}
		err = lock_to_ceph_filelock(lock, &flocks[l]);
		if (err)
			goto fail;
		++l;
	}
fail:
	spin_unlock(&ctx->flc_lock);
	return err;
}

/**
 * Copy the encoded flock and fcntl locks into the pagelist.
 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
 * sequential flock locks.
 * Returns zero on success.
 */
int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
			   struct ceph_pagelist *pagelist,
			   int num_fcntl_locks, int num_flock_locks)
{
	int err = 0;
	__le32 nlocks;

	nlocks = cpu_to_le32(num_fcntl_locks);
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
	if (err)
		goto out_fail;

	if (num_fcntl_locks > 0) {
		err = ceph_pagelist_append(pagelist, flocks,
					   num_fcntl_locks * sizeof(*flocks));
		if (err)
			goto out_fail;
	}

	nlocks = cpu_to_le32(num_flock_locks);
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
	if (err)
		goto out_fail;

	if (num_flock_locks > 0) {
		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
					   num_flock_locks * sizeof(*flocks));
	}
out_fail:
	return err;
}