// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/file.h>
#include <linux/namei.h>
#include <linux/random.h>

#include "super.h"
#include "mds_client.h"
#include <linux/ceph/pagelist.h>

static u64 lock_secret;
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
					 struct ceph_mds_request *req);

static inline u64 secure_addr(void *addr)
{
	u64 v = lock_secret ^ (u64)(unsigned long)addr;
	/*
	 * Set the most significant bit, so that the MDS knows the 'owner'
	 * field alone is sufficient to identify the owner of the lock.
	 * (The old code used both 'owner' and 'pid'.)
	 */
	v |= (1ULL << 63);
	return v;
}

void __init ceph_flock_init(void)
{
	get_random_bytes(&lock_secret, sizeof(lock_secret));
}

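/*
 * fl_copy_lock/fl_release_private callbacks installed by ceph_lock_message()
 * on the file_lock structs it handles: they keep the inode's i_filelock_ref
 * and the ceph_file_info's num_locks counters in step as the VFS duplicates
 * and frees lock structures.
 */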
static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
	struct ceph_file_info *fi = dst->fl_file->private_data;
	struct inode *inode = file_inode(dst->fl_file);
	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
	atomic_inc(&fi->num_locks);
}

static void ceph_fl_release_lock(struct file_lock *fl)
{
	struct ceph_file_info *fi = fl->fl_file->private_data;
	struct inode *inode = file_inode(fl->fl_file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	atomic_dec(&fi->num_locks);
	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
		/* clear error when all locks are released */
		spin_lock(&ci->i_ceph_lock);
		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
		spin_unlock(&ci->i_ceph_lock);
	}
}

static const struct file_lock_operations ceph_fl_lock_ops = {
	.fl_copy_lock = ceph_fl_copy_lock,
	.fl_release_private = ceph_fl_release_lock,
};

/*
 * Implement fcntl and flock locking functions.
 */
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
			     int cmd, u8 wait, struct file_lock *fl)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
	struct ceph_mds_request *req;
	int err;
	u64 length = 0;
	u64 owner;

	if (operation == CEPH_MDS_OP_SETFILELOCK) {
		/*
		 * Increasing i_filelock_ref closes the race window between
		 * handling the request reply and adding the file_lock struct
		 * to the inode; otherwise the auth caps may get trimmed in
		 * that window. The caller will decrement the counter.
		 */
		fl->fl_ops = &ceph_fl_lock_ops;
		fl->fl_ops->fl_copy_lock(fl, NULL);
	}

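	/*
	 * Note: only a SETFILELOCK request that actually takes a lock is
	 * allowed to block on the MDS; for GETFILELOCK and unlock requests
	 * the wait flag is cleared below (presumably because those never
	 * need to wait for a conflicting lock to be dropped).
	 */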
	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
		wait = 0;

	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->r_inode = inode;
	ihold(inode);
	req->r_num_caps = 1;

	/* mds requires start and length rather than start and end */
	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	owner = secure_addr(fl->fl_owner);

	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
	     "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
	     wait, fl->fl_type);

	req->r_args.filelock_change.rule = lock_type;
	req->r_args.filelock_change.type = cmd;
	req->r_args.filelock_change.owner = cpu_to_le64(owner);
	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
	req->r_args.filelock_change.length = cpu_to_le64(length);
	req->r_args.filelock_change.wait = wait;

	if (wait)
		req->r_wait_for_completion = ceph_lock_wait_for_completion;

	err = ceph_mdsc_do_request(mdsc, inode, req);
	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
		fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_RDLCK;
		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_WRLCK;
		else
			fl->fl_type = F_UNLCK;

		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
			 le64_to_cpu(req->r_reply_info.filelock_reply->length);
		if (length >= 1)
			fl->fl_end = length - 1;
		else
			fl->fl_end = 0;

	}
	ceph_mdsc_put_request(req);
	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
	     "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
	     length, wait, fl->fl_type, err);
	return err;
}

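/*
 * Installed as req->r_wait_for_completion for blocking SETFILELOCK requests.
 * If the wait is interrupted, the pending lock attempt is cancelled on the
 * MDS by sending a matching CEPH_LOCK_FCNTL_INTR/CEPH_LOCK_FLOCK_INTR unlock
 * request, after which we wait for the original request to become safe.
 */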
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
					 struct ceph_mds_request *req)
{
	struct ceph_mds_request *intr_req;
	struct inode *inode = req->r_inode;
	int err, lock_type;

	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
		lock_type = CEPH_LOCK_FCNTL_INTR;
	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
		lock_type = CEPH_LOCK_FLOCK_INTR;
	else
		BUG_ON(1);
	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);

	err = wait_for_completion_interruptible(&req->r_completion);
	if (!err)
		return 0;

	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
	     req->r_tid);

	mutex_lock(&mdsc->mutex);
	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
		err = 0;
	} else {
		/*
		 * ensure we aren't running concurrently with
		 * ceph_fill_trace or ceph_readdir_prepopulate, which
		 * rely on locks (dir mutex) held by our caller.
		 */
		mutex_lock(&req->r_fill_mutex);
		req->r_err = err;
		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
		mutex_unlock(&req->r_fill_mutex);

		if (!req->r_session) {
			/* haven't sent the request */
			err = 0;
		}
	}
	mutex_unlock(&mdsc->mutex);
	if (!err)
		return 0;

	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
					    USE_AUTH_MDS);
	if (IS_ERR(intr_req))
		return PTR_ERR(intr_req);

	intr_req->r_inode = inode;
	ihold(inode);
	intr_req->r_num_caps = 1;

	intr_req->r_args.filelock_change = req->r_args.filelock_change;
	intr_req->r_args.filelock_change.rule = lock_type;
	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;

	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
	ceph_mdsc_put_request(intr_req);

	if (err && err != -ERESTARTSYS)
		return err;

	wait_for_completion_killable(&req->r_safe_completion);
	return 0;
}

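/*
 * Release the local copy of the lock first.  A return of 1 means the MDS
 * still needs to be told about the unlock; 0 or a negative value means the
 * caller can return immediately.
 */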
static int try_unlock_file(struct file *file, struct file_lock *fl)
{
	int err;
	unsigned int orig_flags = fl->fl_flags;
	fl->fl_flags |= FL_EXISTS;
	err = locks_lock_file_wait(file, fl);
	fl->fl_flags = orig_flags;
	if (err == -ENOENT) {
		if (!(orig_flags & FL_EXISTS))
			err = 0;
		return err;
	}
	return 1;
}

/*
 * Attempt to set an fcntl lock.
 * For now, this just goes away to the server. Later it may be more awesome.
 */
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	int err = 0;
	u16 op = CEPH_MDS_OP_SETFILELOCK;
	u8 wait = 0;
	u8 lock_cmd;

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
	/* No mandatory locks */
	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);

	/* set wait bit as appropriate, then make command as Ceph expects it */
	if (IS_GETLK(cmd))
		op = CEPH_MDS_OP_GETFILELOCK;
	else if (IS_SETLKW(cmd))
		wait = 1;

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
		err = -EIO;
	}
	spin_unlock(&ci->i_ceph_lock);
	if (err < 0) {
		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
			posix_lock_file(file, fl, NULL);
		return err;
	}

	if (F_RDLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_SHARED;
	else if (F_WRLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_EXCL;
	else
		lock_cmd = CEPH_LOCK_UNLOCK;

	if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) {
		err = try_unlock_file(file, fl);
		if (err <= 0)
			return err;
	}

	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
	if (!err) {
		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) {
			dout("mds locked, locking locally\n");
			err = posix_lock_file(file, fl, NULL);
			if (err) {
				/* undo! This should only happen if
				 * the kernel detects local
				 * deadlock. */
				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
						  CEPH_LOCK_UNLOCK, 0, fl);
				dout("got %d on posix_lock_file, undid lock\n",
				     err);
			}
		}
	}
	return err;
}

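/*
 * Attempt to set an flock (BSD-style) lock.  Mirrors ceph_lock(), but works
 * on the inode's flock list and always uses the SETFILELOCK operation.
 */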
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	int err = 0;
	u8 wait = 0;
	u8 lock_cmd;

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;
	/* No mandatory locks */
	if (fl->fl_type & LOCK_MAND)
		return -EOPNOTSUPP;

	dout("ceph_flock, fl_file: %p\n", fl->fl_file);

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
		err = -EIO;
	}
	spin_unlock(&ci->i_ceph_lock);
	if (err < 0) {
		if (F_UNLCK == fl->fl_type)
			locks_lock_file_wait(file, fl);
		return err;
	}

	if (IS_SETLKW(cmd))
		wait = 1;

	if (F_RDLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_SHARED;
	else if (F_WRLCK == fl->fl_type)
		lock_cmd = CEPH_LOCK_EXCL;
	else
		lock_cmd = CEPH_LOCK_UNLOCK;

	if (F_UNLCK == fl->fl_type) {
		err = try_unlock_file(file, fl);
		if (err <= 0)
			return err;
	}

	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
				inode, lock_cmd, wait, fl);
	if (!err && F_UNLCK != fl->fl_type) {
		err = locks_lock_file_wait(file, fl);
		if (err) {
			ceph_lock_message(CEPH_LOCK_FLOCK,
					  CEPH_MDS_OP_SETFILELOCK,
					  inode, CEPH_LOCK_UNLOCK, 0, fl);
			dout("got %d on locks_lock_file_wait, undid lock\n", err);
		}
	}
	return err;
}

/*
 * Fill in the passed counter variables, so that pagelist metadata can be
 * prepared before calling ceph_encode_locks_to_buffer().
 */
void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
{
	struct file_lock *lock;
	struct file_lock_context *ctx;

	*fcntl_count = 0;
	*flock_count = 0;

	ctx = inode->i_flctx;
	if (ctx) {
		spin_lock(&ctx->flc_lock);
		list_for_each_entry(lock, &ctx->flc_posix, fl_list)
			++(*fcntl_count);
		list_for_each_entry(lock, &ctx->flc_flock, fl_list)
			++(*flock_count);
		spin_unlock(&ctx->flc_lock);
	}
	dout("counted %d flock locks and %d fcntl locks\n",
	     *flock_count, *fcntl_count);
}

/*
 * Given a pointer to a lock, convert it to a ceph filelock
 */
static int lock_to_ceph_filelock(struct file_lock *lock,
				 struct ceph_filelock *cephlock)
{
	int err = 0;
	cephlock->start = cpu_to_le64(lock->fl_start);
	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
	cephlock->client = cpu_to_le64(0);
	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));

	switch (lock->fl_type) {
	case F_RDLCK:
		cephlock->type = CEPH_LOCK_SHARED;
		break;
	case F_WRLCK:
		cephlock->type = CEPH_LOCK_EXCL;
		break;
	case F_UNLCK:
		cephlock->type = CEPH_LOCK_UNLOCK;
		break;
	default:
		dout("Have unknown lock type %d\n", lock->fl_type);
		err = -EINVAL;
	}

	return err;
}

/*
 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
 * array. Must be called with inode->i_lock already held.
 * If we encounter more of a specific lock type than expected, return -ENOSPC.
 */
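/*
 * Note: the lock lists may have grown since ceph_count_locks() sized the
 * buffer, which is what the -ENOSPC checks below guard against.
 */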
int ceph_encode_locks_to_buffer(struct inode *inode,
				struct ceph_filelock *flocks,
				int num_fcntl_locks, int num_flock_locks)
{
	struct file_lock *lock;
	struct file_lock_context *ctx = inode->i_flctx;
	int err = 0;
	int seen_fcntl = 0;
	int seen_flock = 0;
	int l = 0;

	dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
	     num_fcntl_locks);

	if (!ctx)
		return 0;

	spin_lock(&ctx->flc_lock);
	list_for_each_entry(lock, &ctx->flc_posix, fl_list) {
		++seen_fcntl;
		if (seen_fcntl > num_fcntl_locks) {
			err = -ENOSPC;
			goto fail;
		}
		err = lock_to_ceph_filelock(lock, &flocks[l]);
		if (err)
			goto fail;
		++l;
	}
	list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
		++seen_flock;
		if (seen_flock > num_flock_locks) {
			err = -ENOSPC;
			goto fail;
		}
		err = lock_to_ceph_filelock(lock, &flocks[l]);
		if (err)
			goto fail;
		++l;
	}
fail:
	spin_unlock(&ctx->flc_lock);
	return err;
}

/*
 * Copy the encoded flock and fcntl locks into the pagelist.
 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
 * sequential flock locks.
 * Returns zero on success.
 */
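/*
 * For example, with two fcntl locks and one flock lock the pagelist gets:
 *
 *	le32(2) | ceph_filelock | ceph_filelock | le32(1) | ceph_filelock
 */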
int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
			   struct ceph_pagelist *pagelist,
			   int num_fcntl_locks, int num_flock_locks)
{
	int err = 0;
	__le32 nlocks;

	nlocks = cpu_to_le32(num_fcntl_locks);
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
	if (err)
		goto out_fail;

	if (num_fcntl_locks > 0) {
		err = ceph_pagelist_append(pagelist, flocks,
					   num_fcntl_locks * sizeof(*flocks));
		if (err)
			goto out_fail;
	}

	nlocks = cpu_to_le32(num_flock_locks);
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
	if (err)
		goto out_fail;

	if (num_flock_locks > 0) {
		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
					   num_flock_locks * sizeof(*flocks));
	}
out_fail:
	return err;
}