/*
 * Module for the pnfs nfs4 file layout driver.
 * Defines all I/O and Policy interface operations, plus code
 * to register itself with the pNFS client.
 *
 * Copyright (c) 2002
 * The Regents of the University of Michigan
 * All Rights Reserved
 *
 * Dean Hildebrand <dhildebz@umich.edu>
 *
 * Permission is granted to use, copy, create derivative works, and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the University of Michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. If
 * the above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * This software is provided as is, without representation or warranty
 * of any kind either express or implied, including without limitation
 * the implied warranties of merchantability, fitness for a particular
 * purpose, or noninfringement. The Regents of the University of
 * Michigan shall not be liable for any damages, including special,
 * indirect, incidental, or consequential damages, with respect to any
 * claim arising out of or in connection with the use of the software,
 * even if it has been or is hereafter advised of the possibility of
 * such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/backing-dev.h>

#include <linux/sunrpc/metrics.h>

#include "../nfs4session.h"
#include "../internal.h"
#include "../delegation.h"
#include "filelayout.h"
#include "../nfs4trace.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
MODULE_DESCRIPTION("The NFSv4 file layout driver");

#define FILELAYOUT_POLL_RETRY_MAX	(15*HZ)
static const struct pnfs_commit_ops filelayout_commit_ops;

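/*
 * Offset mapping refresher (illustrative only; the numbers below are
 * hypothetical and not taken from any particular layout):
 *
 * With STRIPE_DENSE packing each data server stores only its own stripe
 * units, packed back to back, so a file offset must be collapsed onto the
 * per-DS file. E.g. with stripe_unit = 4096 and stripe_count = 4 the
 * stripe width is 16384; file offset 20480 falls in stripe_no 1 with
 * rem 0, giving a dense DS offset of 1 * 4096 + 0 = 4096. With
 * STRIPE_SPARSE packing the DS file mirrors the MDS file and the offset
 * is used unchanged.
 */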
static loff_t
filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
			    loff_t offset)
{
	u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
	u64 stripe_no;
	u32 rem;

	offset -= flseg->pattern_offset;
	stripe_no = div_u64(offset, stripe_width);
	div_u64_rem(offset, flseg->stripe_unit, &rem);

	return stripe_no * flseg->stripe_unit + rem;
}

/* This function is used by the layout driver to calculate the
 * offset of the file on the dserver based on whether the
 * layout type is STRIPE_DENSE or STRIPE_SPARSE
 */
static loff_t
filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	switch (flseg->stripe_type) {
	case STRIPE_SPARSE:
		return offset;

	case STRIPE_DENSE:
		return filelayout_get_dense_offset(flseg, offset);
	}

	BUG();
}

static void filelayout_reset_write(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_write_done_resend_to_mds(hdr);
	}
}

static void filelayout_reset_read(struct nfs_pgio_header *hdr)
{
	struct rpc_task *task = &hdr->task;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		dprintk("%s Reset task %5u for i/o through MDS "
			"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
			hdr->task.tk_pid,
			hdr->inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(hdr->inode),
			hdr->args.count,
			(unsigned long long)hdr->args.offset);

		task->tk_status = pnfs_read_done_resend_to_mds(hdr);
	}
}

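/*
 * Map a data-server RPC error onto a recovery action. Descriptive note
 * (mirrors the switch below), summarizing the return values:
 *   0                     - task->tk_status was not an error, nothing to do
 *   -NFS4ERR_RESET_TO_MDS - give up on this DS and resend the I/O to the MDS
 *   -EAGAIN               - recovery or a delay has been scheduled; retry
 *                           the RPC against the DS
 */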
static int filelayout_async_handle_error(struct rpc_task *task,
					 struct nfs4_state *state,
					 struct nfs_client *clp,
					 struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = lseg->pls_layout;
	struct inode *inode = lo->plh_inode;
	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;

	if (task->tk_status >= 0)
		return 0;

	switch (task->tk_status) {
	/* DS session errors */
	case -NFS4ERR_BADSESSION:
	case -NFS4ERR_BADSLOT:
	case -NFS4ERR_BAD_HIGH_SLOT:
	case -NFS4ERR_DEADSESSION:
	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
	case -NFS4ERR_SEQ_FALSE_RETRY:
	case -NFS4ERR_SEQ_MISORDERED:
		dprintk("%s ERROR %d, Reset session. Exchangeid "
			"flags 0x%x\n", __func__, task->tk_status,
			clp->cl_exchange_flags);
		nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
		break;
	case -NFS4ERR_DELAY:
	case -NFS4ERR_GRACE:
		rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
		break;
	case -NFS4ERR_RETRY_UNCACHED_REP:
		break;
	/* Invalidate Layout errors */
	case -NFS4ERR_ACCESS:
	case -NFS4ERR_PNFS_NO_LAYOUT:
	case -ESTALE:           /* mapped NFS4ERR_STALE */
	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
	case -EISDIR:           /* mapped NFS4ERR_ISDIR */
	case -NFS4ERR_FHEXPIRED:
	case -NFS4ERR_WRONG_TYPE:
		dprintk("%s Invalid layout error %d\n", __func__,
			task->tk_status);
		/*
		 * Destroy layout so new i/o will get a new layout.
		 * Layout will not be destroyed until all current lseg
		 * references are put. Mark layout as invalid to resend failed
		 * i/o and all i/o waiting on the slot table to the MDS until
		 * layout is destroyed and a new valid layout is obtained.
		 */
		pnfs_destroy_layout(NFS_I(inode));
		rpc_wake_up(&tbl->slot_tbl_waitq);
		goto reset;
	/* RPC connection errors */
	case -ECONNREFUSED:
	case -EHOSTDOWN:
	case -EHOSTUNREACH:
	case -ENETUNREACH:
	case -EIO:
	case -ETIMEDOUT:
	case -EPIPE:
		dprintk("%s DS connection error %d\n", __func__,
			task->tk_status);
		nfs4_mark_deviceid_unavailable(devid);
		pnfs_error_mark_layout_for_return(inode, lseg);
		pnfs_set_lo_fail(lseg);
		rpc_wake_up(&tbl->slot_tbl_waitq);
		fallthrough;
	default:
reset:
		dprintk("%s Retry through MDS. Error %d\n", __func__,
			task->tk_status);
		return -NFS4ERR_RESET_TO_MDS;
	}
	task->tk_status = 0;
	return -EAGAIN;
}
/* NFS_PROTO call done callback routines */

static int filelayout_read_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	int err;

	trace_nfs4_pnfs_read(hdr, task->tk_status);
	err = filelayout_async_handle_error(task, hdr->args.context->state,
					    hdr->ds_clp, hdr->lseg);

	switch (err) {
	case -NFS4ERR_RESET_TO_MDS:
		filelayout_reset_read(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	return 0;
}

/*
 * We reference the rpc_cred of the first WRITE that triggers the need for
 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
 * rfc5661 is not clear about which credential should be used.
 */
static void
filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
	loff_t end_offs = 0;

	if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
	    hdr->res.verf->committed == NFS_FILE_SYNC)
		return;
	if (hdr->res.verf->committed == NFS_DATA_SYNC)
		end_offs = hdr->mds_offset + (loff_t)hdr->res.count;

	/* Note: if the write is unstable, don't set end_offs until commit */
	pnfs_set_layoutcommit(hdr->inode, hdr->lseg, end_offs);
	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
		(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}

bool
filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
	return filelayout_test_devid_invalid(node) ||
		nfs4_test_deviceid_unavailable(node);
}

static bool
filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
{
	struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg);

	return filelayout_test_devid_unavailable(node);
}

/*
 * Call ops for the async read/write cases
 * In the case of dense layouts, the offset needs to be reset to its
 * original value.
 */
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return;
	}
	if (filelayout_reset_to_mds(hdr->lseg)) {
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		filelayout_reset_read(hdr);
		rpc_exit(task, 0);
		return;
	}
	hdr->pgio_done_cb = filelayout_read_done_cb;

	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;
	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_READ) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}

static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs41_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, data);
}

static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}

static int filelayout_write_done_cb(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	int err;

	trace_nfs4_pnfs_write(hdr, task->tk_status);
	err = filelayout_async_handle_error(task, hdr->args.context->state,
					    hdr->ds_clp, hdr->lseg);

	switch (err) {
	case -NFS4ERR_RESET_TO_MDS:
		filelayout_reset_write(hdr);
		return task->tk_status;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	filelayout_set_layoutcommit(hdr);

	/* zero out the fattr */
	hdr->fattr.valid = 0;
	if (task->tk_status >= 0)
		nfs_writeback_update_inode(hdr);

	return 0;
}

static int filelayout_commit_done_cb(struct rpc_task *task,
				     struct nfs_commit_data *data)
{
	int err;

	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
	err = filelayout_async_handle_error(task, NULL, data->ds_clp,
					    data->lseg);

	switch (err) {
	case -NFS4ERR_RESET_TO_MDS:
		pnfs_generic_prepare_to_resend_writes(data);
		return -EAGAIN;
	case -EAGAIN:
		rpc_restart_call_prepare(task);
		return -EAGAIN;
	}

	pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);

	return 0;
}

static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
		rpc_exit(task, -EIO);
		return;
	}
	if (filelayout_reset_to_mds(hdr->lseg)) {
		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
		filelayout_reset_write(hdr);
		rpc_exit(task, 0);
		return;
	}
	if (nfs4_setup_sequence(hdr->ds_clp,
				&hdr->args.seq_args,
				&hdr->res.seq_res,
				task))
		return;
	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
			hdr->args.lock_context, FMODE_WRITE) == -EIO)
		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}

static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
	    task->tk_status == 0) {
		nfs41_sequence_done(task, &hdr->res.seq_res);
		return;
	}

	/* Note this may cause RPC to be resent */
	hdr->mds_ops->rpc_call_done(task, data);
}

static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}

static void filelayout_commit_prepare(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *wdata = data;

	nfs4_setup_sequence(wdata->ds_clp,
			&wdata->args.seq_args,
			&wdata->res.seq_res,
			task);
}

static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
{
	struct nfs_commit_data *cdata = data;

	rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
}

static const struct rpc_call_ops filelayout_read_call_ops = {
	.rpc_call_prepare = filelayout_read_prepare,
	.rpc_call_done = filelayout_read_call_done,
	.rpc_count_stats = filelayout_read_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops filelayout_write_call_ops = {
	.rpc_call_prepare = filelayout_write_prepare,
	.rpc_call_done = filelayout_write_call_done,
	.rpc_count_stats = filelayout_write_count_stats,
	.rpc_release = pnfs_generic_rw_release,
};

static const struct rpc_call_ops filelayout_commit_call_ops = {
	.rpc_call_prepare = filelayout_commit_prepare,
	.rpc_call_done = pnfs_generic_write_commit_done,
	.rpc_count_stats = filelayout_commit_count_stats,
	.rpc_release = pnfs_generic_commit_release,
};

static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_pgio_header *hdr)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	loff_t offset = hdr->args.offset;
	u32 j, idx;
	struct nfs_fh *fh;

	dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
		__func__, hdr->inode->i_ino,
		hdr->args.pgbase, (size_t)hdr->args.count, offset);

	/* Retrieve the correct rpc_client for the byte range */
	j = nfs4_fl_calc_j_index(lseg, offset);
	idx = nfs4_fl_calc_ds_index(lseg, j);
	ds = nfs4_fl_prepare_ds(lseg, idx);
	if (!ds)
		return PNFS_NOT_ATTEMPTED;

	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
	if (IS_ERR(ds_clnt))
		return PNFS_NOT_ATTEMPTED;

	dprintk("%s USE DS: %s cl_count %d\n", __func__,
		ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));

	/* No multipath support. Use first DS */
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_fl_select_ds_fh(lseg, j);
	if (fh)
		hdr->args.fh = fh;

	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
	hdr->mds_offset = offset;

	/* Perform an asynchronous read to ds */
	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
			  NFS_PROTO(hdr->inode), &filelayout_read_call_ops,
			  0, RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
}

/* Perform async writes. */
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
	struct pnfs_layout_segment *lseg = hdr->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	loff_t offset = hdr->args.offset;
	u32 j, idx;
	struct nfs_fh *fh;

	/* Retrieve the correct rpc_client for the byte range */
	j = nfs4_fl_calc_j_index(lseg, offset);
	idx = nfs4_fl_calc_ds_index(lseg, j);
	ds = nfs4_fl_prepare_ds(lseg, idx);
	if (!ds)
		return PNFS_NOT_ATTEMPTED;

	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
	if (IS_ERR(ds_clnt))
		return PNFS_NOT_ATTEMPTED;

	dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
		__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
		offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count));

	hdr->pgio_done_cb = filelayout_write_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	hdr->ds_clp = ds->ds_clp;
	hdr->ds_commit_idx = idx;
	fh = nfs4_fl_select_ds_fh(lseg, j);
	if (fh)
		hdr->args.fh = fh;
	hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);

	/* Perform an asynchronous write */
	nfs_initiate_pgio(ds_clnt, hdr, hdr->cred,
			  NFS_PROTO(hdr->inode), &filelayout_write_call_ops,
			  sync, RPC_TASK_SOFTCONN);
	return PNFS_ATTEMPTED;
}

static int
filelayout_check_deviceid(struct pnfs_layout_hdr *lo,
			  struct nfs4_filelayout_segment *fl,
			  gfp_t gfp_flags)
{
	struct nfs4_deviceid_node *d;
	struct nfs4_file_layout_dsaddr *dsaddr;
	int status = -EINVAL;

	/* Is the deviceid already set? If so, we're good. */
	if (fl->dsaddr != NULL)
		return 0;

	/* find and reference the deviceid */
	d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid,
				   lo->plh_lc_cred, gfp_flags);
	if (d == NULL)
		goto out;

	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
	/* Found deviceid is unavailable */
	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
		goto out_put;

	if (fl->first_stripe_index >= dsaddr->stripe_count) {
		dprintk("%s Bad first_stripe_index %u\n",
			__func__, fl->first_stripe_index);
		goto out_put;
	}

	if ((fl->stripe_type == STRIPE_SPARSE &&
	    fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
	    (fl->stripe_type == STRIPE_DENSE &&
	    fl->num_fh != dsaddr->stripe_count)) {
		dprintk("%s num_fh %u not valid for given packing\n",
			__func__, fl->num_fh);
		goto out_put;
	}
	status = 0;

	/*
	 * Atomic compare and xchange to ensure we don't scribble
	 * over a non-NULL pointer.
	 */
	if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL)
		goto out_put;
out:
	return status;
out_put:
	nfs4_fl_put_deviceid(dsaddr);
	goto out;
}

/*
 * filelayout_check_layout()
 *
 * Make sure layout segment parameters are sane WRT the device.
 * At this point no generic layer initialization of the lseg has occurred,
 * and nothing has been added to the layout_hdr cache.
 *
 */
static int
filelayout_check_layout(struct pnfs_layout_hdr *lo,
			struct nfs4_filelayout_segment *fl,
			struct nfs4_layoutget_res *lgr,
			gfp_t gfp_flags)
{
	int status = -EINVAL;

	dprintk("--> %s\n", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
			__func__);
		goto out;
	}

	if (fl->pattern_offset > lgr->range.offset) {
		dprintk("%s pattern_offset %lld too large\n",
			__func__, fl->pattern_offset);
		goto out;
	}

	if (!fl->stripe_unit) {
		dprintk("%s Invalid stripe unit (%u)\n",
			__func__, fl->stripe_unit);
		goto out;
	}

	status = 0;
out:
	dprintk("--> %s returns %d\n", __func__, status);
	return status;
}

static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	int i;

	if (fl->fh_array) {
		for (i = 0; i < fl->num_fh; i++) {
			if (!fl->fh_array[i])
				break;
			kfree(fl->fh_array[i]);
		}
		kfree(fl->fh_array);
	}
	kfree(fl);
}

static int
filelayout_decode_layout(struct pnfs_layout_hdr *flo,
			 struct nfs4_filelayout_segment *fl,
			 struct nfs4_layoutget_res *lgr,
			 gfp_t gfp_flags)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	__be32 *p;
	uint32_t nfl_util;
	int i;

	dprintk("%s: set_layout_map Begin\n", __func__);

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
	 * num_fh (4) */
	p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
	if (unlikely(!p))
		goto out_err;

	memcpy(&fl->deviceid, p, sizeof(fl->deviceid));
	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(&fl->deviceid);

	nfl_util = be32_to_cpup(p++);
	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
		fl->commit_through_mds = 1;
	if (nfl_util & NFL4_UFLG_DENSE)
		fl->stripe_type = STRIPE_DENSE;
	else
		fl->stripe_type = STRIPE_SPARSE;
	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;

	fl->first_stripe_index = be32_to_cpup(p++);
	p = xdr_decode_hyper(p, &fl->pattern_offset);
	fl->num_fh = be32_to_cpup(p++);

	dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
		fl->pattern_offset);

	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
	 * Further checking is done in filelayout_check_layout */
	if (fl->num_fh >
	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
		goto out_err;

	if (fl->num_fh > 0) {
		fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
				       gfp_flags);
		if (!fl->fh_array)
			goto out_err;
	}

	for (i = 0; i < fl->num_fh; i++) {
		/* Do we want to use a mempool here? */
		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
		if (!fl->fh_array[i])
			goto out_err;

		p = xdr_inline_decode(&stream, 4);
		if (unlikely(!p))
			goto out_err;
		fl->fh_array[i]->size = be32_to_cpup(p++);
		if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
			printk(KERN_ERR "NFS: Too big fh %d received %d\n",
			       i, fl->fh_array[i]->size);
			goto out_err;
		}

		p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
		if (unlikely(!p))
			goto out_err;
		memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
		dprintk("DEBUG: %s: fh len %d\n", __func__,
			fl->fh_array[i]->size);
	}

	__free_page(scratch);
	return 0;

out_err:
	__free_page(scratch);
	return -EIO;
}
static void
filelayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);

	dprintk("--> %s\n", __func__);
	if (fl->dsaddr != NULL)
		nfs4_fl_put_deviceid(fl->dsaddr);
	/* This assumes a single RW lseg */
	if (lseg->pls_range.iomode == IOMODE_RW) {
		struct nfs4_filelayout *flo;
		struct inode *inode;

		flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
		inode = flo->generic_hdr.plh_inode;
		spin_lock(&inode->i_lock);
		pnfs_generic_ds_cinfo_release_lseg(&flo->commit_info, lseg);
		spin_unlock(&inode->i_lock);
	}
	_filelayout_free_lseg(fl);
}

static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
		      struct nfs4_layoutget_res *lgr,
		      gfp_t gfp_flags)
{
	struct nfs4_filelayout_segment *fl;
	int rc;

	dprintk("--> %s\n", __func__);
	fl = kzalloc(sizeof(*fl), gfp_flags);
	if (!fl)
		return NULL;

	rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags);
	if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) {
		_filelayout_free_lseg(fl);
		return NULL;
	}
	return &fl->generic_hdr;
}

/*
 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
 *
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
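/*
 * Illustration (hypothetical numbers, not taken from any particular mount):
 * with stripe_unit = 65536 and a request starting 61440 bytes into the
 * current stripe unit, at most 65536 - 61440 = 4096 bytes can still be
 * coalesced before the I/O would cross onto the next data server, so the
 * size returned below is capped accordingly.
 */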
static size_t
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		   struct nfs_page *req)
{
	unsigned int size;
	u64 p_stripe, r_stripe;
	u32 stripe_offset;
	u64 segment_offset = pgio->pg_lseg->pls_range.offset;
	u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;

	/* calls nfs_generic_pg_test */
	size = pnfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/* see if req and prev are in the same stripe */
	if (prev) {
		p_stripe = (u64)req_offset(prev) - segment_offset;
		r_stripe = (u64)req_offset(req) - segment_offset;
		do_div(p_stripe, stripe_unit);
		do_div(r_stripe, stripe_unit);

		if (p_stripe != r_stripe)
			return 0;
	}

	/* calculate remaining bytes in the current stripe */
	div_u64_rem((u64)req_offset(req) - segment_offset,
			stripe_unit,
			&stripe_offset);
	WARN_ON_ONCE(stripe_offset > stripe_unit);
	if (stripe_offset >= stripe_unit)
		return 0;
	return min(stripe_unit - (unsigned int)stripe_offset, size);
}

static struct pnfs_layout_segment *
fl_pnfs_update_layout(struct inode *ino,
		      struct nfs_open_context *ctx,
		      loff_t pos,
		      u64 count,
		      enum pnfs_iomode iomode,
		      bool strict_iomode,
		      gfp_t gfp_flags)
{
	struct pnfs_layout_segment *lseg = NULL;
	struct pnfs_layout_hdr *lo;
	struct nfs4_filelayout_segment *fl;
	int status;

	lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
				  gfp_flags);
	if (IS_ERR_OR_NULL(lseg))
		goto out;

	lo = NFS_I(ino)->layout;
	fl = FILELAYOUT_LSEG(lseg);

	status = filelayout_check_deviceid(lo, fl, gfp_flags);
	if (status) {
		pnfs_put_lseg(lseg);
		lseg = NULL;
	}
out:
	return lseg;
}
static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
			struct nfs_page *req)
{
	pnfs_generic_pg_check_layout(pgio);
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
						      nfs_req_openctx(req),
						      0,
						      NFS4_MAX_UINT64,
						      IOMODE_READ,
						      false,
						      GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}

static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
			 struct nfs_page *req)
{
	pnfs_generic_pg_check_layout(pgio);
	if (!pgio->pg_lseg) {
		pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
						      nfs_req_openctx(req),
						      0,
						      NFS4_MAX_UINT64,
						      IOMODE_RW,
						      false,
						      GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}

	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}

static const struct nfs_pageio_ops filelayout_pg_read_ops = {
	.pg_init = filelayout_pg_init_read,
	.pg_test = filelayout_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
	.pg_init = filelayout_pg_init_write,
	.pg_test = filelayout_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

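/*
 * Commit-bucket indexing note (descriptive, mirrors the helpers below):
 * for STRIPE_SPARSE packing the buckets are keyed by data-server index,
 * so the stripe index j is translated through nfs4_fl_calc_ds_index();
 * for STRIPE_DENSE packing there is one filehandle per stripe and j is
 * used directly. calc_ds_index_from_commit() completes the picture at
 * commit time: sparse buckets already hold the DS index, dense buckets
 * hold the stripe index and are translated then.
 */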
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
{
	if (fl->stripe_type == STRIPE_SPARSE)
		return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
	else
		return j;
}

static void
filelayout_mark_request_commit(struct nfs_page *req,
			       struct pnfs_layout_segment *lseg,
			       struct nfs_commit_info *cinfo,
			       u32 ds_commit_idx)
{
	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
	u32 i, j;

	if (fl->commit_through_mds) {
		nfs_request_add_commit_list(req, cinfo);
	} else {
		/* Note that we are calling nfs4_fl_calc_j_index on each page
		 * that ends up being committed to a data server. An attractive
		 * alternative is to add a field to nfs_write_data and nfs_page
		 * to store the value calculated in filelayout_write_pagelist
		 * and just use that here.
		 */
		j = nfs4_fl_calc_j_index(lseg, req_offset(req));
		i = select_bucket_index(fl, j);
		pnfs_layout_mark_request_commit(req, lseg, cinfo, i);
	}
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type == STRIPE_SPARSE)
		return i;
	else
		return nfs4_fl_calc_ds_index(lseg, i);
}

static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type == STRIPE_SPARSE) {
		if (flseg->num_fh == 1)
			i = 0;
		else if (flseg->num_fh == 0)
			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
			return NULL;
	}
	return flseg->fh_array[i];
}

static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
{
	struct pnfs_layout_segment *lseg = data->lseg;
	struct nfs4_pnfs_ds *ds;
	struct rpc_clnt *ds_clnt;
	u32 idx;
	struct nfs_fh *fh;

	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
	ds = nfs4_fl_prepare_ds(lseg, idx);
	if (!ds)
		goto out_err;

	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode);
	if (IS_ERR(ds_clnt))
		goto out_err;

	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
		data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count));
	data->commit_done_cb = filelayout_commit_done_cb;
	refcount_inc(&ds->ds_clp->cl_count);
	data->ds_clp = ds->ds_clp;
	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
	if (fh)
		data->args.fh = fh;
	return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode),
				   &filelayout_commit_call_ops, how,
				   RPC_TASK_SOFTCONN);
out_err:
	pnfs_generic_prepare_to_resend_writes(data);
	pnfs_generic_commit_release(data);
	return -EAGAIN;
}

static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
			   int how, struct nfs_commit_info *cinfo)
{
	return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo,
					    filelayout_initiate_commit);
}

static struct nfs4_deviceid_node *
filelayout_alloc_deviceid_node(struct nfs_server *server,
			       struct pnfs_device *pdev, gfp_t gfp_flags)
{
	struct nfs4_file_layout_dsaddr *dsaddr;

	dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags);
	if (!dsaddr)
		return NULL;
	return &dsaddr->id_node;
}

static void
filelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}

static struct pnfs_layout_hdr *
filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_filelayout *flo;

	flo = kzalloc(sizeof(*flo), gfp_flags);
	if (flo == NULL)
		return NULL;
	pnfs_init_ds_commit_info(&flo->commit_info);
	flo->commit_info.ops = &filelayout_commit_ops;
	return &flo->generic_hdr;
}

static void
filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	kfree_rcu(FILELAYOUT_FROM_HDR(lo), generic_hdr.plh_rcu);
}

static struct pnfs_ds_commit_info *
filelayout_get_ds_info(struct inode *inode)
{
	struct pnfs_layout_hdr *layout = NFS_I(inode)->layout;

	if (layout == NULL)
		return NULL;
	else
		return &FILELAYOUT_FROM_HDR(layout)->commit_info;
}

static void
filelayout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
			 struct pnfs_layout_segment *lseg)
{
	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
	struct inode *inode = lseg->pls_layout->plh_inode;
	struct pnfs_commit_array *array, *new;
	unsigned int size = (fl->stripe_type == STRIPE_SPARSE) ?
		fl->dsaddr->ds_num : fl->dsaddr->stripe_count;

	new = pnfs_alloc_commit_array(size, GFP_NOIO);
	if (new) {
		spin_lock(&inode->i_lock);
		array = pnfs_add_commit_array(fl_cinfo, new, lseg);
		spin_unlock(&inode->i_lock);
		if (array != new)
			pnfs_free_commit_array(new);
	}
}

static void
filelayout_release_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
		struct inode *inode)
{
	spin_lock(&inode->i_lock);
	pnfs_generic_ds_cinfo_destroy(fl_cinfo);
	spin_unlock(&inode->i_lock);
}

static const struct pnfs_commit_ops filelayout_commit_ops = {
	.setup_ds_info		= filelayout_setup_ds_info,
	.release_ds_info	= filelayout_release_ds_info,
	.mark_request_commit	= filelayout_mark_request_commit,
	.clear_request_commit	= pnfs_generic_clear_request_commit,
	.scan_commit_lists	= pnfs_generic_scan_commit_lists,
	.recover_commit_reqs	= pnfs_generic_recover_commit_reqs,
	.search_commit_reqs	= pnfs_generic_search_commit_reqs,
	.commit_pagelist	= filelayout_commit_pagelist,
};

static struct pnfs_layoutdriver_type filelayout_type = {
	.id			= LAYOUT_NFSV4_1_FILES,
	.name			= "LAYOUT_NFSV4_1_FILES",
	.owner			= THIS_MODULE,
	.flags			= PNFS_LAYOUTGET_ON_OPEN,
	.max_layoutget_response	= 4096, /* 1 page or so... */
	.alloc_layout_hdr	= filelayout_alloc_layout_hdr,
	.free_layout_hdr	= filelayout_free_layout_hdr,
	.alloc_lseg		= filelayout_alloc_lseg,
	.free_lseg		= filelayout_free_lseg,
	.pg_read_ops		= &filelayout_pg_read_ops,
	.pg_write_ops		= &filelayout_pg_write_ops,
	.get_ds_info		= &filelayout_get_ds_info,
	.read_pagelist		= filelayout_read_pagelist,
	.write_pagelist		= filelayout_write_pagelist,
	.alloc_deviceid_node	= filelayout_alloc_deviceid_node,
	.free_deviceid_node	= filelayout_free_deviceid_node,
	.sync			= pnfs_nfs_generic_sync,
};

static int __init nfs4filelayout_init(void)
{
	printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
	       __func__);
	return pnfs_register_layoutdriver(&filelayout_type);
}

static void __exit nfs4filelayout_exit(void)
{
	printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
	       __func__);
	pnfs_unregister_layoutdriver(&filelayout_type);
}

MODULE_ALIAS("nfs-layouttype4-1");

module_init(nfs4filelayout_init);
module_exit(nfs4filelayout_exit);