// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"

/*
 * Deferred Operations in XFS
 *
 * Due to the way locking rules work in XFS, certain transactions (block
 * mapping and unmapping, typically) have permanent reservations so that
 * we can roll the transaction to adhere to AG locking order rules and
 * to unlock buffers between metadata updates.  Prior to rmap/reflink,
 * the mapping code had a mechanism to perform these deferrals for
 * extents that were going to be freed; this code makes that facility
 * more generic.
 *
 * When adding the reverse mapping and reflink features, it became
 * necessary to perform complex remapping multi-transactions to comply
 * with AG locking order rules, and to be able to spread a single
 * refcount update operation (an operation on an n-block extent can
 * update as many as n records!) among multiple transactions.  XFS can
 * roll a transaction to facilitate this, but using this facility
 * requires us to log "intent" items in case log recovery needs to
 * redo the operation, and to log "done" items to indicate that redo
 * is not necessary.
 *
 * Deferred work is tracked in xfs_defer_pending items.  Each pending
 * item tracks one type of deferred work.  Incoming work items (which
 * have not yet had an intent logged) are attached to a pending item
 * on the dop_intake list, where they wait for the caller to finish
 * the deferred operations.
 *
 * Finishing a set of deferred operations is an involved process.  To
 * start, we define "rolling a deferred-op transaction" as follows:
 *
 * > For each xfs_defer_pending item on the dop_intake list,
 *   - Sort the work items in AG order.  XFS locking
 *     order rules require us to lock buffers in AG order.
 *   - Create a log intent item for that type.
 *   - Attach it to the pending item.
 *   - Move the pending item from the dop_intake list to the
 *     dop_pending list.
 * > Roll the transaction.
 *
 * NOTE: To avoid exceeding the transaction reservation, we limit the
 * number of items that we attach to a given xfs_defer_pending.
 *
 * The actual finishing process looks like this:
 *
 * > For each xfs_defer_pending in the dop_pending list,
 *   - Roll the deferred-op transaction as above.
 *   - Create a log done item for that type, and attach it to the
 *     log intent item.
 *   - For each work item attached to the log intent item,
 *     * Perform the described action.
 *     * Attach the work item to the log done item.
 *     * If the result of doing the work was -EAGAIN, ->finish work
 *       wants a new transaction.  See the "Requesting a Fresh
 *       Transaction while Finishing Deferred Work" section below for
 *       details.
 *
 * The key here is that we must log an intent item for all pending
 * work items every time we roll the transaction, and that we must log
 * a done item as soon as the work is completed.  With this mechanism
 * we can perform complex remapping operations, chaining intent items
 * as needed.
 *
 * Requesting a Fresh Transaction while Finishing Deferred Work
 *
 * If ->finish_item decides that it needs a fresh transaction to
 * finish the work, it must ask its caller (xfs_defer_finish) for a
 * continuation.  The most likely cause of this circumstance is the
 * refcount adjust functions deciding that they've logged enough items
 * to be at risk of exceeding the transaction reservation.
 *
 * To get a fresh transaction, we want to log the existing log done
 * item to prevent the log intent item from replaying, immediately log
 * a new log intent item with the unfinished work items, roll the
 * transaction, and re-call ->finish_item wherever it left off.  The
 * log done item and the new log intent item must be in the same
 * transaction or atomicity cannot be guaranteed; defer_finish ensures
 * that this happens.
 *
 * This requires some coordination between ->finish_item and
 * defer_finish.  Upon deciding to request a new transaction,
 * ->finish_item should update the current work item to reflect the
 * unfinished work.  Next, it should reset the log done item's list
 * count to the number of items finished, and return -EAGAIN.
 * defer_finish sees the -EAGAIN, logs the new log intent item
 * with the remaining work items, and leaves the xfs_defer_pending
 * item at the head of the dop_work queue.  Then it rolls the
 * transaction and picks up processing where it left off.
 * ->finish_item must be careful to leave enough transaction
 * reservation to fit the new log intent item.
 *
 * This is an example of remapping the extent (E, E+B) into file X at
 * offset A and dealing with the extent (C, C+B) already being mapped
 * there:
 * +-------------------------------------------------+
 * | Unmap file X startblock C offset A length B     | t0
 * | Intent to reduce refcount for extent (C, B)     |
 * | Intent to remove rmap (X, C, A, B)              |
 * | Intent to free extent (D, 1) (bmbt block)       |
 * | Intent to map (X, A, B) at startblock E         |
 * +-------------------------------------------------+
 * | Map file X startblock E offset A length B       | t1
 * | Done mapping (X, E, A, B)                       |
 * | Intent to increase refcount for extent (E, B)   |
 * | Intent to add rmap (X, E, A, B)                 |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C, B)               | t2
 * | Done reducing refcount for extent (C, 9)        |
 * | Intent to reduce refcount for extent (C+9, B-9) |
 * | (ran out of space after 9 refcount updates)     |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C+9, B-9)           | t3
 * | Done reducing refcount for extent (C+9, B-9)    |
 * | Increase refcount for extent (E, B)             |
 * | Done increasing refcount for extent (E, B)      |
 * | Intent to free extent (C, B)                    |
 * | Intent to free extent (F, 1) (refcountbt block) |
 * | Intent to remove rmap (F, 1, REFC)              |
 * +-------------------------------------------------+
 * | Remove rmap (X, C, A, B)                        | t4
 * | Done removing rmap (X, C, A, B)                 |
 * | Add rmap (X, E, A, B)                           |
 * | Done adding rmap (X, E, A, B)                   |
 * | Remove rmap (F, 1, REFC)                        |
 * | Done removing rmap (F, 1, REFC)                 |
 * +-------------------------------------------------+
 * | Free extent (C, B)                              | t5
 * | Done freeing extent (C, B)                      |
 * | Free extent (D, 1)                              |
 * | Done freeing extent (D, 1)                      |
 * | Free extent (F, 1)                              |
 * | Done freeing extent (F, 1)                      |
 * +-------------------------------------------------+
 *
 * If we should crash before t2 commits, log recovery replays
 * the following intent items:
 *
 * - Intent to reduce refcount for extent (C, B)
 * - Intent to remove rmap (X, C, A, B)
 * - Intent to free extent (D, 1) (bmbt block)
 * - Intent to increase refcount for extent (E, B)
 * - Intent to add rmap (X, E, A, B)
 *
 * In the process of recovering, it should also generate and take care
 * of these intent items:
 *
 * - Intent to free extent (C, B)
 * - Intent to free extent (F, 1) (refcountbt block)
 * - Intent to remove rmap (F, 1, REFC)
 *
 * Note that the continuation requested between t2 and t3 is likely to
 * reoccur.
 */
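
/*
 * Illustrative usage sketch (not part of this file's logic): a caller
 * typically queues deferred work against a permanent-reservation
 * transaction and then finishes it before committing.  The reservation
 * name, block count, and the work item with its list field are
 * hypothetical placeholders for whatever the real caller uses; only
 * xfs_defer_add(), xfs_defer_finish(), and the transaction calls are
 * the real interfaces.
 *
 *	error = xfs_trans_alloc(mp, &resv, blocks, 0, 0, &tp);
 *	if (error)
 *		return error;
 *
 *	...modify metadata, then queue work for later...
 *	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new_item->xefi_list);
 *
 *	...log intents, roll, and finish everything queued above...
 *	error = xfs_defer_finish(&tp);
 *	if (error) {
 *		xfs_trans_cancel(tp);
 *		return error;
 *	}
 *	return xfs_trans_commit(tp);
 */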

static const struct xfs_defer_op_type *defer_op_types[] = {
	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
};

static void
xfs_defer_create_intent(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp,
	bool				sort)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];

	if (!dfp->dfp_intent)
		dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
				dfp->dfp_count, sort);
}

/*
 * For each pending item in the intake list, log its intent item and the
 * associated extents, then add the entire intake list to the end of
 * the pending list.
 */
STATIC void
xfs_defer_create_intents(
	struct xfs_trans		*tp)
{
	struct xfs_defer_pending	*dfp;

	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
		xfs_defer_create_intent(tp, dfp, true);
	}
}

/* Abort all the intents that were committed. */
STATIC void
xfs_defer_trans_abort(
	struct xfs_trans		*tp,
	struct list_head		*dop_pending)
{
	struct xfs_defer_pending	*dfp;
	const struct xfs_defer_op_type	*ops;

	trace_xfs_defer_trans_abort(tp, _RET_IP_);

	/* Abort intent items that don't have a done item. */
	list_for_each_entry(dfp, dop_pending, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
		if (dfp->dfp_intent && !dfp->dfp_done) {
			ops->abort_intent(dfp->dfp_intent);
			dfp->dfp_intent = NULL;
		}
	}
}

/* Roll a transaction so we can do some deferred op processing. */
STATIC int
xfs_defer_trans_roll(
	struct xfs_trans		**tpp)
{
	struct xfs_trans		*tp = *tpp;
	struct xfs_buf_log_item		*bli;
	struct xfs_inode_log_item	*ili;
	struct xfs_log_item		*lip;
	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
	struct xfs_inode		*iplist[XFS_DEFER_OPS_NR_INODES];
	unsigned int			ordered = 0; /* bitmap */
	int				bpcount = 0, ipcount = 0;
	int				i;
	int				error;

	BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		switch (lip->li_type) {
		case XFS_LI_BUF:
			bli = container_of(lip, struct xfs_buf_log_item,
					   bli_item);
			if (bli->bli_flags & XFS_BLI_HOLD) {
				if (bpcount >= XFS_DEFER_OPS_NR_BUFS) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				if (bli->bli_flags & XFS_BLI_ORDERED)
					ordered |= (1U << bpcount);
				else
					xfs_trans_dirty_buf(tp, bli->bli_buf);
				bplist[bpcount++] = bli->bli_buf;
			}
			break;
		case XFS_LI_INODE:
			ili = container_of(lip, struct xfs_inode_log_item,
					   ili_item);
			if (ili->ili_lock_flags == 0) {
				if (ipcount >= XFS_DEFER_OPS_NR_INODES) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				xfs_trans_log_inode(tp, ili->ili_inode,
						    XFS_ILOG_CORE);
				iplist[ipcount++] = ili->ili_inode;
			}
			break;
		default:
			break;
		}
	}

	trace_xfs_defer_trans_roll(tp, _RET_IP_);

	/*
	 * Roll the transaction.  Rolling always gives a new transaction (even
	 * if committing the old one fails!) to hand back to the caller, so we
	 * join the held resources to the new transaction so that we always
	 * return with the held resources joined to @tpp, no matter what
	 * happened.
	 */
	error = xfs_trans_roll(tpp);
	tp = *tpp;

	/* Rejoin the joined inodes. */
	for (i = 0; i < ipcount; i++)
		xfs_trans_ijoin(tp, iplist[i], 0);

	/* Rejoin the buffers and dirty them so the log moves forward. */
	for (i = 0; i < bpcount; i++) {
		xfs_trans_bjoin(tp, bplist[i]);
		if (ordered & (1U << i))
			xfs_trans_ordered_buf(tp, bplist[i]);
		xfs_trans_bhold(tp, bplist[i]);
	}

	if (error)
		trace_xfs_defer_trans_roll_error(tp, error);
	return error;
}
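
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * caller that must keep a buffer locked across the rolls performed while
 * finishing deferred work marks it held; xfs_defer_trans_roll() above
 * then rejoins and re-holds it in every new transaction.  "bp" is a
 * placeholder for whatever buffer the caller is protecting, and the
 * caller still owns the hold (and the error handling) afterwards.
 *
 *	xfs_trans_bhold(tp, bp);
 *	error = xfs_defer_finish(&tp);
 *	...error handling...
 *	xfs_trans_bhold_release(tp, bp);
 */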

/*
 * Free up any items left in the list.
 */
static void
xfs_defer_cancel_list(
	struct xfs_mount		*mp,
	struct list_head		*dop_list)
{
	struct xfs_defer_pending	*dfp;
	struct xfs_defer_pending	*pli;
	struct list_head		*pwi;
	struct list_head		*n;
	const struct xfs_defer_op_type	*ops;

	/*
	 * Free the pending items.  Caller should already have arranged
	 * for the intent items to be released.
	 */
	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_cancel_list(mp, dfp);
		list_del(&dfp->dfp_list);
		list_for_each_safe(pwi, n, &dfp->dfp_work) {
			list_del(pwi);
			dfp->dfp_count--;
			ops->cancel_item(pwi);
		}
		ASSERT(dfp->dfp_count == 0);
		kmem_free(dfp);
	}
}

/*
 * Prevent a log intent item from pinning the tail of the log by logging a
 * done item to release the intent item; and then log a new intent item.
 * The caller should provide a fresh transaction and roll it after we're done.
 */
static int
xfs_defer_relog(
	struct xfs_trans		**tpp,
	struct list_head		*dfops)
{
	struct xlog			*log = (*tpp)->t_mountp->m_log;
	struct xfs_defer_pending	*dfp;
	xfs_lsn_t			threshold_lsn = NULLCOMMITLSN;

	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	list_for_each_entry(dfp, dfops, dfp_list) {
		/*
		 * If the log intent item for this deferred op is not a part of
		 * the current log checkpoint, relog the intent item to keep
		 * the log tail moving forward.  We're ok with this being racy
		 * because an incorrect decision means we'll be a little slower
		 * at pushing the tail.
		 */
		if (dfp->dfp_intent == NULL ||
		    xfs_log_item_in_current_chkpt(dfp->dfp_intent))
			continue;

		/*
		 * Figure out where we need the tail to be in order to maintain
		 * the minimum required free space in the log.  Only sample
		 * the log threshold once per call.
		 */
		if (threshold_lsn == NULLCOMMITLSN) {
			threshold_lsn = xlog_grant_push_threshold(log, 0);
			if (threshold_lsn == NULLCOMMITLSN)
				break;
		}
		if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
			continue;

		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
	}

	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
		return xfs_defer_trans_roll(tpp);
	return 0;
}

/*
 * Log an intent-done item for the first pending intent, and finish the work
 * items.
 */
static int
xfs_defer_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	struct xfs_btree_cur		*state = NULL;
	struct list_head		*li, *n;
	int				error;

	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);

	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
	list_for_each_safe(li, n, &dfp->dfp_work) {
		list_del(li);
		dfp->dfp_count--;
		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
		if (error == -EAGAIN) {
			/*
			 * Caller wants a fresh transaction; put the work item
			 * back on the list and log a new log intent item to
			 * replace the old one.  See "Requesting a Fresh
			 * Transaction while Finishing Deferred Work" above.
			 */
			list_add(li, &dfp->dfp_work);
			dfp->dfp_count++;
			dfp->dfp_done = NULL;
			dfp->dfp_intent = NULL;
			xfs_defer_create_intent(tp, dfp, false);
		}

		if (error)
			goto out;
	}

	/* Done with the dfp, free it. */
	list_del(&dfp->dfp_list);
	kmem_free(dfp);
out:
	if (ops->finish_cleanup)
		ops->finish_cleanup(tp, state, error);
	return error;
}
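
/*
 * Illustrative sketch (hypothetical op type, not part of this file): how a
 * ->finish_item implementation asks xfs_defer_finish_one() above for a
 * fresh transaction.  The function and helper names are made up for the
 * example; only the -EAGAIN convention and the argument list used by the
 * finish_item call above are taken from this file.
 *
 *	STATIC int
 *	xfs_example_finish_item(
 *		struct xfs_trans	*tp,
 *		struct xfs_log_item	*done,
 *		struct list_head	*item,
 *		struct xfs_btree_cur	**state)
 *	{
 *		...do as much of the work as the reservation safely allows...
 *
 *		if (running_low_on_reservation) {
 *			...record the remaining work in the work item and
 *			   trim the done item's count to what was finished...
 *			return -EAGAIN;
 *		}
 *		return 0;
 *	}
 */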

/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
 * one has even happened), rolling the transaction, and finishing the
 * work items in the first item on the logged-and-pending list.
 *
 * If an inode is provided, relog it to the new transaction.
 */
int
xfs_defer_finish_noroll(
	struct xfs_trans		**tp)
{
	struct xfs_defer_pending	*dfp;
	int				error = 0;
	LIST_HEAD(dop_pending);

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	trace_xfs_defer_finish(*tp, _RET_IP_);

	/* Until we run out of pending work to finish... */
	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
		/*
		 * Deferred items that are created in the process of finishing
		 * other deferred work items should be queued at the head of
		 * the pending list, which puts them ahead of the deferred work
		 * that was created by the caller.  This keeps the number of
		 * pending work items to a minimum, which decreases the amount
		 * of time that any one intent item can stick around in memory,
		 * pinning the log tail.
		 */
		xfs_defer_create_intents(*tp);
		list_splice_init(&(*tp)->t_dfops, &dop_pending);

		error = xfs_defer_trans_roll(tp);
		if (error)
			goto out_shutdown;

		/* Possibly relog intent items to keep the log moving. */
		error = xfs_defer_relog(tp, &dop_pending);
		if (error)
			goto out_shutdown;

		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
				       dfp_list);
		error = xfs_defer_finish_one(*tp, dfp);
		if (error && error != -EAGAIN)
			goto out_shutdown;
	}

	trace_xfs_defer_finish_done(*tp, _RET_IP_);
	return 0;

out_shutdown:
	xfs_defer_trans_abort(*tp, &dop_pending);
	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	trace_xfs_defer_finish_error(*tp, error);
	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
	xfs_defer_cancel(*tp);
	return error;
}

int
xfs_defer_finish(
	struct xfs_trans		**tp)
{
	int				error;

	/*
	 * Finish and roll the transaction once more to avoid returning to the
	 * caller with a dirty transaction.
	 */
	error = xfs_defer_finish_noroll(tp);
	if (error)
		return error;
	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
		error = xfs_defer_trans_roll(tp);
		if (error) {
			xfs_force_shutdown((*tp)->t_mountp,
					   SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
	}

	/* Reset LOWMODE now that we've finished all the dfops. */
	ASSERT(list_empty(&(*tp)->t_dfops));
	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
	return 0;
}

void
xfs_defer_cancel(
	struct xfs_trans		*tp)
{
	struct xfs_mount		*mp = tp->t_mountp;

	trace_xfs_defer_cancel(tp, _RET_IP_);
	xfs_defer_cancel_list(mp, &tp->t_dfops);
}

/* Add an item for later deferred processing. */
void
xfs_defer_add(
	struct xfs_trans		*tp,
	enum xfs_defer_ops_type		type,
	struct list_head		*li)
{
	struct xfs_defer_pending	*dfp = NULL;
	const struct xfs_defer_op_type	*ops;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);

	/*
	 * Add the item to a pending item at the end of the intake list.
	 * If the last pending item has the same type, reuse it.  Else,
	 * create a new pending item at the end of the intake list.
	 */
	if (!list_empty(&tp->t_dfops)) {
		dfp = list_last_entry(&tp->t_dfops,
				      struct xfs_defer_pending, dfp_list);
		ops = defer_op_types[dfp->dfp_type];
		if (dfp->dfp_type != type ||
		    (ops->max_items && dfp->dfp_count >= ops->max_items))
			dfp = NULL;
	}
	if (!dfp) {
		dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
				 KM_NOFS);
		dfp->dfp_type = type;
		dfp->dfp_intent = NULL;
		dfp->dfp_done = NULL;
		dfp->dfp_count = 0;
		INIT_LIST_HEAD(&dfp->dfp_work);
		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
	}

	list_add_tail(li, &dfp->dfp_work);
	dfp->dfp_count++;
}

/*
 * Move deferred ops from one transaction to another and reset the source to
 * initial state.  This is primarily used to carry state forward across
 * transaction rolls with pending dfops.
 */
void
xfs_defer_move(
	struct xfs_trans		*dtp,
	struct xfs_trans		*stp)
{
	list_splice_init(&stp->t_dfops, &dtp->t_dfops);

	/*
	 * Low free space mode was historically controlled by a dfops field.
	 * This meant that low mode state potentially carried across multiple
	 * transaction rolls.  Transfer low mode on a dfops move to preserve
	 * that behavior.
	 */
	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
	stp->t_flags &= ~XFS_TRANS_LOWMODE;
}

/*
 * Prepare a chain of fresh deferred ops work items to be completed later.  Log
 * recovery requires the ability to put off until later the actual finishing
 * work so that it can process unfinished items recovered from the log in
 * correct order.
 *
 * Create and log intent items for all the work that we're capturing so that we
 * can be assured that the items will get replayed if the system goes down
 * before log recovery gets a chance to finish the work it put off.  The entire
 * deferred ops state is transferred to the capture structure and the
 * transaction is then ready for the caller to commit it.  If there are no
 * intent items to capture, this function returns NULL.
 *
 * If capture_ip is not NULL, the capture structure will obtain an extra
 * reference to the inode.
 */
static struct xfs_defer_capture *
xfs_defer_ops_capture(
	struct xfs_trans		*tp,
	struct xfs_inode		*capture_ip)
{
	struct xfs_defer_capture	*dfc;

	if (list_empty(&tp->t_dfops))
		return NULL;

	/* Create an object to capture the defer ops. */
	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
	INIT_LIST_HEAD(&dfc->dfc_list);
	INIT_LIST_HEAD(&dfc->dfc_dfops);

	xfs_defer_create_intents(tp);

	/* Move the dfops chain and transaction state to the capture struct. */
	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
	tp->t_flags &= ~XFS_TRANS_LOWMODE;

	/* Capture the remaining block reservations along with the dfops. */
	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;

	/* Preserve the log reservation size. */
	dfc->dfc_logres = tp->t_log_res;

	/*
	 * Grab an extra reference to this inode and attach it to the capture
	 * structure.
	 */
	if (capture_ip) {
		ihold(VFS_I(capture_ip));
		dfc->dfc_capture_ip = capture_ip;
	}

	return dfc;
}

/* Release all resources that we used to capture deferred ops. */
void
xfs_defer_ops_release(
	struct xfs_mount		*mp,
	struct xfs_defer_capture	*dfc)
{
	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
	if (dfc->dfc_capture_ip)
		xfs_irele(dfc->dfc_capture_ip);
	kmem_free(dfc);
}

/*
 * Capture any deferred ops and commit the transaction.  This is the last step
 * needed to finish a log intent item that we recovered from the log.  If any
 * of the deferred ops operate on an inode, the caller must pass in that inode
 * so that the reference can be transferred to the capture structure.  The
 * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
 * xfs_defer_ops_continue.
 */
int
xfs_defer_ops_capture_and_commit(
	struct xfs_trans		*tp,
	struct xfs_inode		*capture_ip,
	struct list_head		*capture_list)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_defer_capture	*dfc;
	int				error;

	ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));

	/* If we don't capture anything, commit transaction and exit. */
	dfc = xfs_defer_ops_capture(tp, capture_ip);
	if (!dfc)
		return xfs_trans_commit(tp);

	/* Commit the transaction and add the capture structure to the list. */
	error = xfs_trans_commit(tp);
	if (error) {
		xfs_defer_ops_release(mp, dfc);
		return error;
	}

	list_add_tail(&dfc->dfc_list, capture_list);
	return 0;
}

/*
 * Attach a chain of captured deferred ops to a new transaction and free the
 * capture structure.  If an inode was captured, it will be passed back to the
 * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
 * The caller now owns the inode reference.
 */
void
xfs_defer_ops_continue(
	struct xfs_defer_capture	*dfc,
	struct xfs_trans		*tp,
	struct xfs_inode		**captured_ipp)
{
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

	/* Lock and join the captured inode to the new transaction. */
	if (dfc->dfc_capture_ip) {
		xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
	}
	*captured_ipp = dfc->dfc_capture_ip;

	/* Move captured dfops chain and state to the transaction. */
	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
	tp->t_flags |= dfc->dfc_tpflags;

	kmem_free(dfc);
}
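
/*
 * Illustrative sketch (hypothetical log recovery flow, not part of this
 * file): how the capture/continue pair above is meant to be used.  Local
 * names such as "dfc", "next", "ip", and "capture_list" are placeholders;
 * the transaction allocation and the final unlock/release of a captured
 * inode are elided.
 *
 *	...while replaying one recovered intent item...
 *	error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
 *
 *	...later, once every intent item has been replayed...
 *	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
 *		list_del_init(&dfc->dfc_list);
 *		...allocate a new permanent transaction, sized with
 *		   dfc->dfc_logres and the captured block reservations...
 *		xfs_defer_ops_continue(dfc, tp, &ip);
 *		error = xfs_trans_commit(tp);
 *		...unlock and release ip if an inode was captured...
 *	}
 */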