// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"

/*
 * Deferred Operations in XFS
 *
 * Due to the way locking rules work in XFS, certain transactions (block
 * mapping and unmapping, typically) have permanent reservations so that
 * we can roll the transaction to adhere to AG locking order rules and
 * to unlock buffers between metadata updates.  Prior to rmap/reflink,
 * the mapping code had a mechanism to perform these deferrals for
 * extents that were going to be freed; this code makes that facility
 * more generic.
 *
 * When adding the reverse mapping and reflink features, it became
 * necessary to perform complex remapping multi-transactions to comply
 * with AG locking order rules, and to be able to spread a single
 * refcount update operation (an operation on an n-block extent can
 * update as many as n records!) among multiple transactions.  XFS can
 * roll a transaction to facilitate this, but using this facility
 * requires us to log "intent" items in case log recovery needs to
 * redo the operation, and to log "done" items to indicate that redo
 * is not necessary.
 *
 * Deferred work is tracked in xfs_defer_pending items.  Each pending
 * item tracks one type of deferred work.  Incoming work items (which
 * have not yet had an intent logged) are attached to a pending item
 * on the dop_intake list, where they wait for the caller to finish
 * the deferred operations.
 *
 * Finishing a set of deferred operations is an involved process.  To
 * start, we define "rolling a deferred-op transaction" as follows:
 *
 * > For each xfs_defer_pending item on the dop_intake list,
 *   - Sort the work items in AG order.  XFS locking
 *     order rules require us to lock buffers in AG order.
 *   - Create a log intent item for that type.
 *   - Attach it to the pending item.
 *   - Move the pending item from the dop_intake list to the
 *     dop_pending list.
 * > Roll the transaction.
 *
 * NOTE: To avoid exceeding the transaction reservation, we limit the
 * number of items that we attach to a given xfs_defer_pending.
 *
 * The actual finishing process looks like this:
 *
 * > For each xfs_defer_pending in the dop_pending list,
 *   - Roll the deferred-op transaction as above.
 *   - Create a log done item for that type, and attach it to the
 *     log intent item.
 *   - For each work item attached to the log intent item,
 *     * Perform the described action.
 *     * Attach the work item to the log done item.
 *     * If the result of doing the work was -EAGAIN, ->finish_item
 *       wants a new transaction.  See the "Requesting a Fresh
 *       Transaction while Finishing Deferred Work" section below for
 *       details.
 *
 * The key here is that we must log an intent item for all pending
 * work items every time we roll the transaction, and that we must log
 * a done item as soon as the work is completed.  With this mechanism
 * we can perform complex remapping operations, chaining intent items
 * as needed.
 *
 * Requesting a Fresh Transaction while Finishing Deferred Work
 *
 * If ->finish_item decides that it needs a fresh transaction to
 * finish the work, it must ask its caller (xfs_defer_finish) for a
 * continuation.  The most likely cause of this circumstance is the
 * refcount adjust functions deciding that they've logged enough items
 * to be at risk of exceeding the transaction reservation.
 *
 * To get a fresh transaction, we want to log the existing log done
 * item to prevent the log intent item from replaying, immediately log
 * a new log intent item with the unfinished work items, roll the
 * transaction, and re-call ->finish_item wherever it left off.  The
 * log done item and the new log intent item must be in the same
 * transaction or atomicity cannot be guaranteed; defer_finish ensures
 * that this happens.
 *
 * This requires some coordination between ->finish_item and
 * defer_finish.  Upon deciding to request a new transaction,
 * ->finish_item should update the current work item to reflect the
 * unfinished work.  Next, it should reset the log done item's list
 * count to the number of items finished, and return -EAGAIN.
 * defer_finish sees the -EAGAIN, logs the new log intent item
 * with the remaining work items, and leaves the xfs_defer_pending
 * item at the head of the dop_work queue.  Then it rolls the
 * transaction and picks up processing where it left off.
 * ->finish_item must take care to leave enough transaction
 * reservation to fit the new log intent item.
 *
 * This is an example of remapping the extent (E, E+B) into file X at
 * offset A and dealing with the extent (C, C+B) already being mapped
 * there:
 * +-------------------------------------------------+
 * | Unmap file X startblock C offset A length B     | t0
 * | Intent to reduce refcount for extent (C, B)     |
 * | Intent to remove rmap (X, C, A, B)              |
 * | Intent to free extent (D, 1) (bmbt block)       |
 * | Intent to map (X, A, B) at startblock E         |
 * +-------------------------------------------------+
 * | Map file X startblock E offset A length B       | t1
 * | Done mapping (X, E, A, B)                       |
 * | Intent to increase refcount for extent (E, B)   |
 * | Intent to add rmap (X, E, A, B)                 |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C, B)               | t2
 * | Done reducing refcount for extent (C, 9)        |
 * | Intent to reduce refcount for extent (C+9, B-9) |
 * | (ran out of space after 9 refcount updates)     |
 * +-------------------------------------------------+
 * | Reduce refcount for extent (C+9, B-9)           | t3
 * | Done reducing refcount for extent (C+9, B-9)    |
 * | Increase refcount for extent (E, B)             |
 * | Done increasing refcount for extent (E, B)      |
 * | Intent to free extent (C, B)                    |
 * | Intent to free extent (F, 1) (refcountbt block) |
 * | Intent to remove rmap (F, 1, REFC)              |
 * +-------------------------------------------------+
 * | Remove rmap (X, C, A, B)                        | t4
 * | Done removing rmap (X, C, A, B)                 |
 * | Add rmap (X, E, A, B)                           |
 * | Done adding rmap (X, E, A, B)                   |
 * | Remove rmap (F, 1, REFC)                        |
 * | Done removing rmap (F, 1, REFC)                 |
 * +-------------------------------------------------+
 * | Free extent (C, B)                              | t5
 * | Done freeing extent (C, B)                      |
 * | Free extent (D, 1)                              |
 * | Done freeing extent (D, 1)                      |
 * | Free extent (F, 1)                              |
 * | Done freeing extent (F, 1)                      |
 * +-------------------------------------------------+
 *
 * If we should crash before t2 commits, log recovery replays
 * the following intent items:
 *
 * - Intent to reduce refcount for extent (C, B)
 * - Intent to remove rmap (X, C, A, B)
 * - Intent to free extent (D, 1) (bmbt block)
 * - Intent to increase refcount for extent (E, B)
 * - Intent to add rmap (X, E, A, B)
 *
 * In the process of recovering, it should also generate and take care
 * of these intent items:
 *
 * - Intent to free extent (C, B)
 * - Intent to free extent (F, 1) (refcountbt block)
 * - Intent to remove rmap (F, 1, REFC)
 *
 * Note that the continuation requested between t2 and t3 is likely to
 * reoccur.
 */
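
/*
 * Typical usage, sketched here for illustration only (the work item and
 * its embedded list_head member below are hypothetical; real callers use
 * the per-type work item structures declared elsewhere):
 *
 *	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &item->item_list);
 *	... more metadata updates under the same permanent reservation ...
 *	error = xfs_defer_finish(&tp);
 *
 * xfs_defer_add() queues the work item on the transaction's intake list;
 * xfs_defer_finish() logs intent items for everything queued, rolls the
 * transaction, and runs the work to completion, logging done items as it
 * goes.
 */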

static const struct xfs_defer_op_type *defer_op_types[] = {
	[XFS_DEFER_OPS_TYPE_BMAP]	= &xfs_bmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_REFCOUNT]	= &xfs_refcount_update_defer_type,
	[XFS_DEFER_OPS_TYPE_RMAP]	= &xfs_rmap_update_defer_type,
	[XFS_DEFER_OPS_TYPE_FREE]	= &xfs_extent_free_defer_type,
	[XFS_DEFER_OPS_TYPE_AGFL_FREE]	= &xfs_agfl_free_defer_type,
};

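/*
 * Ensure that this pending work item has a log intent item attached,
 * creating one for its queued work items if it does not exist yet.
 */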
static void
xfs_defer_create_intent(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp,
	bool				sort)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];

	if (!dfp->dfp_intent)
		dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
						     dfp->dfp_count, sort);
}

/*
 * For each pending item in the intake list, log its intent item and the
 * associated extents, then add the entire intake list to the end of
 * the pending list.
 */
STATIC void
xfs_defer_create_intents(
	struct xfs_trans		*tp)
{
	struct xfs_defer_pending	*dfp;

	list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
		trace_xfs_defer_create_intent(tp->t_mountp, dfp);
		xfs_defer_create_intent(tp, dfp, true);
	}
}

/* Abort all the intents that were committed. */
STATIC void
xfs_defer_trans_abort(
	struct xfs_trans		*tp,
	struct list_head		*dop_pending)
{
	struct xfs_defer_pending	*dfp;
	const struct xfs_defer_op_type	*ops;

	trace_xfs_defer_trans_abort(tp, _RET_IP_);

	/* Abort intent items that don't have a done item. */
	list_for_each_entry(dfp, dop_pending, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
		if (dfp->dfp_intent && !dfp->dfp_done) {
			ops->abort_intent(dfp->dfp_intent);
			dfp->dfp_intent = NULL;
		}
	}
}

/* Roll a transaction so we can do some deferred op processing. */
STATIC int
xfs_defer_trans_roll(
	struct xfs_trans		**tpp)
{
	struct xfs_trans		*tp = *tpp;
	struct xfs_buf_log_item		*bli;
	struct xfs_inode_log_item	*ili;
	struct xfs_log_item		*lip;
	struct xfs_buf			*bplist[XFS_DEFER_OPS_NR_BUFS];
	struct xfs_inode		*iplist[XFS_DEFER_OPS_NR_INODES];
	unsigned int			ordered = 0; /* bitmap */
	int				bpcount = 0, ipcount = 0;
	int				i;
	int				error;

	BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);

	list_for_each_entry(lip, &tp->t_items, li_trans) {
		switch (lip->li_type) {
		case XFS_LI_BUF:
			bli = container_of(lip, struct xfs_buf_log_item,
					   bli_item);
			if (bli->bli_flags & XFS_BLI_HOLD) {
				if (bpcount >= XFS_DEFER_OPS_NR_BUFS) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				if (bli->bli_flags & XFS_BLI_ORDERED)
					ordered |= (1U << bpcount);
				else
					xfs_trans_dirty_buf(tp, bli->bli_buf);
				bplist[bpcount++] = bli->bli_buf;
			}
			break;
		case XFS_LI_INODE:
			ili = container_of(lip, struct xfs_inode_log_item,
					   ili_item);
			if (ili->ili_lock_flags == 0) {
				if (ipcount >= XFS_DEFER_OPS_NR_INODES) {
					ASSERT(0);
					return -EFSCORRUPTED;
				}
				xfs_trans_log_inode(tp, ili->ili_inode,
						    XFS_ILOG_CORE);
				iplist[ipcount++] = ili->ili_inode;
			}
			break;
		default:
			break;
		}
	}

	trace_xfs_defer_trans_roll(tp, _RET_IP_);

	/*
	 * Roll the transaction.  Rolling always gives us a new transaction (even
	 * if committing the old one fails!) to hand back to the caller, so we
	 * join the held resources to the new transaction so that we always
	 * return with the held resources joined to @tpp, no matter what
	 * happened.
	 */
	error = xfs_trans_roll(tpp);
	tp = *tpp;

	/* Rejoin the joined inodes. */
	for (i = 0; i < ipcount; i++)
		xfs_trans_ijoin(tp, iplist[i], 0);

	/* Rejoin the buffers and dirty them so the log moves forward. */
	for (i = 0; i < bpcount; i++) {
		xfs_trans_bjoin(tp, bplist[i]);
		if (ordered & (1U << i))
			xfs_trans_ordered_buf(tp, bplist[i]);
		xfs_trans_bhold(tp, bplist[i]);
	}

	if (error)
		trace_xfs_defer_trans_roll_error(tp, error);
	return error;
}

/*
 * Free up any items left in the list.
 */
static void
xfs_defer_cancel_list(
	struct xfs_mount		*mp,
	struct list_head		*dop_list)
{
	struct xfs_defer_pending	*dfp;
	struct xfs_defer_pending	*pli;
	struct list_head		*pwi;
	struct list_head		*n;
	const struct xfs_defer_op_type	*ops;

	/*
	 * Free the pending items.  Caller should already have arranged
	 * for the intent items to be released.
	 */
	list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
		ops = defer_op_types[dfp->dfp_type];
		trace_xfs_defer_cancel_list(mp, dfp);
		list_del(&dfp->dfp_list);
		list_for_each_safe(pwi, n, &dfp->dfp_work) {
			list_del(pwi);
			dfp->dfp_count--;
			ops->cancel_item(pwi);
		}
		ASSERT(dfp->dfp_count == 0);
		kmem_free(dfp);
	}
}

/*
 * Prevent a log intent item from pinning the tail of the log by logging a
 * done item to release the intent item; and then log a new intent item.
 * The caller should provide a fresh transaction and roll it after we're done.
 */
static int
xfs_defer_relog(
	struct xfs_trans		**tpp,
	struct list_head		*dfops)
{
	struct xlog			*log = (*tpp)->t_mountp->m_log;
	struct xfs_defer_pending	*dfp;
	xfs_lsn_t			threshold_lsn = NULLCOMMITLSN;

	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	list_for_each_entry(dfp, dfops, dfp_list) {
		/*
		 * If the log intent item for this deferred op is not a part of
		 * the current log checkpoint, relog the intent item to keep
		 * the log tail moving forward.  We're ok with this being racy
		 * because an incorrect decision means we'll be a little slower
		 * at pushing the tail.
		 */
		if (dfp->dfp_intent == NULL ||
		    xfs_log_item_in_current_chkpt(dfp->dfp_intent))
			continue;

		/*
		 * Figure out where we need the tail to be in order to maintain
		 * the minimum required free space in the log.  Only sample
		 * the log threshold once per call.
		 */
		if (threshold_lsn == NULLCOMMITLSN) {
			threshold_lsn = xlog_grant_push_threshold(log, 0);
			if (threshold_lsn == NULLCOMMITLSN)
				break;
		}
		if (XFS_LSN_CMP(dfp->dfp_intent->li_lsn, threshold_lsn) >= 0)
			continue;

		trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp);
		XFS_STATS_INC((*tpp)->t_mountp, defer_relog);
		dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp);
	}

	if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
		return xfs_defer_trans_roll(tpp);
	return 0;
}

/*
 * Log an intent-done item for the first pending intent, and finish the work
 * items.
 */
static int
xfs_defer_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_pending	*dfp)
{
	const struct xfs_defer_op_type	*ops = defer_op_types[dfp->dfp_type];
	struct xfs_btree_cur		*state = NULL;
	struct list_head		*li, *n;
	int				error;

	trace_xfs_defer_pending_finish(tp->t_mountp, dfp);

	dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count);
	list_for_each_safe(li, n, &dfp->dfp_work) {
		list_del(li);
		dfp->dfp_count--;
		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
		if (error == -EAGAIN) {
			/*
			 * Caller wants a fresh transaction; put the work item
			 * back on the list and log a new log intent item to
			 * replace the old one.  See "Requesting a Fresh
			 * Transaction while Finishing Deferred Work" above.
			 */
			list_add(li, &dfp->dfp_work);
			dfp->dfp_count++;
			dfp->dfp_done = NULL;
			dfp->dfp_intent = NULL;
			xfs_defer_create_intent(tp, dfp, false);
		}

		if (error)
			goto out;
	}

	/* Done with the dfp, free it. */
	list_del(&dfp->dfp_list);
	kmem_free(dfp);
out:
	if (ops->finish_cleanup)
		ops->finish_cleanup(tp, state, error);
	return error;
}

/*
 * Finish all the pending work.  This involves logging intent items for
 * any work items that wandered in since the last transaction roll (if
 * one has even happened), rolling the transaction, and finishing the
 * work items in the first item on the logged-and-pending list.
 */
int
xfs_defer_finish_noroll(
	struct xfs_trans		**tp)
{
	struct xfs_defer_pending	*dfp;
	int				error = 0;
	LIST_HEAD(dop_pending);

	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);

	trace_xfs_defer_finish(*tp, _RET_IP_);

	/* Until we run out of pending work to finish... */
	while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) {
		/*
		 * Deferred items that are created in the process of finishing
		 * other deferred work items should be queued at the head of
		 * the pending list, which puts them ahead of the deferred work
		 * that was created by the caller.  This keeps the number of
		 * pending work items to a minimum, which decreases the amount
		 * of time that any one intent item can stick around in memory,
		 * pinning the log tail.
		 */
		xfs_defer_create_intents(*tp);
		list_splice_init(&(*tp)->t_dfops, &dop_pending);

		error = xfs_defer_trans_roll(tp);
		if (error)
			goto out_shutdown;

		/* Possibly relog intent items to keep the log moving. */
		error = xfs_defer_relog(tp, &dop_pending);
		if (error)
			goto out_shutdown;

		dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
				       dfp_list);
		error = xfs_defer_finish_one(*tp, dfp);
		if (error && error != -EAGAIN)
			goto out_shutdown;
	}

	trace_xfs_defer_finish_done(*tp, _RET_IP_);
	return 0;

out_shutdown:
	xfs_defer_trans_abort(*tp, &dop_pending);
	xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
	trace_xfs_defer_finish_error(*tp, error);
	xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending);
	xfs_defer_cancel(*tp);
	return error;
}

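/*
 * Finish all deferred work attached to the transaction, then roll it once
 * more so that the caller gets back a clean (non-dirty) transaction.
 */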
int
xfs_defer_finish(
	struct xfs_trans	**tp)
{
	int			error;

	/*
	 * Finish and roll the transaction once more to avoid returning to the
	 * caller with a dirty transaction.
	 */
	error = xfs_defer_finish_noroll(tp);
	if (error)
		return error;
	if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
		error = xfs_defer_trans_roll(tp);
		if (error) {
			xfs_force_shutdown((*tp)->t_mountp,
					   SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
	}

	/* Reset LOWMODE now that we've finished all the dfops. */
	ASSERT(list_empty(&(*tp)->t_dfops));
	(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
	return 0;
}

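/* Cancel all deferred work attached to this transaction. */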
void
xfs_defer_cancel(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	trace_xfs_defer_cancel(tp, _RET_IP_);
	xfs_defer_cancel_list(mp, &tp->t_dfops);
}

/* Add an item for later deferred processing. */
void
xfs_defer_add(
	struct xfs_trans		*tp,
	enum xfs_defer_ops_type		type,
	struct list_head		*li)
{
	struct xfs_defer_pending	*dfp = NULL;
	const struct xfs_defer_op_type	*ops;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);

	/*
	 * Add the item to a pending item at the end of the intake list.
	 * If the last pending item has the same type, reuse it.  Else,
	 * create a new pending item at the end of the intake list.
	 */
	if (!list_empty(&tp->t_dfops)) {
		dfp = list_last_entry(&tp->t_dfops,
				struct xfs_defer_pending, dfp_list);
		ops = defer_op_types[dfp->dfp_type];
		if (dfp->dfp_type != type ||
		    (ops->max_items && dfp->dfp_count >= ops->max_items))
			dfp = NULL;
	}
	if (!dfp) {
		dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
				KM_NOFS);
		dfp->dfp_type = type;
		dfp->dfp_intent = NULL;
		dfp->dfp_done = NULL;
		dfp->dfp_count = 0;
		INIT_LIST_HEAD(&dfp->dfp_work);
		list_add_tail(&dfp->dfp_list, &tp->t_dfops);
	}

	list_add_tail(li, &dfp->dfp_work);
	dfp->dfp_count++;
}

/*
 * Move deferred ops from one transaction to another and reset the source to
 * initial state. This is primarily used to carry state forward across
 * transaction rolls with pending dfops.
 */
void
xfs_defer_move(
	struct xfs_trans	*dtp,
	struct xfs_trans	*stp)
{
	list_splice_init(&stp->t_dfops, &dtp->t_dfops);

	/*
	 * Low free space mode was historically controlled by a dfops field.
	 * This meant that low mode state potentially carried across multiple
	 * transaction rolls. Transfer low mode on a dfops move to preserve
	 * that behavior.
	 */
	dtp->t_flags |= (stp->t_flags & XFS_TRANS_LOWMODE);
	stp->t_flags &= ~XFS_TRANS_LOWMODE;
}

/*
 * Prepare a chain of fresh deferred ops work items to be completed later.  Log
 * recovery requires the ability to put off until later the actual finishing
 * work so that it can process unfinished items recovered from the log in
 * correct order.
 *
 * Create and log intent items for all the work that we're capturing so that we
 * can be assured that the items will get replayed if the system goes down
 * before log recovery gets a chance to finish the work it put off.  The entire
 * deferred ops state is transferred to the capture structure and the
 * transaction is then ready for the caller to commit it.  If there are no
 * intent items to capture, this function returns NULL.
 *
 * If capture_ip is not NULL, the capture structure will obtain an extra
 * reference to the inode.
 */
static struct xfs_defer_capture *
xfs_defer_ops_capture(
	struct xfs_trans		*tp,
	struct xfs_inode		*capture_ip)
{
	struct xfs_defer_capture	*dfc;

	if (list_empty(&tp->t_dfops))
		return NULL;

	/* Create an object to capture the defer ops. */
	dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
	INIT_LIST_HEAD(&dfc->dfc_list);
	INIT_LIST_HEAD(&dfc->dfc_dfops);

	xfs_defer_create_intents(tp);

	/* Move the dfops chain and transaction state to the capture struct. */
	list_splice_init(&tp->t_dfops, &dfc->dfc_dfops);
	dfc->dfc_tpflags = tp->t_flags & XFS_TRANS_LOWMODE;
	tp->t_flags &= ~XFS_TRANS_LOWMODE;

	/* Capture the remaining block reservations along with the dfops. */
	dfc->dfc_blkres = tp->t_blk_res - tp->t_blk_res_used;
	dfc->dfc_rtxres = tp->t_rtx_res - tp->t_rtx_res_used;

	/* Preserve the log reservation size. */
	dfc->dfc_logres = tp->t_log_res;

	/*
	 * Grab an extra reference to this inode and attach it to the capture
	 * structure.
	 */
	if (capture_ip) {
		ihold(VFS_I(capture_ip));
		dfc->dfc_capture_ip = capture_ip;
	}

	return dfc;
}

/* Release all resources that we used to capture deferred ops. */
void
xfs_defer_ops_release(
	struct xfs_mount		*mp,
	struct xfs_defer_capture	*dfc)
{
	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
	if (dfc->dfc_capture_ip)
		xfs_irele(dfc->dfc_capture_ip);
	kmem_free(dfc);
}

/*
 * Capture any deferred ops and commit the transaction.  This is the last step
 * needed to finish a log intent item that we recovered from the log.  If any
 * of the deferred ops operate on an inode, the caller must pass in that inode
 * so that the reference can be transferred to the capture structure.  The
 * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
 * xfs_defer_ops_continue.
 */
int
xfs_defer_ops_capture_and_commit(
	struct xfs_trans		*tp,
	struct xfs_inode		*capture_ip,
	struct list_head		*capture_list)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_defer_capture	*dfc;
	int				error;

	ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));

	/* If we don't capture anything, commit transaction and exit. */
	dfc = xfs_defer_ops_capture(tp, capture_ip);
	if (!dfc)
		return xfs_trans_commit(tp);

	/* Commit the transaction and add the capture structure to the list. */
	error = xfs_trans_commit(tp);
	if (error) {
		xfs_defer_ops_release(mp, dfc);
		return error;
	}

	list_add_tail(&dfc->dfc_list, capture_list);
	return 0;
}

/*
 * Attach a chain of captured deferred ops to a new transaction and free the
 * capture structure.  If an inode was captured, it will be passed back to the
 * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
 * The caller now owns the inode reference.
 */
void
xfs_defer_ops_continue(
	struct xfs_defer_capture	*dfc,
	struct xfs_trans		*tp,
	struct xfs_inode		**captured_ipp)
{
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));

	/* Lock and join the captured inode to the new transaction. */
	if (dfc->dfc_capture_ip) {
		xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
	}
	*captured_ipp = dfc->dfc_capture_ip;

	/* Move captured dfops chain and state to the transaction. */
	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
	tp->t_flags |= dfc->dfc_tpflags;

	kmem_free(dfc);
}