// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2010 David Chinner.
 * Copyright (c) 2011 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_log.h"

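/*
 * Mark an extent busy for the life of the current transaction: allocate a
 * new entry, add it to the per-AG busy extent rbtree (indexed by block
 * number) and link it onto the transaction's t_busy list so that it can be
 * cleared once the transaction commits to the log.
 */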
void
xfs_extent_busy_insert(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned int		flags)
{
	struct xfs_extent_busy	*new;
	struct xfs_extent_busy	*busyp;
	struct xfs_perag	*pag;
	struct rb_node		**rbp;
	struct rb_node		*parent = NULL;

	new = kmem_zalloc(sizeof(struct xfs_extent_busy), 0);
	new->agno = agno;
	new->bno = bno;
	new->length = len;
	INIT_LIST_HEAD(&new->list);
	new->flags = flags;

	/* trace before insert to be able to see failed inserts */
	trace_xfs_extent_busy(tp->t_mountp, agno, bno, len);

	pag = xfs_perag_get(tp->t_mountp, new->agno);
	spin_lock(&pag->pagb_lock);
	rbp = &pag->pagb_tree.rb_node;
	while (*rbp) {
		parent = *rbp;
		busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);

		if (new->bno < busyp->bno) {
			rbp = &(*rbp)->rb_left;
			ASSERT(new->bno + new->length <= busyp->bno);
		} else if (new->bno > busyp->bno) {
			rbp = &(*rbp)->rb_right;
			ASSERT(bno >= busyp->bno + busyp->length);
		} else {
			ASSERT(0);
		}
	}

	rb_link_node(&new->rb_node, parent, rbp);
	rb_insert_color(&new->rb_node, &pag->pagb_tree);

	list_add(&new->list, &tp->t_busy);
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}
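
/*
 * Usage sketch (illustrative, not part of this file): extents freed in a
 * transaction are marked busy at free time and cleared in bulk once the
 * transaction has committed to the log:
 *
 *	xfs_extent_busy_insert(tp, agno, bno, len, 0);
 *	...
 *	xfs_extent_busy_clear(mp, &tp->t_busy, false);
 */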

/*
 * Search for a busy extent within the range of the extent we are about to
 * allocate.  xfs_extent_busy_search() takes the busy extent tree lock
 * itself, so the caller must not already hold it.  This function returns
 * 0 for no overlapping busy extent, -1 for an overlapping but not exact
 * busy extent, and 1 for an exact match.  A non-zero return thus indicates
 * an overlap that will require a synchronous transaction, while still
 * letting the caller distinguish a partial from an exact match.
 */
int
xfs_extent_busy_search(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	xfs_extlen_t		len)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;
	struct xfs_extent_busy	*busyp;
	int			match = 0;

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);

	rbp = pag->pagb_tree.rb_node;

	/* find closest start bno overlap */
	while (rbp) {
		busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
		if (bno < busyp->bno) {
			/* may overlap, but exact start block is lower */
			if (bno + len > busyp->bno)
				match = -1;
			rbp = rbp->rb_left;
		} else if (bno > busyp->bno) {
			/* may overlap, but exact start block is higher */
			if (bno < busyp->bno + busyp->length)
				match = -1;
			rbp = rbp->rb_right;
		} else {
			/* bno matches busyp, length determines exact match */
			match = (busyp->length == len) ? 1 : -1;
			break;
		}
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
	return match;
}
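
/*
 * Usage sketch (illustrative, not part of this file): checking whether a
 * candidate range can be handed out without waiting on the log:
 *
 *	if (xfs_extent_busy_search(mp, agno, bno, len)) {
 *		// partial (-1) or exact (1) overlap: the caller must use a
 *		// synchronous transaction before reusing the range
 *	}
 */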

/*
 * The found free extent [fbno, fend] overlaps part or all of the given busy
 * extent.  If the overlap covers the beginning, the end, or all of the busy
 * extent, the overlapping portion can be made unbusy and used for the
 * allocation.  We can't split a busy extent because we can't modify a
 * transaction/CIL context busy list, but we can update an entry's block
 * number or length.
 *
 * Returns true if the extent can safely be reused, or false if the search
 * needs to be restarted.
 */
STATIC bool
xfs_extent_busy_update_extent(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata) __releases(&pag->pagb_lock)
					  __acquires(&pag->pagb_lock)
{
	xfs_agblock_t		fend = fbno + flen;
	xfs_agblock_t		bbno = busyp->bno;
	xfs_agblock_t		bend = bbno + busyp->length;

	/*
	 * This extent is currently being discarded.  Give the thread
	 * performing the discard a chance to mark the extent unbusy
	 * and retry.
	 */
	if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
		spin_unlock(&pag->pagb_lock);
		delay(1);
		spin_lock(&pag->pagb_lock);
		return false;
	}

	/*
	 * If there is a busy extent overlapping a user allocation, we have
	 * no choice but to force the log and retry the search.
	 *
	 * Fortunately this does not happen during normal operation, but
	 * only if the filesystem is very low on space and has to dip into
	 * the AGFL for normal allocations.
	 */
	if (userdata)
		goto out_force_log;

	if (bbno < fbno && bend > fend) {
		/*
		 * Case 1:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +---------+
		 *        fbno   fend
		 */

		/*
		 * We would have to split the busy extent to be able to track
		 * it correctly, which we cannot do because we would have to
		 * modify the list of busy extents attached to the transaction
		 * or CIL context, which is immutable.
		 *
		 * Force out the log to clear the busy extent and retry the
		 * search.
		 */
		goto out_force_log;
	} else if (bbno >= fbno && bend <= fend) {
		/*
		 * Case 2:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +-----------------+
		 *    fbno           fend
		 *
		 * Case 3:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 4:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +--------------------------+
		 *    fbno                    fend
		 *
		 * Case 5:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +-----------------------------------+
		 *    fbno                             fend
		 */

		/*
		 * The busy extent is fully covered by the extent we are
		 * allocating, and can simply be removed from the rbtree.
		 * However we cannot remove it from the immutable list
		 * tracking busy extents in the transaction or CIL context,
		 * so set the length to zero to mark it invalid.
		 *
		 * We also need to restart the busy extent search from the
		 * tree root, because erasing the node can rearrange the
		 * tree topology.
		 */
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
		busyp->length = 0;
		return false;
	} else if (fend < bend) {
		/*
		 * Case 6:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *             +---------+
		 *             fbno   fend
		 *
		 * Case 7:
		 *             bbno           bend
		 *             +BBBBBBBBBBBBBBBBB+
		 *    +------------------+
		 *    fbno            fend
		 */
		busyp->bno = fend;
	} else if (bbno < fbno) {
		/*
		 * Case 8:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +-------------+
		 *        fbno       fend
		 *
		 * Case 9:
		 *    bbno           bend
		 *    +BBBBBBBBBBBBBBBBB+
		 *        +----------------------+
		 *        fbno                fend
		 */
		busyp->length = fbno - busyp->bno;
	} else {
		ASSERT(0);
	}

	trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
	return true;

out_force_log:
	spin_unlock(&pag->pagb_lock);
	xfs_log_force(mp, XFS_LOG_SYNC);
	trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
	spin_lock(&pag->pagb_lock);
	return false;
}


/*
 * For a given extent [fbno, flen], make sure we can reuse it safely.  Walk
 * the AG's busy tree and resolve any overlap with a busy extent, either by
 * trimming or removing the busy entry, or by forcing the log and retrying.
 */
void
xfs_extent_busy_reuse(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		fbno,
	xfs_extlen_t		flen,
	bool			userdata)
{
	struct xfs_perag	*pag;
	struct rb_node		*rbp;

	ASSERT(flen > 0);

	pag = xfs_perag_get(mp, agno);
	spin_lock(&pag->pagb_lock);
restart:
	rbp = pag->pagb_tree.rb_node;
	while (rbp) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fbno + flen <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
						  userdata))
			goto restart;
	}
	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}
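
/*
 * Usage sketch (illustrative, assumed caller): blocks handed out straight
 * from the AGFL must be made safe before they are reused, e.g.:
 *
 *	xfs_extent_busy_reuse(mp, agno, bno, 1, false);
 */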

/*
 * For a given extent [fbno, flen], search the busy extent list to find a
 * subset of the extent that is not busy.  If the trimmed length *len is
 * smaller than args->minlen no suitable extent could be found, and the
 * higher level code needs to force out the log and retry the allocation.
 *
 * Return the current busy generation for the AG if the extent is busy. This
 * value can be used to wait for at least one of the currently busy extents
 * to be cleared. Note that the busy list is not guaranteed to be empty after
 * a waiter on that generation is woken. The state of a specific extent must
 * always be confirmed with another call to xfs_extent_busy_trim() before it
 * can be used.
 */
bool
xfs_extent_busy_trim(
	struct xfs_alloc_arg	*args,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	unsigned		*busy_gen)
{
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	struct rb_node		*rbp;
	bool			ret = false;

	ASSERT(*len > 0);

	spin_lock(&args->pag->pagb_lock);
restart:
	fbno = *bno;
	flen = *len;
	rbp = args->pag->pagb_tree.rb_node;
	while (rbp && flen >= args->minlen) {
		struct xfs_extent_busy *busyp =
			rb_entry(rbp, struct xfs_extent_busy, rb_node);
		xfs_agblock_t	fend = fbno + flen;
		xfs_agblock_t	bbno = busyp->bno;
		xfs_agblock_t	bend = bbno + busyp->length;

		if (fend <= bbno) {
			rbp = rbp->rb_left;
			continue;
		} else if (fbno >= bend) {
			rbp = rbp->rb_right;
			continue;
		}

		/*
		 * If this is a metadata allocation, try to reuse the busy
		 * extent instead of trimming the allocation.
		 */
		if (!(args->datatype & XFS_ALLOC_USERDATA) &&
		    !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
			if (!xfs_extent_busy_update_extent(args->mp, args->pag,
							  busyp, fbno, flen,
							  false))
				goto restart;
			continue;
		}

		if (bbno <= fbno) {
			/* start overlap */

			/*
			 * Case 1:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +---------+
			 *        fbno   fend
			 *
			 * Case 2:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-------------+
			 *    fbno       fend
			 *
			 * Case 3:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +-------------+
			 *        fbno       fend
			 *
			 * Case 4:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +-----------------+
			 *    fbno           fend
			 *
			 * No unbusy region in extent, return failure.
			 */
			if (fend <= bend)
				goto fail;

			/*
			 * Case 5:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *        +----------------------+
			 *        fbno                fend
			 *
			 * Case 6:
			 *    bbno           bend
			 *    +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *                       +-------+
			 *                       fbno fend
			 */
			fbno = bend;
		} else if (bend >= fend) {
			/* end overlap */

			/*
			 * Case 7:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +------------------+
			 *    fbno            fend
			 *
			 * Case 8:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +--------------------------+
			 *    fbno                    fend
			 *
			 * Needs to be trimmed to:
			 *    +-------+
			 *    fbno fend
			 */
			fend = bbno;
		} else {
			/* middle overlap */

			/*
			 * Case 9:
			 *             bbno           bend
			 *             +BBBBBBBBBBBBBBBBB+
			 *    +-----------------------------------+
			 *    fbno                             fend
			 *
			 * Can be trimmed to:
			 *    +-------+        OR         +-------+
			 *    fbno fend                   fbno fend
			 *
			 * Backward allocation leads to significant
			 * fragmentation of directories, which degrades
			 * directory performance, therefore we always want to
			 * choose the option that produces forward allocation
			 * patterns.
			 *
			 * Preferring the lower bno extent will make the next
			 * request use "fend" as the start of the next
			 * allocation; if the segment is no longer busy at
			 * that point, we'll get a contiguous allocation, but
			 * even if it is still busy, we will get a forward
			 * allocation.
			 *
			 * We try to avoid choosing the segment at "bend",
			 * because that can lead to the next allocation
			 * taking the segment at "fbno", which would be a
			 * backward allocation.  We only use the segment at
			 * "fbno" if it is much larger than the current
			 * requested size, because in that case there's a
			 * good chance subsequent allocations will be
			 * contiguous.
			 */
			if (bbno - fbno >= args->maxlen) {
				/* left candidate fits perfectly */
				fend = bbno;
			} else if (fend - bend >= args->maxlen * 4) {
				/* right candidate has enough free space */
				fbno = bend;
			} else if (bbno - fbno >= args->minlen) {
				/* left candidate fits minimum requirement */
				fend = bbno;
			} else {
				goto fail;
			}
		}

		flen = fend - fbno;
	}
out:
	if (fbno != *bno || flen != *len) {
		trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
					  fbno, flen);
		*bno = fbno;
		*len = flen;
		*busy_gen = args->pag->pagb_gen;
		ret = true;
	}
	spin_unlock(&args->pag->pagb_lock);
	return ret;
fail:
	/*
	 * Return a zero extent length as the failure indication.  All callers
	 * re-check whether the trimmed extent satisfies the minlen
	 * requirement.
	 */
	flen = 0;
	goto out;
}
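
/*
 * Usage sketch (illustrative, assumed caller pattern): trim a candidate
 * extent and, if too little remains, wait for busy extents to clear before
 * retrying the allocation:
 *
 *	unsigned busy_gen;
 *
 *	if (xfs_extent_busy_trim(args, &bno, &len, &busy_gen) &&
 *	    len < args->minlen)
 *		xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
 */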
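/*
 * Drop a single busy extent: remove it from the rbtree if it is still
 * tracked there (a zero length marks an entry already erased during reuse),
 * then unlink it from the busy list and free it.
 */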
STATIC void
xfs_extent_busy_clear_one(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	struct xfs_extent_busy	*busyp)
{
	if (busyp->length) {
		trace_xfs_extent_busy_clear(mp, busyp->agno, busyp->bno,
						busyp->length);
		rb_erase(&busyp->rb_node, &pag->pagb_tree);
	}

	list_del_init(&busyp->list);
	kmem_free(busyp);
}
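/*
 * Finish a pass over one AG's busy extents: bump the busy generation and
 * wake any waiters if at least one extent was cleared, then drop the lock
 * and the perag reference.
 */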
static void
xfs_extent_busy_put_pag(
	struct xfs_perag	*pag,
	bool			wakeup)
		__releases(pag->pagb_lock)
{
	if (wakeup) {
		pag->pagb_gen++;
		wake_up_all(&pag->pagb_wait);
	}

	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}

/*
 * Remove all extents on the passed in list from the busy extents tree.
 * If do_discard is set, skip extents that need to be discarded, and mark
 * these as undergoing a discard operation instead.
 */
void
xfs_extent_busy_clear(
	struct xfs_mount	*mp,
	struct list_head	*list,
	bool			do_discard)
{
	struct xfs_extent_busy	*busyp, *n;
	struct xfs_perag	*pag = NULL;
	xfs_agnumber_t		agno = NULLAGNUMBER;
	bool			wakeup = false;

	list_for_each_entry_safe(busyp, n, list, list) {
		if (busyp->agno != agno) {
			if (pag)
				xfs_extent_busy_put_pag(pag, wakeup);
			agno = busyp->agno;
			pag = xfs_perag_get(mp, agno);
			spin_lock(&pag->pagb_lock);
			wakeup = false;
		}

		if (do_discard && busyp->length &&
		    !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
			busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
		} else {
			xfs_extent_busy_clear_one(mp, pag, busyp);
			wakeup = true;
		}
	}

	if (pag)
		xfs_extent_busy_put_pag(pag, wakeup);
}

/*
 * Flush out all busy extents for this AG.  Force the log to disk, then wait
 * until the AG's busy generation changes, i.e. until at least one busy
 * extent has been cleared since busy_gen was sampled.
 */
void
xfs_extent_busy_flush(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	unsigned		busy_gen)
{
	DEFINE_WAIT		(wait);
	int			error;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return;

	do {
		prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
		if (busy_gen != READ_ONCE(pag->pagb_gen))
			break;
		schedule();
	} while (1);

	finish_wait(&pag->pagb_wait, &wait);
}
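/*
 * Wait until the busy extent tree of every AG in the filesystem is empty.
 */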
void
xfs_extent_busy_wait_all(
	struct xfs_mount	*mp)
{
	DEFINE_WAIT		(wait);
	xfs_agnumber_t		agno;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		struct xfs_perag *pag = xfs_perag_get(mp, agno);

		do {
			prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
			if (RB_EMPTY_ROOT(&pag->pagb_tree))
				break;
			schedule();
		} while (1);
		finish_wait(&pag->pagb_wait, &wait);

		xfs_perag_put(pag);
	}
}

/*
 * Callback for list_sort to sort busy extents by the AG they reside in,
 * and by block number within each AG.
 */
int
xfs_extent_busy_ag_cmp(
	void			*priv,
	struct list_head	*l1,
	struct list_head	*l2)
{
	struct xfs_extent_busy	*b1 =
		container_of(l1, struct xfs_extent_busy, list);
	struct xfs_extent_busy	*b2 =
		container_of(l2, struct xfs_extent_busy, list);
	s32 diff;

	diff = b1->agno - b2->agno;
	if (!diff)
		diff = b1->bno - b2->bno;
	return diff;
}
660