xref: /OK3568_Linux_fs/kernel/fs/xfs/libxfs/xfs_alloc.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"

extern kmem_zone_t	*xfs_bmap_free_item_zone;

struct workqueue_struct *xfs_alloc_wq;

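/*
 * Absolute difference between two unsigned values, computed without risking
 * unsigned underflow.
 */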
#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);

/*
 * Size of the AGFL.  For CRC-enabled filesystems we steal a couple of slots in
 * the beginning of the block for a proper header with the location information
 * and CRC.
 */
unsigned int
xfs_agfl_size(
	struct xfs_mount	*mp)
{
	unsigned int		size = mp->m_sb.sb_sectsize;

	if (xfs_sb_version_hascrc(&mp->m_sb))
		size -= sizeof(struct xfs_agfl);

	return size / sizeof(xfs_agblock_t);
}

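/*
 * AG block number at which the refcount btree root is placed: directly after
 * whichever of the rmap, free inode or inode btree roots comes last.
 */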
unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}

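/*
 * First AG block not occupied by statically laid out metadata (AG headers
 * and btree roots), i.e. the first block available in a freshly made AG.
 */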
xfs_extlen_t
xfs_prealloc_blocks(
	struct xfs_mount	*mp)
{
	if (xfs_sb_version_hasreflink(&mp->m_sb))
		return xfs_refc_block(mp) + 1;
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}

/*
 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
 * AGF buffer (PV 947395), we place constraints on the relationship among
 * actual allocations for data blocks, freelist blocks, and potential file data
 * bmap btree blocks. However, these restrictions may result in no actual space
 * allocated for a delayed extent, for example, a data block in a certain AG is
 * allocated but there is no additional block for the additional bmap btree
 * block due to a split of the bmap btree of the file. The result of this may
 * lead to an infinite loop when the file gets flushed to disk and all delayed
 * extents need to be actually allocated. To get around this, we explicitly set
 * aside a few blocks which will not be reserved in delayed allocation.
 *
 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
 * potential split of the file's bmap btree.
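 *
 * For example, assuming XFS_ALLOC_AGFL_RESERVE covers the 4 freelist blocks
 * described above, a filesystem with 16 AGs sets aside 16 * (4 + 4) = 128
 * blocks.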
 */
unsigned int
xfs_alloc_set_aside(
	struct xfs_mount	*mp)
{
	return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
}

/*
 * When deciding how much space to allocate out of an AG, we limit the
 * allocation maximum size to the size of the AG. However, we cannot use all
 * the blocks in the AG - some are permanently used by metadata. These
 * blocks are generally:
 *	- the AG superblock, AGF, AGI and AGFL
 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
 *	  the AGI free inode and rmap btree root blocks.
 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
 *	- the rmapbt root block
 *
 * The AG headers are sector sized, so the amount of space they take up is
 * dependent on filesystem geometry. The others are all single blocks.
 */
unsigned int
xfs_alloc_ag_max_usable(
	struct xfs_mount	*mp)
{
	unsigned int		blocks;

	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
	blocks += XFS_ALLOC_AGFL_RESERVE;
	blocks += 3;			/* AGF, AGI btree root blocks */
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		blocks++;		/* finobt root block */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		blocks++;		/* rmap root block */
	if (xfs_sb_version_hasreflink(&mp->m_sb))
		blocks++;		/* refcount root block */

	return mp->m_sb.sb_agblocks - blocks;
}

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

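/*
 * Return true if the btree cursor exists and has not been deactivated by the
 * current search.
 */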
static inline bool
xfs_alloc_cur_active(
	struct xfs_btree_cur	*cur)
{
	return cur && cur->bc_ag.abt.active;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agnumber_t		agno = cur->bc_ag.agno;
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	*bno = be32_to_cpu(rec->alloc.ar_startblock);
	*len = be32_to_cpu(rec->alloc.ar_blockcount);

	if (*len == 0)
		goto out_bad_rec;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, *bno))
		goto out_bad_rec;
	if (*bno > *bno + *len)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
		goto out_bad_rec;

	return 0;

out_bad_rec:
	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", *bno, *len);
	return -EFSCORRUPTED;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
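 *
 * For example, with an alignment of 4, a found extent [bno 6, len 10] is
 * aligned to [bno 8, len 8] (assuming the result still satisfies minlen).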
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
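 *
 * For example, wanting [bno 10, len 4] with alignment 1 out of a free extent
 * [bno 2, len 20] yields newbno 10 with diff 0, since the free extent covers
 * the target exactly.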
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
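 *
 * For example, with prod 8 and mod 3, len 21 is trimmed to 19 (2 * 8 + 3),
 * assuming minlen and maxlen permit it.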
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs))
			return -EFSCORRUPTED;
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1))
		return -EFSCORRUPTED;
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	return 0;
}

static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agfl_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};

/*
 * Read in the allocation group free block array.
 */
int					/* error */
xfs_alloc_read_agfl(
	xfs_mount_t	*mp,		/* mount point structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
{
	xfs_buf_t	*bp;		/* return value */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

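/*
 * Apply a free space delta of @len blocks (positive or negative) to the
 * in-core perag and on-disk AGF counters and log the AGF change. A freeblks
 * count exceeding the AG length marks the buffer corrupt.
 */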
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

/*
 * Block allocation algorithm and data structures.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur		*cnt;	/* btree cursors */
	struct xfs_btree_cur		*bnolt;
	struct xfs_btree_cur		*bnogt;
	xfs_extlen_t			cur_len;/* current search length */
	xfs_agblock_t			rec_bno;/* extent startblock */
	xfs_extlen_t			rec_len;/* extent length */
	xfs_agblock_t			bno;	/* alloc bno */
	xfs_extlen_t			len;	/* alloc len */
	xfs_extlen_t			diff;	/* diff from search bno */
	unsigned int			busy_gen;/* busy state */
	bool				busy;
};

/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);

	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
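	/* diff is unsigned, so -1 is the maximum value: no candidate yet */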
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}

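/*
 * Tear down the btree cursors in the allocation cursor. @error tells the
 * cursors whether the search failed so they can be released appropriately.
 */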
static void
xfs_alloc_cur_close(
	struct xfs_alloc_cur	*acur,
	bool			error)
{
	int			cur_error = XFS_BTREE_NOERROR;

	if (error)
		cur_error = XFS_BTREE_ERROR;

	if (acur->cnt)
		xfs_btree_del_cursor(acur->cnt, cur_error);
	if (acur->bnolt)
		xfs_btree_del_cursor(acur->bnolt, cur_error);
	if (acur->bnogt)
		xfs_btree_del_cursor(acur->bnogt, cur_error);
	acur->cnt = acur->bnolt = acur->bnogt = NULL;
}

/*
 * Check an extent for allocation and track the best available candidate in the
 * allocation structure. The cursor is deactivated if it has entered an out of
 * range state based on allocation arguments. Optionally return the extent
 * geometry and allocation status if requested by the caller.
 */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1))
		return -EFSCORRUPTED;

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}

/*
 * Complete an allocation of a candidate extent. Remove the extent from both
 * trees and update the args structure.
 */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}

/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}

1016*4882a593Smuzhiyun /*
1017*4882a593Smuzhiyun  * Deal with the case where only small freespaces remain. Either return the
1018*4882a593Smuzhiyun  * contents of the last freespace record, or allocate space from the freelist if
1019*4882a593Smuzhiyun  * there is nothing in the tree.
1020*4882a593Smuzhiyun  */
1021*4882a593Smuzhiyun STATIC int			/* error */
xfs_alloc_ag_vextent_small(struct xfs_alloc_arg * args,struct xfs_btree_cur * ccur,xfs_agblock_t * fbnop,xfs_extlen_t * flenp,int * stat)1022*4882a593Smuzhiyun xfs_alloc_ag_vextent_small(
1023*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,	/* allocation argument structure */
1024*4882a593Smuzhiyun 	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
1025*4882a593Smuzhiyun 	xfs_agblock_t		*fbnop,	/* result block number */
1026*4882a593Smuzhiyun 	xfs_extlen_t		*flenp,	/* result length */
1027*4882a593Smuzhiyun 	int			*stat)	/* status: 0-freelist, 1-normal/none */
1028*4882a593Smuzhiyun {
1029*4882a593Smuzhiyun 	struct xfs_agf		*agf = args->agbp->b_addr;
1030*4882a593Smuzhiyun 	int			error = 0;
1031*4882a593Smuzhiyun 	xfs_agblock_t		fbno = NULLAGBLOCK;
1032*4882a593Smuzhiyun 	xfs_extlen_t		flen = 0;
1033*4882a593Smuzhiyun 	int			i = 0;
1034*4882a593Smuzhiyun 
1035*4882a593Smuzhiyun 	/*
1036*4882a593Smuzhiyun 	 * If a cntbt cursor is provided, try to allocate the largest record in
1037*4882a593Smuzhiyun 	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1038*4882a593Smuzhiyun 	 * allocation. Make sure to respect minleft even when pulling from the
1039*4882a593Smuzhiyun 	 * freelist.
1040*4882a593Smuzhiyun 	 */
1041*4882a593Smuzhiyun 	if (ccur)
1042*4882a593Smuzhiyun 		error = xfs_btree_decrement(ccur, 0, &i);
1043*4882a593Smuzhiyun 	if (error)
1044*4882a593Smuzhiyun 		goto error;
1045*4882a593Smuzhiyun 	if (i) {
1046*4882a593Smuzhiyun 		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1047*4882a593Smuzhiyun 		if (error)
1048*4882a593Smuzhiyun 			goto error;
1049*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1050*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
1051*4882a593Smuzhiyun 			goto error;
1052*4882a593Smuzhiyun 		}
1053*4882a593Smuzhiyun 		goto out;
1054*4882a593Smuzhiyun 	}
1055*4882a593Smuzhiyun 
1056*4882a593Smuzhiyun 	if (args->minlen != 1 || args->alignment != 1 ||
1057*4882a593Smuzhiyun 	    args->resv == XFS_AG_RESV_AGFL ||
1058*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_flcount) <= args->minleft)
1059*4882a593Smuzhiyun 		goto out;
1060*4882a593Smuzhiyun 
1061*4882a593Smuzhiyun 	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1062*4882a593Smuzhiyun 	if (error)
1063*4882a593Smuzhiyun 		goto error;
1064*4882a593Smuzhiyun 	if (fbno == NULLAGBLOCK)
1065*4882a593Smuzhiyun 		goto out;
1066*4882a593Smuzhiyun 
1067*4882a593Smuzhiyun 	xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1068*4882a593Smuzhiyun 			      (args->datatype & XFS_ALLOC_NOBUSY));
1069*4882a593Smuzhiyun 
1070*4882a593Smuzhiyun 	if (args->datatype & XFS_ALLOC_USERDATA) {
1071*4882a593Smuzhiyun 		struct xfs_buf	*bp;
1072*4882a593Smuzhiyun 
1073*4882a593Smuzhiyun 		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1074*4882a593Smuzhiyun 				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1075*4882a593Smuzhiyun 				args->mp->m_bsize, 0, &bp);
1076*4882a593Smuzhiyun 		if (error)
1077*4882a593Smuzhiyun 			goto error;
1078*4882a593Smuzhiyun 		xfs_trans_binval(args->tp, bp);
1079*4882a593Smuzhiyun 	}
1080*4882a593Smuzhiyun 	*fbnop = args->agbno = fbno;
1081*4882a593Smuzhiyun 	*flenp = args->len = 1;
1082*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1083*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
1084*4882a593Smuzhiyun 		goto error;
1085*4882a593Smuzhiyun 	}
1086*4882a593Smuzhiyun 	args->wasfromfl = 1;
1087*4882a593Smuzhiyun 	trace_xfs_alloc_small_freelist(args);
1088*4882a593Smuzhiyun 
1089*4882a593Smuzhiyun 	/*
1090*4882a593Smuzhiyun 	 * If we're feeding an AGFL block to something that doesn't live in the
1091*4882a593Smuzhiyun 	 * free space, we need to clear out the OWN_AG rmap.
1092*4882a593Smuzhiyun 	 */
1093*4882a593Smuzhiyun 	error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
1094*4882a593Smuzhiyun 			      &XFS_RMAP_OINFO_AG);
1095*4882a593Smuzhiyun 	if (error)
1096*4882a593Smuzhiyun 		goto error;
1097*4882a593Smuzhiyun 
1098*4882a593Smuzhiyun 	*stat = 0;
1099*4882a593Smuzhiyun 	return 0;
1100*4882a593Smuzhiyun 
1101*4882a593Smuzhiyun out:
1102*4882a593Smuzhiyun 	/*
1103*4882a593Smuzhiyun 	 * Can't do the allocation, give up.
1104*4882a593Smuzhiyun 	 */
1105*4882a593Smuzhiyun 	if (flen < args->minlen) {
1106*4882a593Smuzhiyun 		args->agbno = NULLAGBLOCK;
1107*4882a593Smuzhiyun 		trace_xfs_alloc_small_notenough(args);
1108*4882a593Smuzhiyun 		flen = 0;
1109*4882a593Smuzhiyun 	}
1110*4882a593Smuzhiyun 	*fbnop = fbno;
1111*4882a593Smuzhiyun 	*flenp = flen;
1112*4882a593Smuzhiyun 	*stat = 1;
1113*4882a593Smuzhiyun 	trace_xfs_alloc_small_done(args);
1114*4882a593Smuzhiyun 	return 0;
1115*4882a593Smuzhiyun 
1116*4882a593Smuzhiyun error:
1117*4882a593Smuzhiyun 	trace_xfs_alloc_small_error(args);
1118*4882a593Smuzhiyun 	return error;
1119*4882a593Smuzhiyun }
1120*4882a593Smuzhiyun 
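/*
 * Illustrative sketch, not part of the original source: the AGFL
 * fallback above only fires for the narrowest possible request.  The
 * gating condition restated as a standalone predicate; all names and
 * the plain-C types below are hypothetical stand-ins for the kernel
 * ones.
 */
struct small_alloc_req {
	unsigned int	minlen;		/* smallest acceptable length */
	unsigned int	alignment;	/* required start alignment */
	int		is_agfl_resv;	/* allocation refills the AGFL itself */
	unsigned int	flcount;	/* blocks currently on the AGFL */
	unsigned int	minleft;	/* blocks that must stay available */
};

/* Nonzero if a single block may be pulled straight off the AGFL. */
static int small_alloc_can_use_agfl(const struct small_alloc_req *r)
{
	return r->minlen == 1 && r->alignment == 1 &&
	       !r->is_agfl_resv && r->flcount > r->minleft;
}
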
1121*4882a593Smuzhiyun /*
1122*4882a593Smuzhiyun  * Allocate a variable extent in the allocation group agno.
1123*4882a593Smuzhiyun  * Type and bno are used to determine where in the allocation group the
1124*4882a593Smuzhiyun  * extent will start.
1125*4882a593Smuzhiyun  * Extent's length (returned in *len) will be between minlen and maxlen,
1126*4882a593Smuzhiyun  * and of the form k * prod + mod unless there's nothing that large.
1127*4882a593Smuzhiyun  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1128*4882a593Smuzhiyun  */
1129*4882a593Smuzhiyun STATIC int			/* error */
1130*4882a593Smuzhiyun xfs_alloc_ag_vextent(
1131*4882a593Smuzhiyun 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
1132*4882a593Smuzhiyun {
1133*4882a593Smuzhiyun 	int		error=0;
1134*4882a593Smuzhiyun 
1135*4882a593Smuzhiyun 	ASSERT(args->minlen > 0);
1136*4882a593Smuzhiyun 	ASSERT(args->maxlen > 0);
1137*4882a593Smuzhiyun 	ASSERT(args->minlen <= args->maxlen);
1138*4882a593Smuzhiyun 	ASSERT(args->mod < args->prod);
1139*4882a593Smuzhiyun 	ASSERT(args->alignment > 0);
1140*4882a593Smuzhiyun 
1141*4882a593Smuzhiyun 	/*
1142*4882a593Smuzhiyun 	 * Branch to correct routine based on the type.
1143*4882a593Smuzhiyun 	 * Branch to the correct routine based on the type.
1144*4882a593Smuzhiyun 	args->wasfromfl = 0;
1145*4882a593Smuzhiyun 	switch (args->type) {
1146*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_THIS_AG:
1147*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_size(args);
1148*4882a593Smuzhiyun 		break;
1149*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_NEAR_BNO:
1150*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_near(args);
1151*4882a593Smuzhiyun 		break;
1152*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_THIS_BNO:
1153*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_exact(args);
1154*4882a593Smuzhiyun 		break;
1155*4882a593Smuzhiyun 	default:
1156*4882a593Smuzhiyun 		ASSERT(0);
1157*4882a593Smuzhiyun 		/* NOTREACHED */
1158*4882a593Smuzhiyun 	}
1159*4882a593Smuzhiyun 
1160*4882a593Smuzhiyun 	if (error || args->agbno == NULLAGBLOCK)
1161*4882a593Smuzhiyun 		return error;
1162*4882a593Smuzhiyun 
1163*4882a593Smuzhiyun 	ASSERT(args->len >= args->minlen);
1164*4882a593Smuzhiyun 	ASSERT(args->len <= args->maxlen);
1165*4882a593Smuzhiyun 	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
1166*4882a593Smuzhiyun 	ASSERT(args->agbno % args->alignment == 0);
1167*4882a593Smuzhiyun 
1168*4882a593Smuzhiyun 	/* if not file data, insert new block into the reverse map btree */
1169*4882a593Smuzhiyun 	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
1170*4882a593Smuzhiyun 		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
1171*4882a593Smuzhiyun 				       args->agbno, args->len, &args->oinfo);
1172*4882a593Smuzhiyun 		if (error)
1173*4882a593Smuzhiyun 			return error;
1174*4882a593Smuzhiyun 	}
1175*4882a593Smuzhiyun 
1176*4882a593Smuzhiyun 	if (!args->wasfromfl) {
1177*4882a593Smuzhiyun 		error = xfs_alloc_update_counters(args->tp, args->agbp,
1178*4882a593Smuzhiyun 						  -((long)(args->len)));
1179*4882a593Smuzhiyun 		if (error)
1180*4882a593Smuzhiyun 			return error;
1181*4882a593Smuzhiyun 
1182*4882a593Smuzhiyun 		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
1183*4882a593Smuzhiyun 					      args->agbno, args->len));
1184*4882a593Smuzhiyun 	}
1185*4882a593Smuzhiyun 
1186*4882a593Smuzhiyun 	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
1187*4882a593Smuzhiyun 
1188*4882a593Smuzhiyun 	XFS_STATS_INC(args->mp, xs_allocx);
1189*4882a593Smuzhiyun 	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
1190*4882a593Smuzhiyun 	return error;
1191*4882a593Smuzhiyun }
1192*4882a593Smuzhiyun 
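/*
 * Illustrative sketch, not part of the original source: the comments
 * here promise a length of the form k * prod + mod.  The in-tree
 * xfs_alloc_fix_len() must additionally respect minlen and maxlen;
 * this hypothetical helper covers only the modulus arithmetic,
 * assuming prod > 0 and mod < prod as the ASSERTs above require.
 */
static unsigned int trim_len_to_mod_prod(unsigned int len, unsigned int prod,
					 unsigned int mod)
{
	unsigned int	k;

	if (prod <= 1)
		return len;
	k = len % prod;
	if (k == mod)
		return len;		/* already of the required form */
	if (k > mod)
		return len - (k - mod);
	if (len + mod < prod + k)
		return 0;		/* no length of that form fits */
	/* k < mod: drop to the previous multiple of prod, then add mod */
	return len - k - prod + mod;
}

/* Example: trim_len_to_mod_prod(10, 4, 3) == 7, since 7 == 1 * 4 + 3. */
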
1193*4882a593Smuzhiyun /*
1194*4882a593Smuzhiyun  * Allocate a variable extent at exactly agno/bno.
1195*4882a593Smuzhiyun  * Extent's length (returned in *len) will be between minlen and maxlen,
1196*4882a593Smuzhiyun  * and of the form k * prod + mod unless there's nothing that large.
1197*4882a593Smuzhiyun  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1198*4882a593Smuzhiyun  */
1199*4882a593Smuzhiyun STATIC int			/* error */
1200*4882a593Smuzhiyun xfs_alloc_ag_vextent_exact(
1201*4882a593Smuzhiyun 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
1202*4882a593Smuzhiyun {
1203*4882a593Smuzhiyun 	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1204*4882a593Smuzhiyun 	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
1205*4882a593Smuzhiyun 	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
1206*4882a593Smuzhiyun 	int		error;
1207*4882a593Smuzhiyun 	xfs_agblock_t	fbno;	/* start block of found extent */
1208*4882a593Smuzhiyun 	xfs_extlen_t	flen;	/* length of found extent */
1209*4882a593Smuzhiyun 	xfs_agblock_t	tbno;	/* start block of busy extent */
1210*4882a593Smuzhiyun 	xfs_extlen_t	tlen;	/* length of busy extent */
1211*4882a593Smuzhiyun 	xfs_agblock_t	tend;	/* end block of busy extent */
1212*4882a593Smuzhiyun 	int		i;	/* success/failure of operation */
1213*4882a593Smuzhiyun 	unsigned	busy_gen;
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun 	ASSERT(args->alignment == 1);
1216*4882a593Smuzhiyun 
1217*4882a593Smuzhiyun 	/*
1218*4882a593Smuzhiyun 	 * Allocate/initialize a cursor for the by-number freespace btree.
1219*4882a593Smuzhiyun 	 */
1220*4882a593Smuzhiyun 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1221*4882a593Smuzhiyun 					  args->agno, XFS_BTNUM_BNO);
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 	/*
1224*4882a593Smuzhiyun 	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1225*4882a593Smuzhiyun 	 * Look for the closest free block <= bno; it must contain bno
1226*4882a593Smuzhiyun 	 * if any free block does.
1227*4882a593Smuzhiyun 	 */
1228*4882a593Smuzhiyun 	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1229*4882a593Smuzhiyun 	if (error)
1230*4882a593Smuzhiyun 		goto error0;
1231*4882a593Smuzhiyun 	if (!i)
1232*4882a593Smuzhiyun 		goto not_found;
1233*4882a593Smuzhiyun 
1234*4882a593Smuzhiyun 	/*
1235*4882a593Smuzhiyun 	 * Grab the freespace record.
1236*4882a593Smuzhiyun 	 */
1237*4882a593Smuzhiyun 	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1238*4882a593Smuzhiyun 	if (error)
1239*4882a593Smuzhiyun 		goto error0;
1240*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1241*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
1242*4882a593Smuzhiyun 		goto error0;
1243*4882a593Smuzhiyun 	}
1244*4882a593Smuzhiyun 	ASSERT(fbno <= args->agbno);
1245*4882a593Smuzhiyun 
1246*4882a593Smuzhiyun 	/*
1247*4882a593Smuzhiyun 	 * Check for overlapping busy extents.
1248*4882a593Smuzhiyun 	 */
1249*4882a593Smuzhiyun 	tbno = fbno;
1250*4882a593Smuzhiyun 	tlen = flen;
1251*4882a593Smuzhiyun 	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 	/*
1254*4882a593Smuzhiyun 	 * Give up if the start of the extent is busy, or the freespace isn't
1255*4882a593Smuzhiyun 	 * long enough for the minimum request.
1256*4882a593Smuzhiyun 	 */
1257*4882a593Smuzhiyun 	if (tbno > args->agbno)
1258*4882a593Smuzhiyun 		goto not_found;
1259*4882a593Smuzhiyun 	if (tlen < args->minlen)
1260*4882a593Smuzhiyun 		goto not_found;
1261*4882a593Smuzhiyun 	tend = tbno + tlen;
1262*4882a593Smuzhiyun 	if (tend < args->agbno + args->minlen)
1263*4882a593Smuzhiyun 		goto not_found;
1264*4882a593Smuzhiyun 
1265*4882a593Smuzhiyun 	/*
1266*4882a593Smuzhiyun 	 * The end of the extent will be the smaller of the freespace end and the
1267*4882a593Smuzhiyun 	 * maximal requested end.
1268*4882a593Smuzhiyun 	 *
1269*4882a593Smuzhiyun 	 * Fix the length according to mod and prod if given.
1270*4882a593Smuzhiyun 	 */
1271*4882a593Smuzhiyun 	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1272*4882a593Smuzhiyun 						- args->agbno;
1273*4882a593Smuzhiyun 	xfs_alloc_fix_len(args);
1274*4882a593Smuzhiyun 	ASSERT(args->agbno + args->len <= tend);
1275*4882a593Smuzhiyun 
1276*4882a593Smuzhiyun 	/*
1277*4882a593Smuzhiyun 	 * We are allocating args->len blocks starting at args->agbno.
1278*4882a593Smuzhiyun 	 * Allocate/initialize a cursor for the by-size btree.
1279*4882a593Smuzhiyun 	 */
1280*4882a593Smuzhiyun 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1281*4882a593Smuzhiyun 		args->agno, XFS_BTNUM_CNT);
1282*4882a593Smuzhiyun 	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
1283*4882a593Smuzhiyun 	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1284*4882a593Smuzhiyun 				      args->len, XFSA_FIXUP_BNO_OK);
1285*4882a593Smuzhiyun 	if (error) {
1286*4882a593Smuzhiyun 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1287*4882a593Smuzhiyun 		goto error0;
1288*4882a593Smuzhiyun 	}
1289*4882a593Smuzhiyun 
1290*4882a593Smuzhiyun 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1291*4882a593Smuzhiyun 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1292*4882a593Smuzhiyun 
1293*4882a593Smuzhiyun 	args->wasfromfl = 0;
1294*4882a593Smuzhiyun 	trace_xfs_alloc_exact_done(args);
1295*4882a593Smuzhiyun 	return 0;
1296*4882a593Smuzhiyun 
1297*4882a593Smuzhiyun not_found:
1298*4882a593Smuzhiyun 	/* Didn't find it, return null. */
1299*4882a593Smuzhiyun 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1300*4882a593Smuzhiyun 	args->agbno = NULLAGBLOCK;
1301*4882a593Smuzhiyun 	trace_xfs_alloc_exact_notfound(args);
1302*4882a593Smuzhiyun 	return 0;
1303*4882a593Smuzhiyun 
1304*4882a593Smuzhiyun error0:
1305*4882a593Smuzhiyun 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1306*4882a593Smuzhiyun 	trace_xfs_alloc_exact_error(args);
1307*4882a593Smuzhiyun 	return error;
1308*4882a593Smuzhiyun }
1309*4882a593Smuzhiyun 
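/*
 * Illustrative sketch, not part of the original source: the three
 * "goto not_found" checks above amount to a single interval test --
 * after busy trimming, the unbusy span [tbno, tbno + tlen) must still
 * cover [agbno, agbno + minlen).  (The tlen < minlen check is in fact
 * implied by the other two; it just fails fast.)  A hypothetical
 * standalone restatement:
 */
static int exact_request_fits(unsigned int tbno, unsigned int tlen,
			      unsigned int agbno, unsigned int minlen)
{
	unsigned int	tend = tbno + tlen;

	return tbno <= agbno &&		/* the requested start is not busy */
	       tend >= agbno + minlen;	/* the minimum length still fits */
}
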
1310*4882a593Smuzhiyun /*
1311*4882a593Smuzhiyun  * Search a given number of btree records in a given direction. Check each
1312*4882a593Smuzhiyun  * record against the good extent we've already found.
1313*4882a593Smuzhiyun  */
1314*4882a593Smuzhiyun STATIC int
1315*4882a593Smuzhiyun xfs_alloc_walk_iter(
1316*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,
1317*4882a593Smuzhiyun 	struct xfs_alloc_cur	*acur,
1318*4882a593Smuzhiyun 	struct xfs_btree_cur	*cur,
1319*4882a593Smuzhiyun 	bool			increment,
1320*4882a593Smuzhiyun 	bool			find_one, /* quit on first candidate */
1321*4882a593Smuzhiyun 	int			count,    /* rec count (-1 for infinite) */
1322*4882a593Smuzhiyun 	int			*stat)
1323*4882a593Smuzhiyun {
1324*4882a593Smuzhiyun 	int			error;
1325*4882a593Smuzhiyun 	int			i;
1326*4882a593Smuzhiyun 
1327*4882a593Smuzhiyun 	*stat = 0;
1328*4882a593Smuzhiyun 
1329*4882a593Smuzhiyun 	/*
1330*4882a593Smuzhiyun 	 * Search so long as the cursor is active or we find a better extent.
1331*4882a593Smuzhiyun 	 * The cursor is deactivated if it extends beyond the range of the
1332*4882a593Smuzhiyun 	 * current allocation candidate.
1333*4882a593Smuzhiyun 	 */
1334*4882a593Smuzhiyun 	while (xfs_alloc_cur_active(cur) && count) {
1335*4882a593Smuzhiyun 		error = xfs_alloc_cur_check(args, acur, cur, &i);
1336*4882a593Smuzhiyun 		if (error)
1337*4882a593Smuzhiyun 			return error;
1338*4882a593Smuzhiyun 		if (i == 1) {
1339*4882a593Smuzhiyun 			*stat = 1;
1340*4882a593Smuzhiyun 			if (find_one)
1341*4882a593Smuzhiyun 				break;
1342*4882a593Smuzhiyun 		}
1343*4882a593Smuzhiyun 		if (!xfs_alloc_cur_active(cur))
1344*4882a593Smuzhiyun 			break;
1345*4882a593Smuzhiyun 
1346*4882a593Smuzhiyun 		if (increment)
1347*4882a593Smuzhiyun 			error = xfs_btree_increment(cur, 0, &i);
1348*4882a593Smuzhiyun 		else
1349*4882a593Smuzhiyun 			error = xfs_btree_decrement(cur, 0, &i);
1350*4882a593Smuzhiyun 		if (error)
1351*4882a593Smuzhiyun 			return error;
1352*4882a593Smuzhiyun 		if (i == 0)
1353*4882a593Smuzhiyun 			cur->bc_ag.abt.active = false;
1354*4882a593Smuzhiyun 
1355*4882a593Smuzhiyun 		if (count > 0)
1356*4882a593Smuzhiyun 			count--;
1357*4882a593Smuzhiyun 	}
1358*4882a593Smuzhiyun 
1359*4882a593Smuzhiyun 	return 0;
1360*4882a593Smuzhiyun }
1361*4882a593Smuzhiyun 
1362*4882a593Smuzhiyun /*
1363*4882a593Smuzhiyun  * Search the by-bno and by-size btrees in parallel in search of an extent with
1364*4882a593Smuzhiyun  * ideal locality based on the NEAR mode ->agbno locality hint.
1365*4882a593Smuzhiyun  */
1366*4882a593Smuzhiyun STATIC int
1367*4882a593Smuzhiyun xfs_alloc_ag_vextent_locality(
1368*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,
1369*4882a593Smuzhiyun 	struct xfs_alloc_cur	*acur,
1370*4882a593Smuzhiyun 	int			*stat)
1371*4882a593Smuzhiyun {
1372*4882a593Smuzhiyun 	struct xfs_btree_cur	*fbcur = NULL;
1373*4882a593Smuzhiyun 	int			error;
1374*4882a593Smuzhiyun 	int			i;
1375*4882a593Smuzhiyun 	bool			fbinc;
1376*4882a593Smuzhiyun 
1377*4882a593Smuzhiyun 	ASSERT(acur->len == 0);
1378*4882a593Smuzhiyun 	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
1379*4882a593Smuzhiyun 
1380*4882a593Smuzhiyun 	*stat = 0;
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1383*4882a593Smuzhiyun 	if (error)
1384*4882a593Smuzhiyun 		return error;
1385*4882a593Smuzhiyun 	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1386*4882a593Smuzhiyun 	if (error)
1387*4882a593Smuzhiyun 		return error;
1388*4882a593Smuzhiyun 	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1389*4882a593Smuzhiyun 	if (error)
1390*4882a593Smuzhiyun 		return error;
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 	/*
1393*4882a593Smuzhiyun 	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1394*4882a593Smuzhiyun 	 * right and lookup the closest extent to the locality hint for each
1395*4882a593Smuzhiyun 	 * extent size key in the cntbt. The entire search terminates
1396*4882a593Smuzhiyun 	 * immediately on a bnobt hit because that means we've found best case
1397*4882a593Smuzhiyun 	 * locality. Otherwise the search continues until the cntbt cursor runs
1398*4882a593Smuzhiyun 	 * off the end of the tree. If no allocation candidate is found at this
1399*4882a593Smuzhiyun 	 * point, give up on locality, walk backwards from the end of the cntbt
1400*4882a593Smuzhiyun 	 * and take the first available extent.
1401*4882a593Smuzhiyun 	 *
1402*4882a593Smuzhiyun 	 * The parallel tree searches balance each other out to provide fairly
1403*4882a593Smuzhiyun 	 * consistent performance for various situations. The bnobt search can
1404*4882a593Smuzhiyun 	 * have pathological behavior in the worst case scenario of larger
1405*4882a593Smuzhiyun 	 * allocation requests and fragmented free space. On the other hand, the
1406*4882a593Smuzhiyun 	 * bnobt is able to satisfy most smaller allocation requests much more
1407*4882a593Smuzhiyun 	 * quickly than the cntbt. The cntbt search can sift through fragmented
1408*4882a593Smuzhiyun 	 * free space and sets of free extents for larger allocation requests
1409*4882a593Smuzhiyun 	 * more quickly than the bnobt. Since the locality hint is just a hint
1410*4882a593Smuzhiyun 	 * and we don't want to scan the entire bnobt for perfect locality, the
1411*4882a593Smuzhiyun 	 * cntbt search essentially bounds the bnobt search such that we can
1412*4882a593Smuzhiyun 	 * find good enough locality at reasonable performance in most cases.
1413*4882a593Smuzhiyun 	 */
1414*4882a593Smuzhiyun 	while (xfs_alloc_cur_active(acur->bnolt) ||
1415*4882a593Smuzhiyun 	       xfs_alloc_cur_active(acur->bnogt) ||
1416*4882a593Smuzhiyun 	       xfs_alloc_cur_active(acur->cnt)) {
1417*4882a593Smuzhiyun 
1418*4882a593Smuzhiyun 		trace_xfs_alloc_cur_lookup(args);
1419*4882a593Smuzhiyun 
1420*4882a593Smuzhiyun 		/*
1421*4882a593Smuzhiyun 		 * Search the bnobt left and right. In the case of a hit, finish
1422*4882a593Smuzhiyun 		 * the search in the opposite direction and we're done.
1423*4882a593Smuzhiyun 		 */
1424*4882a593Smuzhiyun 		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1425*4882a593Smuzhiyun 					    true, 1, &i);
1426*4882a593Smuzhiyun 		if (error)
1427*4882a593Smuzhiyun 			return error;
1428*4882a593Smuzhiyun 		if (i == 1) {
1429*4882a593Smuzhiyun 			trace_xfs_alloc_cur_left(args);
1430*4882a593Smuzhiyun 			fbcur = acur->bnogt;
1431*4882a593Smuzhiyun 			fbinc = true;
1432*4882a593Smuzhiyun 			break;
1433*4882a593Smuzhiyun 		}
1434*4882a593Smuzhiyun 		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1435*4882a593Smuzhiyun 					    1, &i);
1436*4882a593Smuzhiyun 		if (error)
1437*4882a593Smuzhiyun 			return error;
1438*4882a593Smuzhiyun 		if (i == 1) {
1439*4882a593Smuzhiyun 			trace_xfs_alloc_cur_right(args);
1440*4882a593Smuzhiyun 			fbcur = acur->bnolt;
1441*4882a593Smuzhiyun 			fbinc = false;
1442*4882a593Smuzhiyun 			break;
1443*4882a593Smuzhiyun 		}
1444*4882a593Smuzhiyun 
1445*4882a593Smuzhiyun 		/*
1446*4882a593Smuzhiyun 		 * Check the extent with best locality based on the current
1447*4882a593Smuzhiyun 		 * extent size search key and keep track of the best candidate.
1448*4882a593Smuzhiyun 		 */
1449*4882a593Smuzhiyun 		error = xfs_alloc_cntbt_iter(args, acur);
1450*4882a593Smuzhiyun 		if (error)
1451*4882a593Smuzhiyun 			return error;
1452*4882a593Smuzhiyun 		if (!xfs_alloc_cur_active(acur->cnt)) {
1453*4882a593Smuzhiyun 			trace_xfs_alloc_cur_lookup_done(args);
1454*4882a593Smuzhiyun 			break;
1455*4882a593Smuzhiyun 		}
1456*4882a593Smuzhiyun 	}
1457*4882a593Smuzhiyun 
1458*4882a593Smuzhiyun 	/*
1459*4882a593Smuzhiyun 	 * If we failed to find anything due to busy extents, return
1460*4882a593Smuzhiyun 	 * empty-handed so the caller can flush and retry. If no busy extents were
1461*4882a593Smuzhiyun 	 * found, walk backwards from the end of the cntbt as a last resort.
1462*4882a593Smuzhiyun 	 */
1463*4882a593Smuzhiyun 	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1464*4882a593Smuzhiyun 		error = xfs_btree_decrement(acur->cnt, 0, &i);
1465*4882a593Smuzhiyun 		if (error)
1466*4882a593Smuzhiyun 			return error;
1467*4882a593Smuzhiyun 		if (i) {
1468*4882a593Smuzhiyun 			acur->cnt->bc_ag.abt.active = true;
1469*4882a593Smuzhiyun 			fbcur = acur->cnt;
1470*4882a593Smuzhiyun 			fbinc = false;
1471*4882a593Smuzhiyun 		}
1472*4882a593Smuzhiyun 	}
1473*4882a593Smuzhiyun 
1474*4882a593Smuzhiyun 	/*
1475*4882a593Smuzhiyun 	 * Search in the opposite direction for a better entry in the case of
1476*4882a593Smuzhiyun 	 * a bnobt hit or walk backwards from the end of the cntbt.
1477*4882a593Smuzhiyun 	 */
1478*4882a593Smuzhiyun 	if (fbcur) {
1479*4882a593Smuzhiyun 		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1480*4882a593Smuzhiyun 					    &i);
1481*4882a593Smuzhiyun 		if (error)
1482*4882a593Smuzhiyun 			return error;
1483*4882a593Smuzhiyun 	}
1484*4882a593Smuzhiyun 
1485*4882a593Smuzhiyun 	if (acur->len)
1486*4882a593Smuzhiyun 		*stat = 1;
1487*4882a593Smuzhiyun 
1488*4882a593Smuzhiyun 	return 0;
1489*4882a593Smuzhiyun }
1490*4882a593Smuzhiyun 
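/*
 * Illustrative sketch, not part of the original source: "locality"
 * above is distance from the agbno hint.  A hypothetical scoring
 * helper for a candidate free extent [fbno, fbno + flen); a bnobt hit
 * scores 0, which is why the search can stop on one immediately.  The
 * real candidate evaluation in xfs_alloc_cur_check() also weighs the
 * extent length, not just the distance.
 */
static unsigned int locality_distance(unsigned int fbno, unsigned int flen,
				      unsigned int hint)
{
	if (fbno <= hint && hint < fbno + flen)
		return 0;			/* extent covers the hint */
	if (fbno > hint)
		return fbno - hint;		/* extent lies to the right */
	return hint - (fbno + flen - 1);	/* extent lies to the left */
}
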
1491*4882a593Smuzhiyun /* Check the last block of the cnt btree for allocations. */
1492*4882a593Smuzhiyun static int
1493*4882a593Smuzhiyun xfs_alloc_ag_vextent_lastblock(
1494*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,
1495*4882a593Smuzhiyun 	struct xfs_alloc_cur	*acur,
1496*4882a593Smuzhiyun 	xfs_agblock_t		*bno,
1497*4882a593Smuzhiyun 	xfs_extlen_t		*len,
1498*4882a593Smuzhiyun 	bool			*allocated)
1499*4882a593Smuzhiyun {
1500*4882a593Smuzhiyun 	int			error;
1501*4882a593Smuzhiyun 	int			i;
1502*4882a593Smuzhiyun 
1503*4882a593Smuzhiyun #ifdef DEBUG
1504*4882a593Smuzhiyun 	/* Randomly don't execute the first algorithm. */
1505*4882a593Smuzhiyun 	if (prandom_u32() & 1)
1506*4882a593Smuzhiyun 		return 0;
1507*4882a593Smuzhiyun #endif
1508*4882a593Smuzhiyun 
1509*4882a593Smuzhiyun 	/*
1510*4882a593Smuzhiyun 	 * Start from the entry that lookup found, sequence through all larger
1511*4882a593Smuzhiyun 	 * Start from the entry that lookup found, and sequence through all larger
1512*4882a593Smuzhiyun 	 * maxlen, go to the start of this block, and skip all those smaller
1513*4882a593Smuzhiyun 	 * than minlen.
1514*4882a593Smuzhiyun 	 */
1515*4882a593Smuzhiyun 	if (*len || args->alignment > 1) {
1516*4882a593Smuzhiyun 		acur->cnt->bc_ptrs[0] = 1;
1517*4882a593Smuzhiyun 		do {
1518*4882a593Smuzhiyun 			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
1519*4882a593Smuzhiyun 			if (error)
1520*4882a593Smuzhiyun 				return error;
1521*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(args->mp, i != 1))
1522*4882a593Smuzhiyun 				return -EFSCORRUPTED;
1523*4882a593Smuzhiyun 			if (*len >= args->minlen)
1524*4882a593Smuzhiyun 				break;
1525*4882a593Smuzhiyun 			error = xfs_btree_increment(acur->cnt, 0, &i);
1526*4882a593Smuzhiyun 			if (error)
1527*4882a593Smuzhiyun 				return error;
1528*4882a593Smuzhiyun 		} while (i);
1529*4882a593Smuzhiyun 		ASSERT(*len >= args->minlen);
1530*4882a593Smuzhiyun 		if (!i)
1531*4882a593Smuzhiyun 			return 0;
1532*4882a593Smuzhiyun 	}
1533*4882a593Smuzhiyun 
1534*4882a593Smuzhiyun 	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
1535*4882a593Smuzhiyun 	if (error)
1536*4882a593Smuzhiyun 		return error;
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 	/*
1539*4882a593Smuzhiyun 	 * It didn't work.  We COULD be in a case where there's a good record
1540*4882a593Smuzhiyun 	 * somewhere, so try again.
1541*4882a593Smuzhiyun 	 */
1542*4882a593Smuzhiyun 	if (acur->len == 0)
1543*4882a593Smuzhiyun 		return 0;
1544*4882a593Smuzhiyun 
1545*4882a593Smuzhiyun 	trace_xfs_alloc_near_first(args);
1546*4882a593Smuzhiyun 	*allocated = true;
1547*4882a593Smuzhiyun 	return 0;
1548*4882a593Smuzhiyun }
1549*4882a593Smuzhiyun 
1550*4882a593Smuzhiyun /*
1551*4882a593Smuzhiyun  * Allocate a variable extent near bno in the allocation group agno.
1552*4882a593Smuzhiyun  * Extent's length (returned in len) will be between minlen and maxlen,
1553*4882a593Smuzhiyun  * and of the form k * prod + mod unless there's nothing that large.
1554*4882a593Smuzhiyun  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1555*4882a593Smuzhiyun  */
1556*4882a593Smuzhiyun STATIC int
1557*4882a593Smuzhiyun xfs_alloc_ag_vextent_near(
1558*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args)
1559*4882a593Smuzhiyun {
1560*4882a593Smuzhiyun 	struct xfs_alloc_cur	acur = {};
1561*4882a593Smuzhiyun 	int			error;		/* error code */
1562*4882a593Smuzhiyun 	int			i;		/* result code, temporary */
1563*4882a593Smuzhiyun 	xfs_agblock_t		bno;
1564*4882a593Smuzhiyun 	xfs_extlen_t		len;
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun 	/* handle uninitialized agbno range so caller doesn't have to */
1567*4882a593Smuzhiyun 	if (!args->min_agbno && !args->max_agbno)
1568*4882a593Smuzhiyun 		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1569*4882a593Smuzhiyun 	ASSERT(args->min_agbno <= args->max_agbno);
1570*4882a593Smuzhiyun 
1571*4882a593Smuzhiyun 	/* clamp agbno to the range if it's outside */
1572*4882a593Smuzhiyun 	if (args->agbno < args->min_agbno)
1573*4882a593Smuzhiyun 		args->agbno = args->min_agbno;
1574*4882a593Smuzhiyun 	if (args->agbno > args->max_agbno)
1575*4882a593Smuzhiyun 		args->agbno = args->max_agbno;
1576*4882a593Smuzhiyun 
1577*4882a593Smuzhiyun restart:
1578*4882a593Smuzhiyun 	len = 0;
1579*4882a593Smuzhiyun 
1580*4882a593Smuzhiyun 	/*
1581*4882a593Smuzhiyun 	 * Set up cursors and see if there are any free extents as big as
1582*4882a593Smuzhiyun 	 * maxlen. If not, pick the last entry in the tree unless the tree is
1583*4882a593Smuzhiyun 	 * empty.
1584*4882a593Smuzhiyun 	 */
1585*4882a593Smuzhiyun 	error = xfs_alloc_cur_setup(args, &acur);
1586*4882a593Smuzhiyun 	if (error == -ENOSPC) {
1587*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1588*4882a593Smuzhiyun 				&len, &i);
1589*4882a593Smuzhiyun 		if (error)
1590*4882a593Smuzhiyun 			goto out;
1591*4882a593Smuzhiyun 		if (i == 0 || len == 0) {
1592*4882a593Smuzhiyun 			trace_xfs_alloc_near_noentry(args);
1593*4882a593Smuzhiyun 			goto out;
1594*4882a593Smuzhiyun 		}
1595*4882a593Smuzhiyun 		ASSERT(i == 1);
1596*4882a593Smuzhiyun 	} else if (error) {
1597*4882a593Smuzhiyun 		goto out;
1598*4882a593Smuzhiyun 	}
1599*4882a593Smuzhiyun 
1600*4882a593Smuzhiyun 	/*
1601*4882a593Smuzhiyun 	 * First algorithm.
1602*4882a593Smuzhiyun 	 * If the requested extent is large with respect to the free
1603*4882a593Smuzhiyun 	 * space available in this a.g., the cursor will point to an entry
1604*4882a593Smuzhiyun 	 * near the right edge of the tree.  If it's in the last btree leaf
1605*4882a593Smuzhiyun 	 * block, then we just examine all the entries in that block
1606*4882a593Smuzhiyun 	 * that are big enough, and pick the best one.
1607*4882a593Smuzhiyun 	 */
1608*4882a593Smuzhiyun 	if (xfs_btree_islastblock(acur.cnt, 0)) {
1609*4882a593Smuzhiyun 		bool		allocated = false;
1610*4882a593Smuzhiyun 
1611*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
1612*4882a593Smuzhiyun 				&allocated);
1613*4882a593Smuzhiyun 		if (error)
1614*4882a593Smuzhiyun 			goto out;
1615*4882a593Smuzhiyun 		if (allocated)
1616*4882a593Smuzhiyun 			goto alloc_finish;
1617*4882a593Smuzhiyun 	}
1618*4882a593Smuzhiyun 
1619*4882a593Smuzhiyun 	/*
1620*4882a593Smuzhiyun 	 * Second algorithm. Combined cntbt and bnobt search to find ideal
1621*4882a593Smuzhiyun 	 * locality.
1622*4882a593Smuzhiyun 	 */
1623*4882a593Smuzhiyun 	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
1624*4882a593Smuzhiyun 	if (error)
1625*4882a593Smuzhiyun 		goto out;
1626*4882a593Smuzhiyun 
1627*4882a593Smuzhiyun 	/*
1628*4882a593Smuzhiyun 	 * If we couldn't get anything, give up.
1629*4882a593Smuzhiyun 	 */
1630*4882a593Smuzhiyun 	if (!acur.len) {
1631*4882a593Smuzhiyun 		if (acur.busy) {
1632*4882a593Smuzhiyun 			trace_xfs_alloc_near_busy(args);
1633*4882a593Smuzhiyun 			xfs_extent_busy_flush(args->mp, args->pag,
1634*4882a593Smuzhiyun 					      acur.busy_gen);
1635*4882a593Smuzhiyun 			goto restart;
1636*4882a593Smuzhiyun 		}
1637*4882a593Smuzhiyun 		trace_xfs_alloc_size_neither(args);
1638*4882a593Smuzhiyun 		args->agbno = NULLAGBLOCK;
1639*4882a593Smuzhiyun 		goto out;
1640*4882a593Smuzhiyun 	}
1641*4882a593Smuzhiyun 
1642*4882a593Smuzhiyun alloc_finish:
1643*4882a593Smuzhiyun 	/* fix up btrees on a successful allocation */
1644*4882a593Smuzhiyun 	error = xfs_alloc_cur_finish(args, &acur);
1645*4882a593Smuzhiyun 
1646*4882a593Smuzhiyun out:
1647*4882a593Smuzhiyun 	xfs_alloc_cur_close(&acur, error);
1648*4882a593Smuzhiyun 	return error;
1649*4882a593Smuzhiyun }
1650*4882a593Smuzhiyun 
1651*4882a593Smuzhiyun /*
1652*4882a593Smuzhiyun  * Allocate a variable extent anywhere in the allocation group agno.
1653*4882a593Smuzhiyun  * Extent's length (returned in len) will be between minlen and maxlen,
1654*4882a593Smuzhiyun  * and of the form k * prod + mod unless there's nothing that large.
1655*4882a593Smuzhiyun  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1656*4882a593Smuzhiyun  */
1657*4882a593Smuzhiyun STATIC int				/* error */
1658*4882a593Smuzhiyun xfs_alloc_ag_vextent_size(
1659*4882a593Smuzhiyun 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
1660*4882a593Smuzhiyun {
1661*4882a593Smuzhiyun 	struct xfs_agf	*agf = args->agbp->b_addr;
1662*4882a593Smuzhiyun 	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
1663*4882a593Smuzhiyun 	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
1664*4882a593Smuzhiyun 	int		error;		/* error result */
1665*4882a593Smuzhiyun 	xfs_agblock_t	fbno;		/* start of found freespace */
1666*4882a593Smuzhiyun 	xfs_extlen_t	flen;		/* length of found freespace */
1667*4882a593Smuzhiyun 	int		i;		/* temp status variable */
1668*4882a593Smuzhiyun 	xfs_agblock_t	rbno;		/* returned block number */
1669*4882a593Smuzhiyun 	xfs_extlen_t	rlen;		/* length of returned extent */
1670*4882a593Smuzhiyun 	bool		busy;
1671*4882a593Smuzhiyun 	unsigned	busy_gen;
1672*4882a593Smuzhiyun 
1673*4882a593Smuzhiyun restart:
1674*4882a593Smuzhiyun 	/*
1675*4882a593Smuzhiyun 	 * Allocate and initialize a cursor for the by-size btree.
1676*4882a593Smuzhiyun 	 */
1677*4882a593Smuzhiyun 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1678*4882a593Smuzhiyun 		args->agno, XFS_BTNUM_CNT);
1679*4882a593Smuzhiyun 	bno_cur = NULL;
1680*4882a593Smuzhiyun 	busy = false;
1681*4882a593Smuzhiyun 
1682*4882a593Smuzhiyun 	/*
1683*4882a593Smuzhiyun 	 * Look for an entry >= maxlen+alignment-1 blocks.
1684*4882a593Smuzhiyun 	 */
1685*4882a593Smuzhiyun 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1686*4882a593Smuzhiyun 			args->maxlen + args->alignment - 1, &i)))
1687*4882a593Smuzhiyun 		goto error0;
1688*4882a593Smuzhiyun 
1689*4882a593Smuzhiyun 	/*
1690*4882a593Smuzhiyun 	 * If none, we have to settle for a smaller extent. If there are
1691*4882a593Smuzhiyun 	 * no large extents at all, this will return the last entry in the
1692*4882a593Smuzhiyun 	 * tree unless the tree is empty. If there are only busy large
1693*4882a593Smuzhiyun 	 * extents, this will return the largest small extent unless there
1694*4882a593Smuzhiyun 	 * are no smaller extents available.
1695*4882a593Smuzhiyun 	 */
1696*4882a593Smuzhiyun 	if (!i) {
1697*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1698*4882a593Smuzhiyun 						   &fbno, &flen, &i);
1699*4882a593Smuzhiyun 		if (error)
1700*4882a593Smuzhiyun 			goto error0;
1701*4882a593Smuzhiyun 		if (i == 0 || flen == 0) {
1702*4882a593Smuzhiyun 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1703*4882a593Smuzhiyun 			trace_xfs_alloc_size_noentry(args);
1704*4882a593Smuzhiyun 			return 0;
1705*4882a593Smuzhiyun 		}
1706*4882a593Smuzhiyun 		ASSERT(i == 1);
1707*4882a593Smuzhiyun 		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1708*4882a593Smuzhiyun 				&rlen, &busy_gen);
1709*4882a593Smuzhiyun 	} else {
1710*4882a593Smuzhiyun 		/*
1711*4882a593Smuzhiyun 		 * Search for a non-busy extent that is large enough.
1712*4882a593Smuzhiyun 		 */
1713*4882a593Smuzhiyun 		for (;;) {
1714*4882a593Smuzhiyun 			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1715*4882a593Smuzhiyun 			if (error)
1716*4882a593Smuzhiyun 				goto error0;
1717*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1718*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
1719*4882a593Smuzhiyun 				goto error0;
1720*4882a593Smuzhiyun 			}
1721*4882a593Smuzhiyun 
1722*4882a593Smuzhiyun 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
1723*4882a593Smuzhiyun 					&rbno, &rlen, &busy_gen);
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun 			if (rlen >= args->maxlen)
1726*4882a593Smuzhiyun 				break;
1727*4882a593Smuzhiyun 
1728*4882a593Smuzhiyun 			error = xfs_btree_increment(cnt_cur, 0, &i);
1729*4882a593Smuzhiyun 			if (error)
1730*4882a593Smuzhiyun 				goto error0;
1731*4882a593Smuzhiyun 			if (i == 0) {
1732*4882a593Smuzhiyun 				/*
1733*4882a593Smuzhiyun 				 * Our only valid extents must have been busy.
1734*4882a593Smuzhiyun 				 * Make them unbusy by forcing the log out and
1735*4882a593Smuzhiyun 				 * retrying.
1736*4882a593Smuzhiyun 				 */
1737*4882a593Smuzhiyun 				xfs_btree_del_cursor(cnt_cur,
1738*4882a593Smuzhiyun 						     XFS_BTREE_NOERROR);
1739*4882a593Smuzhiyun 				trace_xfs_alloc_size_busy(args);
1740*4882a593Smuzhiyun 				xfs_extent_busy_flush(args->mp,
1741*4882a593Smuzhiyun 							args->pag, busy_gen);
1742*4882a593Smuzhiyun 				goto restart;
1743*4882a593Smuzhiyun 			}
1744*4882a593Smuzhiyun 		}
1745*4882a593Smuzhiyun 	}
1746*4882a593Smuzhiyun 
1747*4882a593Smuzhiyun 	/*
1748*4882a593Smuzhiyun 	 * In the first case above, we got the last entry in the
1749*4882a593Smuzhiyun 	 * by-size btree.  Now we check to see if the space hits maxlen
1750*4882a593Smuzhiyun 	 * once aligned; if not, we search left for something better.
1751*4882a593Smuzhiyun 	 * This can't happen in the second case above.
1752*4882a593Smuzhiyun 	 */
1753*4882a593Smuzhiyun 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1754*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(args->mp,
1755*4882a593Smuzhiyun 			   rlen != 0 &&
1756*4882a593Smuzhiyun 			   (rlen > flen ||
1757*4882a593Smuzhiyun 			    rbno + rlen > fbno + flen))) {
1758*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
1759*4882a593Smuzhiyun 		goto error0;
1760*4882a593Smuzhiyun 	}
1761*4882a593Smuzhiyun 	if (rlen < args->maxlen) {
1762*4882a593Smuzhiyun 		xfs_agblock_t	bestfbno;
1763*4882a593Smuzhiyun 		xfs_extlen_t	bestflen;
1764*4882a593Smuzhiyun 		xfs_agblock_t	bestrbno;
1765*4882a593Smuzhiyun 		xfs_extlen_t	bestrlen;
1766*4882a593Smuzhiyun 
1767*4882a593Smuzhiyun 		bestrlen = rlen;
1768*4882a593Smuzhiyun 		bestrbno = rbno;
1769*4882a593Smuzhiyun 		bestflen = flen;
1770*4882a593Smuzhiyun 		bestfbno = fbno;
1771*4882a593Smuzhiyun 		for (;;) {
1772*4882a593Smuzhiyun 			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1773*4882a593Smuzhiyun 				goto error0;
1774*4882a593Smuzhiyun 			if (i == 0)
1775*4882a593Smuzhiyun 				break;
1776*4882a593Smuzhiyun 			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1777*4882a593Smuzhiyun 					&i)))
1778*4882a593Smuzhiyun 				goto error0;
1779*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1780*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
1781*4882a593Smuzhiyun 				goto error0;
1782*4882a593Smuzhiyun 			}
1783*4882a593Smuzhiyun 			if (flen < bestrlen)
1784*4882a593Smuzhiyun 				break;
1785*4882a593Smuzhiyun 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
1786*4882a593Smuzhiyun 					&rbno, &rlen, &busy_gen);
1787*4882a593Smuzhiyun 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1788*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(args->mp,
1789*4882a593Smuzhiyun 					   rlen != 0 &&
1790*4882a593Smuzhiyun 					   (rlen > flen ||
1791*4882a593Smuzhiyun 					    rbno + rlen > fbno + flen))) {
1792*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
1793*4882a593Smuzhiyun 				goto error0;
1794*4882a593Smuzhiyun 			}
1795*4882a593Smuzhiyun 			if (rlen > bestrlen) {
1796*4882a593Smuzhiyun 				bestrlen = rlen;
1797*4882a593Smuzhiyun 				bestrbno = rbno;
1798*4882a593Smuzhiyun 				bestflen = flen;
1799*4882a593Smuzhiyun 				bestfbno = fbno;
1800*4882a593Smuzhiyun 				if (rlen == args->maxlen)
1801*4882a593Smuzhiyun 					break;
1802*4882a593Smuzhiyun 			}
1803*4882a593Smuzhiyun 		}
1804*4882a593Smuzhiyun 		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1805*4882a593Smuzhiyun 				&i)))
1806*4882a593Smuzhiyun 			goto error0;
1807*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1808*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
1809*4882a593Smuzhiyun 			goto error0;
1810*4882a593Smuzhiyun 		}
1811*4882a593Smuzhiyun 		rlen = bestrlen;
1812*4882a593Smuzhiyun 		rbno = bestrbno;
1813*4882a593Smuzhiyun 		flen = bestflen;
1814*4882a593Smuzhiyun 		fbno = bestfbno;
1815*4882a593Smuzhiyun 	}
1816*4882a593Smuzhiyun 	args->wasfromfl = 0;
1817*4882a593Smuzhiyun 	/*
1818*4882a593Smuzhiyun 	 * Fix up the length.
1819*4882a593Smuzhiyun 	 */
1820*4882a593Smuzhiyun 	args->len = rlen;
1821*4882a593Smuzhiyun 	if (rlen < args->minlen) {
1822*4882a593Smuzhiyun 		if (busy) {
1823*4882a593Smuzhiyun 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1824*4882a593Smuzhiyun 			trace_xfs_alloc_size_busy(args);
1825*4882a593Smuzhiyun 			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1826*4882a593Smuzhiyun 			goto restart;
1827*4882a593Smuzhiyun 		}
1828*4882a593Smuzhiyun 		goto out_nominleft;
1829*4882a593Smuzhiyun 	}
1830*4882a593Smuzhiyun 	xfs_alloc_fix_len(args);
1831*4882a593Smuzhiyun 
1832*4882a593Smuzhiyun 	rlen = args->len;
1833*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1834*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
1835*4882a593Smuzhiyun 		goto error0;
1836*4882a593Smuzhiyun 	}
1837*4882a593Smuzhiyun 	/*
1838*4882a593Smuzhiyun 	 * Allocate and initialize a cursor for the by-block tree.
1839*4882a593Smuzhiyun 	 */
1840*4882a593Smuzhiyun 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1841*4882a593Smuzhiyun 		args->agno, XFS_BTNUM_BNO);
1842*4882a593Smuzhiyun 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1843*4882a593Smuzhiyun 			rbno, rlen, XFSA_FIXUP_CNT_OK)))
1844*4882a593Smuzhiyun 		goto error0;
1845*4882a593Smuzhiyun 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1846*4882a593Smuzhiyun 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1847*4882a593Smuzhiyun 	cnt_cur = bno_cur = NULL;
1848*4882a593Smuzhiyun 	args->len = rlen;
1849*4882a593Smuzhiyun 	args->agbno = rbno;
1850*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(args->mp,
1851*4882a593Smuzhiyun 			   args->agbno + args->len >
1852*4882a593Smuzhiyun 			   be32_to_cpu(agf->agf_length))) {
1853*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
1854*4882a593Smuzhiyun 		goto error0;
1855*4882a593Smuzhiyun 	}
1856*4882a593Smuzhiyun 	trace_xfs_alloc_size_done(args);
1857*4882a593Smuzhiyun 	return 0;
1858*4882a593Smuzhiyun 
1859*4882a593Smuzhiyun error0:
1860*4882a593Smuzhiyun 	trace_xfs_alloc_size_error(args);
1861*4882a593Smuzhiyun 	if (cnt_cur)
1862*4882a593Smuzhiyun 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1863*4882a593Smuzhiyun 	if (bno_cur)
1864*4882a593Smuzhiyun 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1865*4882a593Smuzhiyun 	return error;
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun out_nominleft:
1868*4882a593Smuzhiyun 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1869*4882a593Smuzhiyun 	trace_xfs_alloc_size_nominleft(args);
1870*4882a593Smuzhiyun 	args->agbno = NULLAGBLOCK;
1871*4882a593Smuzhiyun 	return 0;
1872*4882a593Smuzhiyun }
1873*4882a593Smuzhiyun 
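/*
 * Illustrative sketch, not part of the original source: the in-tree
 * xfs_alloc_compute_aligned() used above first trims busy ranges and
 * then aligns what is left.  This hypothetical helper shows only the
 * alignment arithmetic, assuming align > 0.
 */
static unsigned int roundup_block(unsigned int bno, unsigned int align)
{
	return (bno + align - 1) / align * align;
}

/* Usable length of [fbno, fbno + flen) once its start is aligned. */
static unsigned int aligned_usable_len(unsigned int fbno, unsigned int flen,
				       unsigned int align, unsigned int *rbno)
{
	unsigned int	abno = roundup_block(fbno, align);

	*rbno = abno;
	if (abno >= fbno + flen)
		return 0;	/* alignment consumed the whole extent */
	return flen - (abno - fbno);
}

/* Example: fbno = 17, flen = 10, align = 8 gives *rbno = 24, length 3. */
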
1874*4882a593Smuzhiyun /*
1875*4882a593Smuzhiyun  * Free the extent starting at agno/bno for length.
1876*4882a593Smuzhiyun  */
1877*4882a593Smuzhiyun STATIC int
1878*4882a593Smuzhiyun xfs_free_ag_extent(
1879*4882a593Smuzhiyun 	struct xfs_trans		*tp,
1880*4882a593Smuzhiyun 	struct xfs_buf			*agbp,
1881*4882a593Smuzhiyun 	xfs_agnumber_t			agno,
1882*4882a593Smuzhiyun 	xfs_agblock_t			bno,
1883*4882a593Smuzhiyun 	xfs_extlen_t			len,
1884*4882a593Smuzhiyun 	const struct xfs_owner_info	*oinfo,
1885*4882a593Smuzhiyun 	enum xfs_ag_resv_type		type)
1886*4882a593Smuzhiyun {
1887*4882a593Smuzhiyun 	struct xfs_mount		*mp;
1888*4882a593Smuzhiyun 	struct xfs_btree_cur		*bno_cur;
1889*4882a593Smuzhiyun 	struct xfs_btree_cur		*cnt_cur;
1890*4882a593Smuzhiyun 	xfs_agblock_t			gtbno; /* start of right neighbor */
1891*4882a593Smuzhiyun 	xfs_extlen_t			gtlen; /* length of right neighbor */
1892*4882a593Smuzhiyun 	xfs_agblock_t			ltbno; /* start of left neighbor */
1893*4882a593Smuzhiyun 	xfs_extlen_t			ltlen; /* length of left neighbor */
1894*4882a593Smuzhiyun 	xfs_agblock_t			nbno; /* new starting block of freesp */
1895*4882a593Smuzhiyun 	xfs_extlen_t			nlen; /* new length of freespace */
1896*4882a593Smuzhiyun 	int				haveleft; /* have a left neighbor */
1897*4882a593Smuzhiyun 	int				haveright; /* have a right neighbor */
1898*4882a593Smuzhiyun 	int				i;
1899*4882a593Smuzhiyun 	int				error;
1900*4882a593Smuzhiyun 
1901*4882a593Smuzhiyun 	bno_cur = cnt_cur = NULL;
1902*4882a593Smuzhiyun 	mp = tp->t_mountp;
1903*4882a593Smuzhiyun 
1904*4882a593Smuzhiyun 	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1905*4882a593Smuzhiyun 		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1906*4882a593Smuzhiyun 		if (error)
1907*4882a593Smuzhiyun 			goto error0;
1908*4882a593Smuzhiyun 	}
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun 	/*
1911*4882a593Smuzhiyun 	 * Allocate and initialize a cursor for the by-block btree.
1912*4882a593Smuzhiyun 	 */
1913*4882a593Smuzhiyun 	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1914*4882a593Smuzhiyun 	/*
1915*4882a593Smuzhiyun 	 * Look for a neighboring block on the left (lower block numbers)
1916*4882a593Smuzhiyun 	 * that is contiguous with this space.
1917*4882a593Smuzhiyun 	 */
1918*4882a593Smuzhiyun 	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1919*4882a593Smuzhiyun 		goto error0;
1920*4882a593Smuzhiyun 	if (haveleft) {
1921*4882a593Smuzhiyun 		/*
1922*4882a593Smuzhiyun 		 * There is a block to our left.
1923*4882a593Smuzhiyun 		 */
1924*4882a593Smuzhiyun 		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1925*4882a593Smuzhiyun 			goto error0;
1926*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1927*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
1928*4882a593Smuzhiyun 			goto error0;
1929*4882a593Smuzhiyun 		}
1930*4882a593Smuzhiyun 		/*
1931*4882a593Smuzhiyun 		 * It's not contiguous, though.
1932*4882a593Smuzhiyun 		 */
1933*4882a593Smuzhiyun 		if (ltbno + ltlen < bno)
1934*4882a593Smuzhiyun 			haveleft = 0;
1935*4882a593Smuzhiyun 		else {
1936*4882a593Smuzhiyun 			/*
1937*4882a593Smuzhiyun 			 * If this failure happens, the request to free this
1938*4882a593Smuzhiyun 			 * space was invalid; it's (partly) already free.
1939*4882a593Smuzhiyun 			 * Very bad.
1940*4882a593Smuzhiyun 			 */
1941*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1942*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
1943*4882a593Smuzhiyun 				goto error0;
1944*4882a593Smuzhiyun 			}
1945*4882a593Smuzhiyun 		}
1946*4882a593Smuzhiyun 	}
1947*4882a593Smuzhiyun 	/*
1948*4882a593Smuzhiyun 	 * Look for a neighboring block on the right (higher block numbers)
1949*4882a593Smuzhiyun 	 * that is contiguous with this space.
1950*4882a593Smuzhiyun 	 */
1951*4882a593Smuzhiyun 	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1952*4882a593Smuzhiyun 		goto error0;
1953*4882a593Smuzhiyun 	if (haveright) {
1954*4882a593Smuzhiyun 		/*
1955*4882a593Smuzhiyun 		 * There is a block to our right.
1956*4882a593Smuzhiyun 		 */
1957*4882a593Smuzhiyun 		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1958*4882a593Smuzhiyun 			goto error0;
1959*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1960*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
1961*4882a593Smuzhiyun 			goto error0;
1962*4882a593Smuzhiyun 		}
1963*4882a593Smuzhiyun 		/*
1964*4882a593Smuzhiyun 		 * It's not contiguous, though.
1965*4882a593Smuzhiyun 		 */
1966*4882a593Smuzhiyun 		if (bno + len < gtbno)
1967*4882a593Smuzhiyun 			haveright = 0;
1968*4882a593Smuzhiyun 		else {
1969*4882a593Smuzhiyun 			/*
1970*4882a593Smuzhiyun 			 * If this failure happens, the request to free this
1971*4882a593Smuzhiyun 			 * space was invalid; it's (partly) already free.
1972*4882a593Smuzhiyun 			 * Very bad.
1973*4882a593Smuzhiyun 			 */
1974*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1975*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
1976*4882a593Smuzhiyun 				goto error0;
1977*4882a593Smuzhiyun 			}
1978*4882a593Smuzhiyun 		}
1979*4882a593Smuzhiyun 	}
1980*4882a593Smuzhiyun 	/*
1981*4882a593Smuzhiyun 	 * Now allocate and initialize a cursor for the by-size tree.
1982*4882a593Smuzhiyun 	 */
1983*4882a593Smuzhiyun 	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1984*4882a593Smuzhiyun 	/*
1985*4882a593Smuzhiyun 	 * Have both left and right contiguous neighbors.
1986*4882a593Smuzhiyun 	 * Merge all three into a single free block.
1987*4882a593Smuzhiyun 	 */
1988*4882a593Smuzhiyun 	if (haveleft && haveright) {
1989*4882a593Smuzhiyun 		/*
1990*4882a593Smuzhiyun 		 * Delete the old by-size entry on the left.
1991*4882a593Smuzhiyun 		 */
1992*4882a593Smuzhiyun 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1993*4882a593Smuzhiyun 			goto error0;
1994*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1995*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
1996*4882a593Smuzhiyun 			goto error0;
1997*4882a593Smuzhiyun 		}
1998*4882a593Smuzhiyun 		if ((error = xfs_btree_delete(cnt_cur, &i)))
1999*4882a593Smuzhiyun 			goto error0;
2000*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2001*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2002*4882a593Smuzhiyun 			goto error0;
2003*4882a593Smuzhiyun 		}
2004*4882a593Smuzhiyun 		/*
2005*4882a593Smuzhiyun 		 * Delete the old by-size entry on the right.
2006*4882a593Smuzhiyun 		 */
2007*4882a593Smuzhiyun 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2008*4882a593Smuzhiyun 			goto error0;
2009*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2010*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2011*4882a593Smuzhiyun 			goto error0;
2012*4882a593Smuzhiyun 		}
2013*4882a593Smuzhiyun 		if ((error = xfs_btree_delete(cnt_cur, &i)))
2014*4882a593Smuzhiyun 			goto error0;
2015*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2016*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2017*4882a593Smuzhiyun 			goto error0;
2018*4882a593Smuzhiyun 		}
2019*4882a593Smuzhiyun 		/*
2020*4882a593Smuzhiyun 		 * Delete the old by-block entry for the right block.
2021*4882a593Smuzhiyun 		 */
2022*4882a593Smuzhiyun 		if ((error = xfs_btree_delete(bno_cur, &i)))
2023*4882a593Smuzhiyun 			goto error0;
2024*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2025*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2026*4882a593Smuzhiyun 			goto error0;
2027*4882a593Smuzhiyun 		}
2028*4882a593Smuzhiyun 		/*
2029*4882a593Smuzhiyun 		 * Move the by-block cursor back to the left neighbor.
2030*4882a593Smuzhiyun 		 */
2031*4882a593Smuzhiyun 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2032*4882a593Smuzhiyun 			goto error0;
2033*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2034*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2035*4882a593Smuzhiyun 			goto error0;
2036*4882a593Smuzhiyun 		}
2037*4882a593Smuzhiyun #ifdef DEBUG
2038*4882a593Smuzhiyun 		/*
2039*4882a593Smuzhiyun 		 * Check that this is the right record: delete didn't
2040*4882a593Smuzhiyun 		 * mangle the cursor.
2041*4882a593Smuzhiyun 		 */
2042*4882a593Smuzhiyun 		{
2043*4882a593Smuzhiyun 			xfs_agblock_t	xxbno;
2044*4882a593Smuzhiyun 			xfs_extlen_t	xxlen;
2045*4882a593Smuzhiyun 
2046*4882a593Smuzhiyun 			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2047*4882a593Smuzhiyun 					&i)))
2048*4882a593Smuzhiyun 				goto error0;
2049*4882a593Smuzhiyun 			if (XFS_IS_CORRUPT(mp,
2050*4882a593Smuzhiyun 					   i != 1 ||
2051*4882a593Smuzhiyun 					   xxbno != ltbno ||
2052*4882a593Smuzhiyun 					   xxlen != ltlen)) {
2053*4882a593Smuzhiyun 				error = -EFSCORRUPTED;
2054*4882a593Smuzhiyun 				goto error0;
2055*4882a593Smuzhiyun 			}
2056*4882a593Smuzhiyun 		}
2057*4882a593Smuzhiyun #endif
2058*4882a593Smuzhiyun 		/*
2059*4882a593Smuzhiyun 		 * Update remaining by-block entry to the new, joined block.
2060*4882a593Smuzhiyun 		 */
2061*4882a593Smuzhiyun 		nbno = ltbno;
2062*4882a593Smuzhiyun 		nlen = len + ltlen + gtlen;
2063*4882a593Smuzhiyun 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2064*4882a593Smuzhiyun 			goto error0;
2065*4882a593Smuzhiyun 	}
2066*4882a593Smuzhiyun 	/*
2067*4882a593Smuzhiyun 	 * Have only a left contiguous neighbor.
2068*4882a593Smuzhiyun 	 * Merge it together with the new freespace.
2069*4882a593Smuzhiyun 	 */
2070*4882a593Smuzhiyun 	else if (haveleft) {
2071*4882a593Smuzhiyun 		/*
2072*4882a593Smuzhiyun 		 * Delete the old by-size entry on the left.
2073*4882a593Smuzhiyun 		 */
2074*4882a593Smuzhiyun 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2075*4882a593Smuzhiyun 			goto error0;
2076*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2077*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2078*4882a593Smuzhiyun 			goto error0;
2079*4882a593Smuzhiyun 		}
2080*4882a593Smuzhiyun 		if ((error = xfs_btree_delete(cnt_cur, &i)))
2081*4882a593Smuzhiyun 			goto error0;
2082*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2083*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2084*4882a593Smuzhiyun 			goto error0;
2085*4882a593Smuzhiyun 		}
2086*4882a593Smuzhiyun 		/*
2087*4882a593Smuzhiyun 		 * Back up the by-block cursor to the left neighbor, and
2088*4882a593Smuzhiyun 		 * update its length.
2089*4882a593Smuzhiyun 		 */
2090*4882a593Smuzhiyun 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2091*4882a593Smuzhiyun 			goto error0;
2092*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2093*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2094*4882a593Smuzhiyun 			goto error0;
2095*4882a593Smuzhiyun 		}
2096*4882a593Smuzhiyun 		nbno = ltbno;
2097*4882a593Smuzhiyun 		nlen = len + ltlen;
2098*4882a593Smuzhiyun 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2099*4882a593Smuzhiyun 			goto error0;
2100*4882a593Smuzhiyun 	}
2101*4882a593Smuzhiyun 	/*
2102*4882a593Smuzhiyun 	 * Have only a right contiguous neighbor.
2103*4882a593Smuzhiyun 	 * Merge it together with the new freespace.
2104*4882a593Smuzhiyun 	 */
2105*4882a593Smuzhiyun 	else if (haveright) {
2106*4882a593Smuzhiyun 		/*
2107*4882a593Smuzhiyun 		 * Delete the old by-size entry on the right.
2108*4882a593Smuzhiyun 		 */
2109*4882a593Smuzhiyun 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2110*4882a593Smuzhiyun 			goto error0;
2111*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2112*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2113*4882a593Smuzhiyun 			goto error0;
2114*4882a593Smuzhiyun 		}
2115*4882a593Smuzhiyun 		if ((error = xfs_btree_delete(cnt_cur, &i)))
2116*4882a593Smuzhiyun 			goto error0;
2117*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2118*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2119*4882a593Smuzhiyun 			goto error0;
2120*4882a593Smuzhiyun 		}
2121*4882a593Smuzhiyun 		/*
2122*4882a593Smuzhiyun 		 * Update the starting block and length of the right
2123*4882a593Smuzhiyun 		 * neighbor in the by-block tree.
2124*4882a593Smuzhiyun 		 */
2125*4882a593Smuzhiyun 		nbno = bno;
2126*4882a593Smuzhiyun 		nlen = len + gtlen;
2127*4882a593Smuzhiyun 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2128*4882a593Smuzhiyun 			goto error0;
2129*4882a593Smuzhiyun 	}
2130*4882a593Smuzhiyun 	/*
2131*4882a593Smuzhiyun 	 * No contiguous neighbors.
2132*4882a593Smuzhiyun 	 * Insert the new freespace into the by-block tree.
2133*4882a593Smuzhiyun 	 */
2134*4882a593Smuzhiyun 	else {
2135*4882a593Smuzhiyun 		nbno = bno;
2136*4882a593Smuzhiyun 		nlen = len;
2137*4882a593Smuzhiyun 		if ((error = xfs_btree_insert(bno_cur, &i)))
2138*4882a593Smuzhiyun 			goto error0;
2139*4882a593Smuzhiyun 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2140*4882a593Smuzhiyun 			error = -EFSCORRUPTED;
2141*4882a593Smuzhiyun 			goto error0;
2142*4882a593Smuzhiyun 		}
2143*4882a593Smuzhiyun 	}
2144*4882a593Smuzhiyun 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2145*4882a593Smuzhiyun 	bno_cur = NULL;
2146*4882a593Smuzhiyun 	/*
2147*4882a593Smuzhiyun 	 * In all cases we need to insert the new freespace in the by-size tree.
2148*4882a593Smuzhiyun 	 */
2149*4882a593Smuzhiyun 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2150*4882a593Smuzhiyun 		goto error0;
2151*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(mp, i != 0)) {
2152*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
2153*4882a593Smuzhiyun 		goto error0;
2154*4882a593Smuzhiyun 	}
2155*4882a593Smuzhiyun 	if ((error = xfs_btree_insert(cnt_cur, &i)))
2156*4882a593Smuzhiyun 		goto error0;
2157*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(mp, i != 1)) {
2158*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
2159*4882a593Smuzhiyun 		goto error0;
2160*4882a593Smuzhiyun 	}
2161*4882a593Smuzhiyun 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2162*4882a593Smuzhiyun 	cnt_cur = NULL;
2163*4882a593Smuzhiyun 
2164*4882a593Smuzhiyun 	/*
2165*4882a593Smuzhiyun 	 * Update the freespace totals in the ag and superblock.
2166*4882a593Smuzhiyun 	 */
2167*4882a593Smuzhiyun 	error = xfs_alloc_update_counters(tp, agbp, len);
2168*4882a593Smuzhiyun 	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
2169*4882a593Smuzhiyun 	if (error)
2170*4882a593Smuzhiyun 		goto error0;
2171*4882a593Smuzhiyun 
2172*4882a593Smuzhiyun 	XFS_STATS_INC(mp, xs_freex);
2173*4882a593Smuzhiyun 	XFS_STATS_ADD(mp, xs_freeb, len);
2174*4882a593Smuzhiyun 
2175*4882a593Smuzhiyun 	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
2176*4882a593Smuzhiyun 
2177*4882a593Smuzhiyun 	return 0;
2178*4882a593Smuzhiyun 
2179*4882a593Smuzhiyun  error0:
2180*4882a593Smuzhiyun 	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2181*4882a593Smuzhiyun 	if (bno_cur)
2182*4882a593Smuzhiyun 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2183*4882a593Smuzhiyun 	if (cnt_cur)
2184*4882a593Smuzhiyun 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2185*4882a593Smuzhiyun 	return error;
2186*4882a593Smuzhiyun }
2187*4882a593Smuzhiyun 
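/*
 * Illustrative sketch, not part of the original source: the four
 * merge cases above all reduce to one rule for the resulting record.
 * A hypothetical restatement, ignoring the btree surgery itself:
 */
static void merged_freespace(int haveleft, unsigned int ltbno,
			     unsigned int ltlen, int haveright,
			     unsigned int gtlen, unsigned int bno,
			     unsigned int len, unsigned int *nbno,
			     unsigned int *nlen)
{
	*nbno = haveleft ? ltbno : bno;
	*nlen = len + (haveleft ? ltlen : 0) + (haveright ? gtlen : 0);
}

/*
 * Example: freeing [100, 104) with a left neighbor [96, 100) and a
 * right neighbor [104, 112) yields the single record [96, 112),
 * i.e. *nbno = 96 and *nlen = 16.
 */
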
2188*4882a593Smuzhiyun /*
2189*4882a593Smuzhiyun  * Visible (exported) allocation/free functions.
2190*4882a593Smuzhiyun  * Some of these are used just by xfs_alloc_btree.c and this file.
2191*4882a593Smuzhiyun  */
2192*4882a593Smuzhiyun 
2193*4882a593Smuzhiyun /*
2194*4882a593Smuzhiyun  * Compute and fill in value of m_ag_maxlevels.
2195*4882a593Smuzhiyun  */
2196*4882a593Smuzhiyun void
2197*4882a593Smuzhiyun xfs_alloc_compute_maxlevels(
2198*4882a593Smuzhiyun 	xfs_mount_t	*mp)	/* file system mount structure */
2199*4882a593Smuzhiyun {
2200*4882a593Smuzhiyun 	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2201*4882a593Smuzhiyun 			(mp->m_sb.sb_agblocks + 1) / 2);
2202*4882a593Smuzhiyun }
2203*4882a593Smuzhiyun 
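/*
 * The (sb_agblocks + 1) / 2 bound above comes from maximal
 * fragmentation: free extents must be separated by at least one
 * allocated block, so an AG can never hold more free-space records
 * than half its blocks, rounded up.  Illustrative sketch, not part of
 * the original source: a hypothetical worst-case height computation;
 * the in-tree xfs_btree_compute_maxlevels() reaches the same height
 * via ceiling division over block counts.  Assumes minrecs >= 2.
 */
static unsigned int worst_case_btree_height(unsigned int minrecs_leaf,
					    unsigned int minrecs_node,
					    unsigned long maxrecs)
{
	unsigned int	level = 1;
	unsigned long	held = minrecs_leaf;	/* capacity at this height */

	while (held < maxrecs) {
		held *= minrecs_node;	/* one more level of interior nodes */
		level++;
	}
	return level;
}
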
2204*4882a593Smuzhiyun /*
2205*4882a593Smuzhiyun  * Find the length of the longest extent in an AG.  The 'need' parameter
2206*4882a593Smuzhiyun  * specifies how much space we're going to need for the AGFL and the
2207*4882a593Smuzhiyun  * specifies how much space we're going to need for the AGFL, and the
2208*4882a593Smuzhiyun  * other callers.
2209*4882a593Smuzhiyun  */
2210*4882a593Smuzhiyun xfs_extlen_t
2211*4882a593Smuzhiyun xfs_alloc_longest_free_extent(
2212*4882a593Smuzhiyun 	struct xfs_perag	*pag,
2213*4882a593Smuzhiyun 	xfs_extlen_t		need,
2214*4882a593Smuzhiyun 	xfs_extlen_t		reserved)
2215*4882a593Smuzhiyun {
2216*4882a593Smuzhiyun 	xfs_extlen_t		delta = 0;
2217*4882a593Smuzhiyun 
2218*4882a593Smuzhiyun 	/*
2219*4882a593Smuzhiyun 	 * If the AGFL needs a recharge, we'll have to subtract that from the
2220*4882a593Smuzhiyun 	 * longest extent.
2221*4882a593Smuzhiyun 	 */
2222*4882a593Smuzhiyun 	if (need > pag->pagf_flcount)
2223*4882a593Smuzhiyun 		delta = need - pag->pagf_flcount;
2224*4882a593Smuzhiyun 
2225*4882a593Smuzhiyun 	/*
2226*4882a593Smuzhiyun 	 * If we cannot maintain others' reservations with space from the
2227*4882a593Smuzhiyun 	 * not-longest free space extents, we'll have to subtract /that/ from
2228*4882a593Smuzhiyun 	 * the longest extent too.
2229*4882a593Smuzhiyun 	 */
2230*4882a593Smuzhiyun 	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2231*4882a593Smuzhiyun 		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2232*4882a593Smuzhiyun 
2233*4882a593Smuzhiyun 	/*
2234*4882a593Smuzhiyun 	 * If the longest extent is long enough to satisfy all the
2235*4882a593Smuzhiyun 	 * reservations and AGFL rules in place, we can return this extent.
2236*4882a593Smuzhiyun 	 */
2237*4882a593Smuzhiyun 	if (pag->pagf_longest > delta)
2238*4882a593Smuzhiyun 		return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2239*4882a593Smuzhiyun 				pag->pagf_longest - delta);
2240*4882a593Smuzhiyun 
2241*4882a593Smuzhiyun 	/* Otherwise, let the caller try for 1 block if there's space. */
2242*4882a593Smuzhiyun 	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2243*4882a593Smuzhiyun }
2244*4882a593Smuzhiyun 
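/*
 * Illustrative sketch, not part of the original source: the delta
 * logic above restated over plain integers.  All names below are
 * hypothetical.
 */
static unsigned int longest_usable_extent(unsigned int longest,
					  unsigned int freeblks,
					  unsigned int flcount,
					  unsigned int need,
					  unsigned int reserved,
					  unsigned int max_usable)
{
	unsigned int	delta = 0;

	if (need > flcount)			/* AGFL refill comes first */
		delta = need - flcount;
	if (freeblks - longest < reserved)	/* reservations spill over */
		delta += reserved - (freeblks - longest);
	if (longest > delta)
		return longest - delta < max_usable ?
				longest - delta : max_usable;
	return flcount > 0 || longest > 0;	/* at most a 1-block try */
}

/*
 * Example: longest = 60, freeblks = 100, flcount = 3, need = 8,
 * reserved = 50.  The AGFL wants 5 more blocks (delta = 5) and the
 * reservations exceed the non-longest free space by 10 (delta = 15),
 * so at most 45 blocks of the longest extent are actually usable.
 */
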
2245*4882a593Smuzhiyun /*
2246*4882a593Smuzhiyun  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
2247*4882a593Smuzhiyun  * return the largest possible minimum length.
2248*4882a593Smuzhiyun  */
2249*4882a593Smuzhiyun unsigned int
2250*4882a593Smuzhiyun xfs_alloc_min_freelist(
2251*4882a593Smuzhiyun 	struct xfs_mount	*mp,
2252*4882a593Smuzhiyun 	struct xfs_perag	*pag)
2253*4882a593Smuzhiyun {
2254*4882a593Smuzhiyun 	/* AG btrees have at least 1 level. */
2255*4882a593Smuzhiyun 	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2256*4882a593Smuzhiyun 	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
2257*4882a593Smuzhiyun 	unsigned int		min_free;
2258*4882a593Smuzhiyun 
2259*4882a593Smuzhiyun 	ASSERT(mp->m_ag_maxlevels > 0);
2260*4882a593Smuzhiyun 
2261*4882a593Smuzhiyun 	/* space needed by-bno freespace btree */
2262*4882a593Smuzhiyun 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2263*4882a593Smuzhiyun 				       mp->m_ag_maxlevels);
2264*4882a593Smuzhiyun 	/* space needed by-size freespace btree */
2265*4882a593Smuzhiyun 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2266*4882a593Smuzhiyun 				       mp->m_ag_maxlevels);
2267*4882a593Smuzhiyun 	/* space needed reverse mapping used space btree */
2268*4882a593Smuzhiyun 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2269*4882a593Smuzhiyun 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2270*4882a593Smuzhiyun 						mp->m_rmap_maxlevels);
2271*4882a593Smuzhiyun 
2272*4882a593Smuzhiyun 	return min_free;
2273*4882a593Smuzhiyun }
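
/*
 * A worked example of the sizing above, with illustrative numbers: for an AG
 * whose bnobt and cntbt are both 2 levels deep and whose rmapbt is 3 levels
 * deep, with m_ag_maxlevels = 5 and m_rmap_maxlevels = 7, the minimum
 * freelist length is min(2 + 1, 5) + min(2 + 1, 5) + min(3 + 1, 7) = 10
 * blocks - one extra level per btree to absorb a split.
 */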
2274*4882a593Smuzhiyun 
2275*4882a593Smuzhiyun /*
2276*4882a593Smuzhiyun  * Check if the operation we are fixing up the freelist for should go ahead or
2277*4882a593Smuzhiyun  * not. If we are freeing blocks, we always allow it, otherwise the allocation
2278*4882a593Smuzhiyun  * is dependent on whether the size and shape of free space available will
2279*4882a593Smuzhiyun  * permit the requested allocation to take place.
2280*4882a593Smuzhiyun  */
2281*4882a593Smuzhiyun static bool
2282*4882a593Smuzhiyun xfs_alloc_space_available(
2283*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,
2284*4882a593Smuzhiyun 	xfs_extlen_t		min_free,
2285*4882a593Smuzhiyun 	int			flags)
2286*4882a593Smuzhiyun {
2287*4882a593Smuzhiyun 	struct xfs_perag	*pag = args->pag;
2288*4882a593Smuzhiyun 	xfs_extlen_t		alloc_len, longest;
2289*4882a593Smuzhiyun 	xfs_extlen_t		reservation; /* blocks that are still reserved */
2290*4882a593Smuzhiyun 	int			available;
2291*4882a593Smuzhiyun 	xfs_extlen_t		agflcount;
2292*4882a593Smuzhiyun 
2293*4882a593Smuzhiyun 	if (flags & XFS_ALLOC_FLAG_FREEING)
2294*4882a593Smuzhiyun 		return true;
2295*4882a593Smuzhiyun 
2296*4882a593Smuzhiyun 	reservation = xfs_ag_resv_needed(pag, args->resv);
2297*4882a593Smuzhiyun 
2298*4882a593Smuzhiyun 	/* do we have enough contiguous free space for the allocation? */
2299*4882a593Smuzhiyun 	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2300*4882a593Smuzhiyun 	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
2301*4882a593Smuzhiyun 	if (longest < alloc_len)
2302*4882a593Smuzhiyun 		return false;
2303*4882a593Smuzhiyun 
2304*4882a593Smuzhiyun 	/*
2305*4882a593Smuzhiyun 	 * Do we have enough free space remaining for the allocation? Don't
2306*4882a593Smuzhiyun 	 * account extra agfl blocks because we are about to defer-free them,
2307*4882a593Smuzhiyun 	 * making them unavailable until the current transaction commits.
2308*4882a593Smuzhiyun 	 */
2309*4882a593Smuzhiyun 	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2310*4882a593Smuzhiyun 	available = (int)(pag->pagf_freeblks + agflcount -
2311*4882a593Smuzhiyun 			  reservation - min_free - args->minleft);
2312*4882a593Smuzhiyun 	if (available < (int)max(args->total, alloc_len))
2313*4882a593Smuzhiyun 		return false;
2314*4882a593Smuzhiyun 
2315*4882a593Smuzhiyun 	/*
2316*4882a593Smuzhiyun 	 * Clamp maxlen to the amount of free space available for the actual
2317*4882a593Smuzhiyun 	 * extent allocation.
2318*4882a593Smuzhiyun 	 */
2319*4882a593Smuzhiyun 	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2320*4882a593Smuzhiyun 		args->maxlen = available;
2321*4882a593Smuzhiyun 		ASSERT(args->maxlen > 0);
2322*4882a593Smuzhiyun 		ASSERT(args->maxlen >= args->minlen);
2323*4882a593Smuzhiyun 	}
2324*4882a593Smuzhiyun 
2325*4882a593Smuzhiyun 	return true;
2326*4882a593Smuzhiyun }
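
/*
 * A numeric sketch of the availability check above (illustrative values
 * only): with freeblks = 50, flcount = 3, min_free = 10, reservation = 20
 * and minleft = 4, only min(3, 10) = 3 AGFL blocks may be counted, so
 * available = 50 + 3 - 20 - 10 - 4 = 19; any request with
 * max(total, alloc_len) > 19 is rejected before touching the btrees.
 */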
2327*4882a593Smuzhiyun 
2328*4882a593Smuzhiyun int
2329*4882a593Smuzhiyun xfs_free_agfl_block(
2330*4882a593Smuzhiyun 	struct xfs_trans	*tp,
2331*4882a593Smuzhiyun 	xfs_agnumber_t		agno,
2332*4882a593Smuzhiyun 	xfs_agblock_t		agbno,
2333*4882a593Smuzhiyun 	struct xfs_buf		*agbp,
2334*4882a593Smuzhiyun 	struct xfs_owner_info	*oinfo)
2335*4882a593Smuzhiyun {
2336*4882a593Smuzhiyun 	int			error;
2337*4882a593Smuzhiyun 	struct xfs_buf		*bp;
2338*4882a593Smuzhiyun 
2339*4882a593Smuzhiyun 	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2340*4882a593Smuzhiyun 				   XFS_AG_RESV_AGFL);
2341*4882a593Smuzhiyun 	if (error)
2342*4882a593Smuzhiyun 		return error;
2343*4882a593Smuzhiyun 
2344*4882a593Smuzhiyun 	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2345*4882a593Smuzhiyun 			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2346*4882a593Smuzhiyun 			tp->t_mountp->m_bsize, 0, &bp);
2347*4882a593Smuzhiyun 	if (error)
2348*4882a593Smuzhiyun 		return error;
2349*4882a593Smuzhiyun 	xfs_trans_binval(tp, bp);
2350*4882a593Smuzhiyun 
2351*4882a593Smuzhiyun 	return 0;
2352*4882a593Smuzhiyun }
2353*4882a593Smuzhiyun 
2354*4882a593Smuzhiyun /*
2355*4882a593Smuzhiyun  * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2356*4882a593Smuzhiyun  * is to detect an agfl header padding mismatch between current and early v5
2357*4882a593Smuzhiyun  * kernels. This problem manifests as a 1-slot size difference between the
2358*4882a593Smuzhiyun  * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2359*4882a593Smuzhiyun  * may also catch variants of agfl count corruption unrelated to padding. Either
2360*4882a593Smuzhiyun  * way, we'll reset the agfl and warn the user.
2361*4882a593Smuzhiyun  *
2362*4882a593Smuzhiyun  * Return true if a reset is required before the agfl can be used, false
2363*4882a593Smuzhiyun  * otherwise.
2364*4882a593Smuzhiyun  */
2365*4882a593Smuzhiyun static bool
2366*4882a593Smuzhiyun xfs_agfl_needs_reset(
2367*4882a593Smuzhiyun 	struct xfs_mount	*mp,
2368*4882a593Smuzhiyun 	struct xfs_agf		*agf)
2369*4882a593Smuzhiyun {
2370*4882a593Smuzhiyun 	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
2371*4882a593Smuzhiyun 	uint32_t		l = be32_to_cpu(agf->agf_fllast);
2372*4882a593Smuzhiyun 	uint32_t		c = be32_to_cpu(agf->agf_flcount);
2373*4882a593Smuzhiyun 	int			agfl_size = xfs_agfl_size(mp);
2374*4882a593Smuzhiyun 	int			active;
2375*4882a593Smuzhiyun 
2376*4882a593Smuzhiyun 	/* no agfl header on v4 supers */
2377*4882a593Smuzhiyun 	if (!xfs_sb_version_hascrc(&mp->m_sb))
2378*4882a593Smuzhiyun 		return false;
2379*4882a593Smuzhiyun 
2380*4882a593Smuzhiyun 	/*
2381*4882a593Smuzhiyun 	 * The agf read verifier catches severe corruption of these fields.
2382*4882a593Smuzhiyun 	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2383*4882a593Smuzhiyun 	 * the verifier allows it.
2384*4882a593Smuzhiyun 	 */
2385*4882a593Smuzhiyun 	if (f >= agfl_size || l >= agfl_size)
2386*4882a593Smuzhiyun 		return true;
2387*4882a593Smuzhiyun 	if (c > agfl_size)
2388*4882a593Smuzhiyun 		return true;
2389*4882a593Smuzhiyun 
2390*4882a593Smuzhiyun 	/*
2391*4882a593Smuzhiyun 	 * Check consistency between the on-disk count and the active range. An
2392*4882a593Smuzhiyun 	 * agfl padding mismatch manifests as an inconsistent flcount.
2393*4882a593Smuzhiyun 	 */
2394*4882a593Smuzhiyun 	if (c && l >= f)
2395*4882a593Smuzhiyun 		active = l - f + 1;
2396*4882a593Smuzhiyun 	else if (c)
2397*4882a593Smuzhiyun 		active = agfl_size - f + l + 1;
2398*4882a593Smuzhiyun 	else
2399*4882a593Smuzhiyun 		active = 0;
2400*4882a593Smuzhiyun 
2401*4882a593Smuzhiyun 	return active != c;
2402*4882a593Smuzhiyun }
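
/*
 * A standalone sketch of the wrapped-range arithmetic above (the example_*
 * name is illustrative, not part of this file).  For a 119-slot AGFL with
 * first = 100 and last = 20, the active range wraps and covers
 * 119 - 100 + 20 + 1 = 40 slots; a padding-mismatched header typically
 * records an flcount one off from that, which this check catches.
 */
static inline bool
example_agfl_mismatch(
	uint32_t	first,
	uint32_t	last,
	uint32_t	count,
	uint32_t	size)
{
	uint32_t	active;

	if (count == 0)
		active = 0;
	else if (last >= first)
		active = last - first + 1;
	else
		active = size - first + last + 1;	/* wrapped */

	return active != count;
}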
2403*4882a593Smuzhiyun 
2404*4882a593Smuzhiyun /*
2405*4882a593Smuzhiyun  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2406*4882a593Smuzhiyun  * agfl content cannot be trusted. Warn the user that a repair is required to
2407*4882a593Smuzhiyun  * recover leaked blocks.
2408*4882a593Smuzhiyun  *
2409*4882a593Smuzhiyun  * The purpose of this mechanism is to handle filesystems affected by the agfl
2410*4882a593Smuzhiyun  * header padding mismatch problem. A reset keeps the filesystem online with a
2411*4882a593Smuzhiyun  * relatively minor free space accounting inconsistency rather than suffer the
2412*4882a593Smuzhiyun  * inevitable crash from use of an invalid agfl block.
2413*4882a593Smuzhiyun  */
2414*4882a593Smuzhiyun static void
2415*4882a593Smuzhiyun xfs_agfl_reset(
2416*4882a593Smuzhiyun 	struct xfs_trans	*tp,
2417*4882a593Smuzhiyun 	struct xfs_buf		*agbp,
2418*4882a593Smuzhiyun 	struct xfs_perag	*pag)
2419*4882a593Smuzhiyun {
2420*4882a593Smuzhiyun 	struct xfs_mount	*mp = tp->t_mountp;
2421*4882a593Smuzhiyun 	struct xfs_agf		*agf = agbp->b_addr;
2422*4882a593Smuzhiyun 
2423*4882a593Smuzhiyun 	ASSERT(pag->pagf_agflreset);
2424*4882a593Smuzhiyun 	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2425*4882a593Smuzhiyun 
2426*4882a593Smuzhiyun 	xfs_warn(mp,
2427*4882a593Smuzhiyun 	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2428*4882a593Smuzhiyun 	       "Please unmount and run xfs_repair.",
2429*4882a593Smuzhiyun 	         pag->pag_agno, pag->pagf_flcount);
2430*4882a593Smuzhiyun 
2431*4882a593Smuzhiyun 	agf->agf_flfirst = 0;
2432*4882a593Smuzhiyun 	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2433*4882a593Smuzhiyun 	agf->agf_flcount = 0;
2434*4882a593Smuzhiyun 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2435*4882a593Smuzhiyun 				    XFS_AGF_FLCOUNT);
2436*4882a593Smuzhiyun 
2437*4882a593Smuzhiyun 	pag->pagf_flcount = 0;
2438*4882a593Smuzhiyun 	pag->pagf_agflreset = false;
2439*4882a593Smuzhiyun }
2440*4882a593Smuzhiyun 
2441*4882a593Smuzhiyun /*
2442*4882a593Smuzhiyun  * Defer an AGFL block free. This is effectively equivalent to
2443*4882a593Smuzhiyun  * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2444*4882a593Smuzhiyun  *
2445*4882a593Smuzhiyun  * Deferring AGFL frees helps prevent log reservation overruns due to too many
2446*4882a593Smuzhiyun  * allocation operations in a transaction. AGFL frees are prone to this problem
2447*4882a593Smuzhiyun  * because for one they are always freed one at a time. Further, an immediate
2448*4882a593Smuzhiyun  * because they are always freed one block at a time. Further, an immediate
2449*4882a593Smuzhiyun  * the real allocation can proceed. Deferring the free disconnects freeing up
2450*4882a593Smuzhiyun  * the AGFL slot from freeing the block.
2451*4882a593Smuzhiyun  */
2452*4882a593Smuzhiyun STATIC void
2453*4882a593Smuzhiyun xfs_defer_agfl_block(
2454*4882a593Smuzhiyun 	struct xfs_trans		*tp,
2455*4882a593Smuzhiyun 	xfs_agnumber_t			agno,
2456*4882a593Smuzhiyun 	xfs_fsblock_t			agbno,
2457*4882a593Smuzhiyun 	struct xfs_owner_info		*oinfo)
2458*4882a593Smuzhiyun {
2459*4882a593Smuzhiyun 	struct xfs_mount		*mp = tp->t_mountp;
2460*4882a593Smuzhiyun 	struct xfs_extent_free_item	*new;		/* new element */
2461*4882a593Smuzhiyun 
2462*4882a593Smuzhiyun 	ASSERT(xfs_bmap_free_item_zone != NULL);
2463*4882a593Smuzhiyun 	ASSERT(oinfo != NULL);
2464*4882a593Smuzhiyun 
2465*4882a593Smuzhiyun 	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
2466*4882a593Smuzhiyun 			       GFP_KERNEL | __GFP_NOFAIL);
2467*4882a593Smuzhiyun 	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2468*4882a593Smuzhiyun 	new->xefi_blockcount = 1;
2469*4882a593Smuzhiyun 	new->xefi_oinfo = *oinfo;
2470*4882a593Smuzhiyun 	new->xefi_skip_discard = false;
2471*4882a593Smuzhiyun 
2472*4882a593Smuzhiyun 	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2473*4882a593Smuzhiyun 
2474*4882a593Smuzhiyun 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
2475*4882a593Smuzhiyun }
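
/*
 * Hypothetical usage sketch (transaction setup assumed, not shown): a caller
 * that pops a surplus block off the freelist hands it to the deferred-ops
 * machinery instead of freeing it inline, so the btree work happens after
 * the current transaction commits:
 *
 *	struct xfs_owner_info	oinfo = XFS_RMAP_OINFO_AG;
 *
 *	error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
 *	if (!error && bno != NULLAGBLOCK)
 *		xfs_defer_agfl_block(tp, agno, bno, &oinfo);
 */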
2476*4882a593Smuzhiyun 
2477*4882a593Smuzhiyun /*
2478*4882a593Smuzhiyun  * Decide whether to use this allocation group for this allocation.
2479*4882a593Smuzhiyun  * If so, fix up the btree freelist's size.
2480*4882a593Smuzhiyun  */
2481*4882a593Smuzhiyun int			/* error */
2482*4882a593Smuzhiyun xfs_alloc_fix_freelist(
2483*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args,	/* allocation argument structure */
2484*4882a593Smuzhiyun 	int			flags)	/* XFS_ALLOC_FLAG_... */
2485*4882a593Smuzhiyun {
2486*4882a593Smuzhiyun 	struct xfs_mount	*mp = args->mp;
2487*4882a593Smuzhiyun 	struct xfs_perag	*pag = args->pag;
2488*4882a593Smuzhiyun 	struct xfs_trans	*tp = args->tp;
2489*4882a593Smuzhiyun 	struct xfs_buf		*agbp = NULL;
2490*4882a593Smuzhiyun 	struct xfs_buf		*agflbp = NULL;
2491*4882a593Smuzhiyun 	struct xfs_alloc_arg	targs;	/* local allocation arguments */
2492*4882a593Smuzhiyun 	xfs_agblock_t		bno;	/* freelist block */
2493*4882a593Smuzhiyun 	xfs_extlen_t		need;	/* total blocks needed in freelist */
2494*4882a593Smuzhiyun 	int			error = 0;
2495*4882a593Smuzhiyun 
2496*4882a593Smuzhiyun 	/* deferred ops (AGFL block frees) require permanent transactions */
2497*4882a593Smuzhiyun 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2498*4882a593Smuzhiyun 
2499*4882a593Smuzhiyun 	if (!pag->pagf_init) {
2500*4882a593Smuzhiyun 		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2501*4882a593Smuzhiyun 		if (error) {
2502*4882a593Smuzhiyun 			/* Couldn't lock the AGF so skip this AG. */
2503*4882a593Smuzhiyun 			if (error == -EAGAIN)
2504*4882a593Smuzhiyun 				error = 0;
2505*4882a593Smuzhiyun 			goto out_no_agbp;
2506*4882a593Smuzhiyun 		}
2507*4882a593Smuzhiyun 	}
2508*4882a593Smuzhiyun 
2509*4882a593Smuzhiyun 	/*
2510*4882a593Smuzhiyun 	 * If this is a metadata-preferred pag and we are allocating user
2511*4882a593Smuzhiyun 	 * data, try somewhere else if we are not being asked to try harder
2512*4882a593Smuzhiyun 	 * at this point.
2513*4882a593Smuzhiyun 	 */
2514*4882a593Smuzhiyun 	if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
2515*4882a593Smuzhiyun 	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2516*4882a593Smuzhiyun 		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2517*4882a593Smuzhiyun 		goto out_agbp_relse;
2518*4882a593Smuzhiyun 	}
2519*4882a593Smuzhiyun 
2520*4882a593Smuzhiyun 	need = xfs_alloc_min_freelist(mp, pag);
2521*4882a593Smuzhiyun 	if (!xfs_alloc_space_available(args, need, flags |
2522*4882a593Smuzhiyun 			XFS_ALLOC_FLAG_CHECK))
2523*4882a593Smuzhiyun 		goto out_agbp_relse;
2524*4882a593Smuzhiyun 
2525*4882a593Smuzhiyun 	/*
2526*4882a593Smuzhiyun 	 * Get the a.g. freespace buffer.
2527*4882a593Smuzhiyun 	 * Can fail if we're not blocking on locks, and it's held.
2528*4882a593Smuzhiyun 	 */
2529*4882a593Smuzhiyun 	if (!agbp) {
2530*4882a593Smuzhiyun 		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2531*4882a593Smuzhiyun 		if (error) {
2532*4882a593Smuzhiyun 			/* Couldn't lock the AGF so skip this AG. */
2533*4882a593Smuzhiyun 			if (error == -EAGAIN)
2534*4882a593Smuzhiyun 				error = 0;
2535*4882a593Smuzhiyun 			goto out_no_agbp;
2536*4882a593Smuzhiyun 		}
2537*4882a593Smuzhiyun 	}
2538*4882a593Smuzhiyun 
2539*4882a593Smuzhiyun 	/* reset a padding mismatched agfl before final free space check */
2540*4882a593Smuzhiyun 	if (pag->pagf_agflreset)
2541*4882a593Smuzhiyun 		xfs_agfl_reset(tp, agbp, pag);
2542*4882a593Smuzhiyun 
2543*4882a593Smuzhiyun 	/* Reject if there isn't enough total space or a long enough single extent. */
2544*4882a593Smuzhiyun 	need = xfs_alloc_min_freelist(mp, pag);
2545*4882a593Smuzhiyun 	if (!xfs_alloc_space_available(args, need, flags))
2546*4882a593Smuzhiyun 		goto out_agbp_relse;
2547*4882a593Smuzhiyun 
2548*4882a593Smuzhiyun 	/*
2549*4882a593Smuzhiyun 	 * Make the freelist shorter if it's too long.
2550*4882a593Smuzhiyun 	 *
2551*4882a593Smuzhiyun 	 * Note that from this point onwards, we will always release the agf and
2552*4882a593Smuzhiyun 	 * agfl buffers on error. This handles the case where we error out and
2553*4882a593Smuzhiyun 	 * the buffers are clean or may not have been joined to the transaction
2554*4882a593Smuzhiyun 	 * and hence need to be released manually. If they have been joined to
2555*4882a593Smuzhiyun 	 * the transaction, then xfs_trans_brelse() will handle them
2556*4882a593Smuzhiyun 	 * appropriately based on the recursion count and dirty state of the
2557*4882a593Smuzhiyun 	 * buffer.
2558*4882a593Smuzhiyun 	 *
2559*4882a593Smuzhiyun 	 * XXX (dgc): When we have lots of free space, does this buy us
2560*4882a593Smuzhiyun 	 * anything other than extra overhead when we need to put more blocks
2561*4882a593Smuzhiyun 	 * back on the free list? Maybe we should only do this when space is
2562*4882a593Smuzhiyun 	 * getting low or the AGFL is more than half full?
2563*4882a593Smuzhiyun 	 *
2564*4882a593Smuzhiyun 	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2565*4882a593Smuzhiyun 	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2566*4882a593Smuzhiyun 	 * updating the rmapbt.  Both flags are used in xfs_repair while we're
2567*4882a593Smuzhiyun 	 * rebuilding the rmapbt, and neither are used by the kernel.  They're
2568*4882a593Smuzhiyun 	 * both required to ensure that rmaps are correctly recorded for the
2569*4882a593Smuzhiyun 	 * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
2570*4882a593Smuzhiyun 	 * repair/rmap.c in xfsprogs for details.
2571*4882a593Smuzhiyun 	 */
2572*4882a593Smuzhiyun 	memset(&targs, 0, sizeof(targs));
2573*4882a593Smuzhiyun 	/* struct copy below */
2574*4882a593Smuzhiyun 	if (flags & XFS_ALLOC_FLAG_NORMAP)
2575*4882a593Smuzhiyun 		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
2576*4882a593Smuzhiyun 	else
2577*4882a593Smuzhiyun 		targs.oinfo = XFS_RMAP_OINFO_AG;
2578*4882a593Smuzhiyun 	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
2579*4882a593Smuzhiyun 		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2580*4882a593Smuzhiyun 		if (error)
2581*4882a593Smuzhiyun 			goto out_agbp_relse;
2582*4882a593Smuzhiyun 
2583*4882a593Smuzhiyun 		/* defer agfl frees */
2584*4882a593Smuzhiyun 		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
2585*4882a593Smuzhiyun 	}
2586*4882a593Smuzhiyun 
2587*4882a593Smuzhiyun 	targs.tp = tp;
2588*4882a593Smuzhiyun 	targs.mp = mp;
2589*4882a593Smuzhiyun 	targs.agbp = agbp;
2590*4882a593Smuzhiyun 	targs.agno = args->agno;
2591*4882a593Smuzhiyun 	targs.alignment = targs.minlen = targs.prod = 1;
2592*4882a593Smuzhiyun 	targs.type = XFS_ALLOCTYPE_THIS_AG;
2593*4882a593Smuzhiyun 	targs.pag = pag;
2594*4882a593Smuzhiyun 	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2595*4882a593Smuzhiyun 	if (error)
2596*4882a593Smuzhiyun 		goto out_agbp_relse;
2597*4882a593Smuzhiyun 
2598*4882a593Smuzhiyun 	/* Make the freelist longer if it's too short. */
2599*4882a593Smuzhiyun 	while (pag->pagf_flcount < need) {
2600*4882a593Smuzhiyun 		targs.agbno = 0;
2601*4882a593Smuzhiyun 		targs.maxlen = need - pag->pagf_flcount;
2602*4882a593Smuzhiyun 		targs.resv = XFS_AG_RESV_AGFL;
2603*4882a593Smuzhiyun 
2604*4882a593Smuzhiyun 		/* Allocate as many blocks as possible at once. */
2605*4882a593Smuzhiyun 		error = xfs_alloc_ag_vextent(&targs);
2606*4882a593Smuzhiyun 		if (error)
2607*4882a593Smuzhiyun 			goto out_agflbp_relse;
2608*4882a593Smuzhiyun 
2609*4882a593Smuzhiyun 		/*
2610*4882a593Smuzhiyun 		 * Stop if we run out.  Won't happen if callers are obeying
2611*4882a593Smuzhiyun 		 * the restrictions correctly.  Can happen for free calls
2612*4882a593Smuzhiyun 		 * on a completely full ag.
2613*4882a593Smuzhiyun 		 */
2614*4882a593Smuzhiyun 		if (targs.agbno == NULLAGBLOCK) {
2615*4882a593Smuzhiyun 			if (flags & XFS_ALLOC_FLAG_FREEING)
2616*4882a593Smuzhiyun 				break;
2617*4882a593Smuzhiyun 			goto out_agflbp_relse;
2618*4882a593Smuzhiyun 		}
2619*4882a593Smuzhiyun 		/*
2620*4882a593Smuzhiyun 		 * Put each allocated block on the list.
2621*4882a593Smuzhiyun 		 */
2622*4882a593Smuzhiyun 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
2623*4882a593Smuzhiyun 			error = xfs_alloc_put_freelist(tp, agbp,
2624*4882a593Smuzhiyun 							agflbp, bno, 0);
2625*4882a593Smuzhiyun 			if (error)
2626*4882a593Smuzhiyun 				goto out_agflbp_relse;
2627*4882a593Smuzhiyun 		}
2628*4882a593Smuzhiyun 	}
2629*4882a593Smuzhiyun 	xfs_trans_brelse(tp, agflbp);
2630*4882a593Smuzhiyun 	args->agbp = agbp;
2631*4882a593Smuzhiyun 	return 0;
2632*4882a593Smuzhiyun 
2633*4882a593Smuzhiyun out_agflbp_relse:
2634*4882a593Smuzhiyun 	xfs_trans_brelse(tp, agflbp);
2635*4882a593Smuzhiyun out_agbp_relse:
2636*4882a593Smuzhiyun 	if (agbp)
2637*4882a593Smuzhiyun 		xfs_trans_brelse(tp, agbp);
2638*4882a593Smuzhiyun out_no_agbp:
2639*4882a593Smuzhiyun 	args->agbp = NULL;
2640*4882a593Smuzhiyun 	return error;
2641*4882a593Smuzhiyun }
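
/*
 * A numeric sketch of the fixup policy above (illustrative values): with
 * need = 10 and flcount = 12, the two surplus blocks are popped and
 * defer-freed; with flcount = 7, up to three blocks are allocated in as few
 * extents as possible and pushed back onto the freelist one slot at a time.
 */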
2642*4882a593Smuzhiyun 
2643*4882a593Smuzhiyun /*
2644*4882a593Smuzhiyun  * Get a block from the freelist.
2645*4882a593Smuzhiyun  * Returns with the buffer for the block gotten.
2646*4882a593Smuzhiyun  * The block's address is returned in *bnop (NULLAGBLOCK if the list is empty).
2647*4882a593Smuzhiyun int				/* error */
2648*4882a593Smuzhiyun xfs_alloc_get_freelist(
2649*4882a593Smuzhiyun 	xfs_trans_t	*tp,	/* transaction pointer */
2650*4882a593Smuzhiyun 	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
2651*4882a593Smuzhiyun 	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
2652*4882a593Smuzhiyun 	int		btreeblk) /* destination is an AGF btree */
2653*4882a593Smuzhiyun {
2654*4882a593Smuzhiyun 	struct xfs_agf	*agf = agbp->b_addr;
2655*4882a593Smuzhiyun 	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
2656*4882a593Smuzhiyun 	xfs_agblock_t	bno;	/* block number returned */
2657*4882a593Smuzhiyun 	__be32		*agfl_bno;
2658*4882a593Smuzhiyun 	int		error;
2659*4882a593Smuzhiyun 	int		logflags;
2660*4882a593Smuzhiyun 	xfs_mount_t	*mp = tp->t_mountp;
2661*4882a593Smuzhiyun 	xfs_perag_t	*pag;	/* per allocation group data */
2662*4882a593Smuzhiyun 
2663*4882a593Smuzhiyun 	/*
2664*4882a593Smuzhiyun 	 * Freelist is empty, give up.
2665*4882a593Smuzhiyun 	 */
2666*4882a593Smuzhiyun 	if (!agf->agf_flcount) {
2667*4882a593Smuzhiyun 		*bnop = NULLAGBLOCK;
2668*4882a593Smuzhiyun 		return 0;
2669*4882a593Smuzhiyun 	}
2670*4882a593Smuzhiyun 	/*
2671*4882a593Smuzhiyun 	 * Read the array of free blocks.
2672*4882a593Smuzhiyun 	 */
2673*4882a593Smuzhiyun 	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2674*4882a593Smuzhiyun 				    &agflbp);
2675*4882a593Smuzhiyun 	if (error)
2676*4882a593Smuzhiyun 		return error;
2677*4882a593Smuzhiyun 
2679*4882a593Smuzhiyun 	/*
2680*4882a593Smuzhiyun 	 * Get the block number and update the data structures.
2681*4882a593Smuzhiyun 	 */
2682*4882a593Smuzhiyun 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
2683*4882a593Smuzhiyun 	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2684*4882a593Smuzhiyun 	be32_add_cpu(&agf->agf_flfirst, 1);
2685*4882a593Smuzhiyun 	xfs_trans_brelse(tp, agflbp);
2686*4882a593Smuzhiyun 	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
2687*4882a593Smuzhiyun 		agf->agf_flfirst = 0;
2688*4882a593Smuzhiyun 
2689*4882a593Smuzhiyun 	pag = agbp->b_pag;
2690*4882a593Smuzhiyun 	ASSERT(!pag->pagf_agflreset);
2691*4882a593Smuzhiyun 	be32_add_cpu(&agf->agf_flcount, -1);
2692*4882a593Smuzhiyun 	xfs_trans_agflist_delta(tp, -1);
2693*4882a593Smuzhiyun 	pag->pagf_flcount--;
2694*4882a593Smuzhiyun 
2695*4882a593Smuzhiyun 	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2696*4882a593Smuzhiyun 	if (btreeblk) {
2697*4882a593Smuzhiyun 		be32_add_cpu(&agf->agf_btreeblks, 1);
2698*4882a593Smuzhiyun 		pag->pagf_btreeblks++;
2699*4882a593Smuzhiyun 		logflags |= XFS_AGF_BTREEBLKS;
2700*4882a593Smuzhiyun 	}
2701*4882a593Smuzhiyun 
2702*4882a593Smuzhiyun 	xfs_alloc_log_agf(tp, agbp, logflags);
2703*4882a593Smuzhiyun 	*bnop = bno;
2704*4882a593Smuzhiyun 
2705*4882a593Smuzhiyun 	return 0;
2706*4882a593Smuzhiyun }
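
/*
 * The AGFL is a fixed-size ring of block numbers indexed by agf_flfirst and
 * agf_fllast.  A minimal sketch of the "pop from the head" step above
 * (illustrative helper, not part of this file):
 */
static inline uint32_t
example_agfl_pop_slot(
	uint32_t	*first,
	uint32_t	size)
{
	uint32_t	slot = (*first)++;

	if (*first == size)		/* wrap the head index */
		*first = 0;
	return slot;
}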
2707*4882a593Smuzhiyun 
2708*4882a593Smuzhiyun /*
2709*4882a593Smuzhiyun  * Log the given fields from the agf structure.
2710*4882a593Smuzhiyun  */
2711*4882a593Smuzhiyun void
2712*4882a593Smuzhiyun xfs_alloc_log_agf(
2713*4882a593Smuzhiyun 	xfs_trans_t	*tp,	/* transaction pointer */
2714*4882a593Smuzhiyun 	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
2715*4882a593Smuzhiyun 	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
2716*4882a593Smuzhiyun {
2717*4882a593Smuzhiyun 	int	first;		/* first byte offset */
2718*4882a593Smuzhiyun 	int	last;		/* last byte offset */
2719*4882a593Smuzhiyun 	static const short	offsets[] = {
2720*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_magicnum),
2721*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_versionnum),
2722*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_seqno),
2723*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_length),
2724*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_roots[0]),
2725*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_levels[0]),
2726*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_flfirst),
2727*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_fllast),
2728*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_flcount),
2729*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_freeblks),
2730*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_longest),
2731*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_btreeblks),
2732*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_uuid),
2733*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_rmap_blocks),
2734*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_refcount_blocks),
2735*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_refcount_root),
2736*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_refcount_level),
2737*4882a593Smuzhiyun 		/* needed so that we don't log the whole rest of the structure: */
2738*4882a593Smuzhiyun 		offsetof(xfs_agf_t, agf_spare64),
2739*4882a593Smuzhiyun 		sizeof(xfs_agf_t)
2740*4882a593Smuzhiyun 	};
2741*4882a593Smuzhiyun 
2742*4882a593Smuzhiyun 	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
2743*4882a593Smuzhiyun 
2744*4882a593Smuzhiyun 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
2745*4882a593Smuzhiyun 
2746*4882a593Smuzhiyun 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2747*4882a593Smuzhiyun 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2748*4882a593Smuzhiyun }
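
/*
 * A worked example of the logging above, assuming xfs_btree_offsets() maps
 * the field bitmask to the smallest covering byte range: logging
 * XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT dirties offsetof(xfs_agf_t, agf_flfirst)
 * through offsetof(xfs_agf_t, agf_freeblks) - 1, so agf_fllast is logged as
 * well because it sits between the two requested fields.
 */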
2749*4882a593Smuzhiyun 
2750*4882a593Smuzhiyun /*
2751*4882a593Smuzhiyun  * Interface for inode allocation to force the pag data to be initialized.
2752*4882a593Smuzhiyun  */
2753*4882a593Smuzhiyun int					/* error */
2754*4882a593Smuzhiyun xfs_alloc_pagf_init(
2755*4882a593Smuzhiyun 	xfs_mount_t		*mp,	/* file system mount structure */
2756*4882a593Smuzhiyun 	xfs_trans_t		*tp,	/* transaction pointer */
2757*4882a593Smuzhiyun 	xfs_agnumber_t		agno,	/* allocation group number */
2758*4882a593Smuzhiyun 	int			flags)	/* XFS_ALLOC_FLAGS_... */
2759*4882a593Smuzhiyun {
2760*4882a593Smuzhiyun 	xfs_buf_t		*bp;
2761*4882a593Smuzhiyun 	int			error;
2762*4882a593Smuzhiyun 
2763*4882a593Smuzhiyun 	error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
2764*4882a593Smuzhiyun 	if (!error)
2765*4882a593Smuzhiyun 		xfs_trans_brelse(tp, bp);
2766*4882a593Smuzhiyun 	return error;
2767*4882a593Smuzhiyun }
2768*4882a593Smuzhiyun 
2769*4882a593Smuzhiyun /*
2770*4882a593Smuzhiyun  * Put the block on the freelist for the allocation group.
2771*4882a593Smuzhiyun  */
2772*4882a593Smuzhiyun int					/* error */
2773*4882a593Smuzhiyun xfs_alloc_put_freelist(
2774*4882a593Smuzhiyun 	xfs_trans_t		*tp,	/* transaction pointer */
2775*4882a593Smuzhiyun 	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
2776*4882a593Smuzhiyun 	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
2777*4882a593Smuzhiyun 	xfs_agblock_t		bno,	/* block being freed */
2778*4882a593Smuzhiyun 	int			btreeblk) /* block came from an AGF btree */
2779*4882a593Smuzhiyun {
2780*4882a593Smuzhiyun 	struct xfs_mount	*mp = tp->t_mountp;
2781*4882a593Smuzhiyun 	struct xfs_agf		*agf = agbp->b_addr;
2782*4882a593Smuzhiyun 	__be32			*blockp;/* pointer to array entry */
2783*4882a593Smuzhiyun 	int			error;
2784*4882a593Smuzhiyun 	int			logflags;
2785*4882a593Smuzhiyun 	xfs_perag_t		*pag;	/* per allocation group data */
2786*4882a593Smuzhiyun 	__be32			*agfl_bno;
2787*4882a593Smuzhiyun 	int			startoff;
2788*4882a593Smuzhiyun 
2789*4882a593Smuzhiyun 	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2790*4882a593Smuzhiyun 			be32_to_cpu(agf->agf_seqno), &agflbp)))
2791*4882a593Smuzhiyun 		return error;
2792*4882a593Smuzhiyun 	be32_add_cpu(&agf->agf_fllast, 1);
2793*4882a593Smuzhiyun 	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
2794*4882a593Smuzhiyun 		agf->agf_fllast = 0;
2795*4882a593Smuzhiyun 
2796*4882a593Smuzhiyun 	pag = agbp->b_pag;
2797*4882a593Smuzhiyun 	ASSERT(!pag->pagf_agflreset);
2798*4882a593Smuzhiyun 	be32_add_cpu(&agf->agf_flcount, 1);
2799*4882a593Smuzhiyun 	xfs_trans_agflist_delta(tp, 1);
2800*4882a593Smuzhiyun 	pag->pagf_flcount++;
2801*4882a593Smuzhiyun 
2802*4882a593Smuzhiyun 	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2803*4882a593Smuzhiyun 	if (btreeblk) {
2804*4882a593Smuzhiyun 		be32_add_cpu(&agf->agf_btreeblks, -1);
2805*4882a593Smuzhiyun 		pag->pagf_btreeblks--;
2806*4882a593Smuzhiyun 		logflags |= XFS_AGF_BTREEBLKS;
2807*4882a593Smuzhiyun 	}
2808*4882a593Smuzhiyun 
2809*4882a593Smuzhiyun 	xfs_alloc_log_agf(tp, agbp, logflags);
2810*4882a593Smuzhiyun 
2811*4882a593Smuzhiyun 	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
2812*4882a593Smuzhiyun 
2813*4882a593Smuzhiyun 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
2814*4882a593Smuzhiyun 	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
2815*4882a593Smuzhiyun 	*blockp = cpu_to_be32(bno);
2816*4882a593Smuzhiyun 	startoff = (char *)blockp - (char *)agflbp->b_addr;
2817*4882a593Smuzhiyun 
2818*4882a593Smuzhiyun 	xfs_alloc_log_agf(tp, agbp, logflags);
2819*4882a593Smuzhiyun 
2820*4882a593Smuzhiyun 	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
2821*4882a593Smuzhiyun 	xfs_trans_log_buf(tp, agflbp, startoff,
2822*4882a593Smuzhiyun 			  startoff + sizeof(xfs_agblock_t) - 1);
2823*4882a593Smuzhiyun 	return 0;
2824*4882a593Smuzhiyun }
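
/*
 * The mirror of the pop sketch above: pushing onto the AGFL advances the
 * tail index with the same wrap rule (illustrative helper, not part of this
 * file).
 */
static inline void
example_agfl_push_slot(
	uint32_t	*last,
	uint32_t	*ring,
	uint32_t	size,
	uint32_t	bno)
{
	if (++(*last) == size)		/* wrap the tail index */
		*last = 0;
	ring[*last] = bno;		/* store, then log just this slot */
}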
2825*4882a593Smuzhiyun 
2826*4882a593Smuzhiyun static xfs_failaddr_t
2827*4882a593Smuzhiyun xfs_agf_verify(
2828*4882a593Smuzhiyun 	struct xfs_buf		*bp)
2829*4882a593Smuzhiyun {
2830*4882a593Smuzhiyun 	struct xfs_mount	*mp = bp->b_mount;
2831*4882a593Smuzhiyun 	struct xfs_agf		*agf = bp->b_addr;
2832*4882a593Smuzhiyun 
2833*4882a593Smuzhiyun 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
2834*4882a593Smuzhiyun 		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2835*4882a593Smuzhiyun 			return __this_address;
2836*4882a593Smuzhiyun 		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
2837*4882a593Smuzhiyun 			return __this_address;
2838*4882a593Smuzhiyun 	}
2839*4882a593Smuzhiyun 
2840*4882a593Smuzhiyun 	if (!xfs_verify_magic(bp, agf->agf_magicnum))
2841*4882a593Smuzhiyun 		return __this_address;
2842*4882a593Smuzhiyun 
2843*4882a593Smuzhiyun 	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2844*4882a593Smuzhiyun 	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2845*4882a593Smuzhiyun 	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2846*4882a593Smuzhiyun 	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2847*4882a593Smuzhiyun 	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
2848*4882a593Smuzhiyun 		return __this_address;
2849*4882a593Smuzhiyun 
2850*4882a593Smuzhiyun 	if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
2851*4882a593Smuzhiyun 		return __this_address;
2852*4882a593Smuzhiyun 
2853*4882a593Smuzhiyun 	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
2854*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
2855*4882a593Smuzhiyun 		return __this_address;
2856*4882a593Smuzhiyun 
2857*4882a593Smuzhiyun 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2858*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2859*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2860*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2861*4882a593Smuzhiyun 		return __this_address;
2862*4882a593Smuzhiyun 
2863*4882a593Smuzhiyun 	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2864*4882a593Smuzhiyun 	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2865*4882a593Smuzhiyun 	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
2866*4882a593Smuzhiyun 		return __this_address;
2867*4882a593Smuzhiyun 
2868*4882a593Smuzhiyun 	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2869*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
2870*4882a593Smuzhiyun 		return __this_address;
2871*4882a593Smuzhiyun 
2872*4882a593Smuzhiyun 	/*
2873*4882a593Smuzhiyun 	 * during growfs operations, the perag is not fully initialised,
2874*4882a593Smuzhiyun 	 * so we can't use it for any useful checking. growfs ensures we can't
2875*4882a593Smuzhiyun 	 * use it by using uncached buffers that don't have the perag attached
2876*4882a593Smuzhiyun 	 * so we can detect and avoid this problem.
2877*4882a593Smuzhiyun 	 */
2878*4882a593Smuzhiyun 	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2879*4882a593Smuzhiyun 		return __this_address;
2880*4882a593Smuzhiyun 
2881*4882a593Smuzhiyun 	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2882*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2883*4882a593Smuzhiyun 		return __this_address;
2884*4882a593Smuzhiyun 
2885*4882a593Smuzhiyun 	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2886*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_refcount_blocks) >
2887*4882a593Smuzhiyun 	    be32_to_cpu(agf->agf_length))
2888*4882a593Smuzhiyun 		return __this_address;
2889*4882a593Smuzhiyun 
2890*4882a593Smuzhiyun 	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2891*4882a593Smuzhiyun 	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2892*4882a593Smuzhiyun 	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
2893*4882a593Smuzhiyun 		return __this_address;
2894*4882a593Smuzhiyun 
2895*4882a593Smuzhiyun 	return NULL;
2897*4882a593Smuzhiyun }
2898*4882a593Smuzhiyun 
2899*4882a593Smuzhiyun static void
2900*4882a593Smuzhiyun xfs_agf_read_verify(
2901*4882a593Smuzhiyun 	struct xfs_buf	*bp)
2902*4882a593Smuzhiyun {
2903*4882a593Smuzhiyun 	struct xfs_mount *mp = bp->b_mount;
2904*4882a593Smuzhiyun 	xfs_failaddr_t	fa;
2905*4882a593Smuzhiyun 
2906*4882a593Smuzhiyun 	if (xfs_sb_version_hascrc(&mp->m_sb) &&
2907*4882a593Smuzhiyun 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2908*4882a593Smuzhiyun 		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2909*4882a593Smuzhiyun 	else {
2910*4882a593Smuzhiyun 		fa = xfs_agf_verify(bp);
2911*4882a593Smuzhiyun 		if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2912*4882a593Smuzhiyun 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2913*4882a593Smuzhiyun 	}
2914*4882a593Smuzhiyun }
2915*4882a593Smuzhiyun 
2916*4882a593Smuzhiyun static void
2917*4882a593Smuzhiyun xfs_agf_write_verify(
2918*4882a593Smuzhiyun 	struct xfs_buf	*bp)
2919*4882a593Smuzhiyun {
2920*4882a593Smuzhiyun 	struct xfs_mount	*mp = bp->b_mount;
2921*4882a593Smuzhiyun 	struct xfs_buf_log_item	*bip = bp->b_log_item;
2922*4882a593Smuzhiyun 	struct xfs_agf		*agf = bp->b_addr;
2923*4882a593Smuzhiyun 	xfs_failaddr_t		fa;
2924*4882a593Smuzhiyun 
2925*4882a593Smuzhiyun 	fa = xfs_agf_verify(bp);
2926*4882a593Smuzhiyun 	if (fa) {
2927*4882a593Smuzhiyun 		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2928*4882a593Smuzhiyun 		return;
2929*4882a593Smuzhiyun 	}
2930*4882a593Smuzhiyun 
2931*4882a593Smuzhiyun 	if (!xfs_sb_version_hascrc(&mp->m_sb))
2932*4882a593Smuzhiyun 		return;
2933*4882a593Smuzhiyun 
2934*4882a593Smuzhiyun 	if (bip)
2935*4882a593Smuzhiyun 		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2936*4882a593Smuzhiyun 
2937*4882a593Smuzhiyun 	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2938*4882a593Smuzhiyun }
2939*4882a593Smuzhiyun 
2940*4882a593Smuzhiyun const struct xfs_buf_ops xfs_agf_buf_ops = {
2941*4882a593Smuzhiyun 	.name = "xfs_agf",
2942*4882a593Smuzhiyun 	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
2943*4882a593Smuzhiyun 	.verify_read = xfs_agf_read_verify,
2944*4882a593Smuzhiyun 	.verify_write = xfs_agf_write_verify,
2945*4882a593Smuzhiyun 	.verify_struct = xfs_agf_verify,
2946*4882a593Smuzhiyun };
2947*4882a593Smuzhiyun 
2948*4882a593Smuzhiyun /*
2949*4882a593Smuzhiyun  * Read in the allocation group header (free/alloc section).
2950*4882a593Smuzhiyun  */
2951*4882a593Smuzhiyun int					/* error */
2952*4882a593Smuzhiyun xfs_read_agf(
2953*4882a593Smuzhiyun 	struct xfs_mount	*mp,	/* mount point structure */
2954*4882a593Smuzhiyun 	struct xfs_trans	*tp,	/* transaction pointer */
2955*4882a593Smuzhiyun 	xfs_agnumber_t		agno,	/* allocation group number */
2956*4882a593Smuzhiyun 	int			flags,	/* XFS_BUF_ */
2957*4882a593Smuzhiyun 	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2958*4882a593Smuzhiyun {
2959*4882a593Smuzhiyun 	int		error;
2960*4882a593Smuzhiyun 
2961*4882a593Smuzhiyun 	trace_xfs_read_agf(mp, agno);
2962*4882a593Smuzhiyun 
2963*4882a593Smuzhiyun 	ASSERT(agno != NULLAGNUMBER);
2964*4882a593Smuzhiyun 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
2965*4882a593Smuzhiyun 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2966*4882a593Smuzhiyun 			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2967*4882a593Smuzhiyun 	if (error)
2968*4882a593Smuzhiyun 		return error;
2969*4882a593Smuzhiyun 
2970*4882a593Smuzhiyun 	ASSERT(!(*bpp)->b_error);
2971*4882a593Smuzhiyun 	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2972*4882a593Smuzhiyun 	return 0;
2973*4882a593Smuzhiyun }
2974*4882a593Smuzhiyun 
2975*4882a593Smuzhiyun /*
2976*4882a593Smuzhiyun  * Read in the allocation group header (free/alloc section).
2977*4882a593Smuzhiyun  */
2978*4882a593Smuzhiyun int					/* error */
2979*4882a593Smuzhiyun xfs_alloc_read_agf(
2980*4882a593Smuzhiyun 	struct xfs_mount	*mp,	/* mount point structure */
2981*4882a593Smuzhiyun 	struct xfs_trans	*tp,	/* transaction pointer */
2982*4882a593Smuzhiyun 	xfs_agnumber_t		agno,	/* allocation group number */
2983*4882a593Smuzhiyun 	int			flags,	/* XFS_ALLOC_FLAG_... */
2984*4882a593Smuzhiyun 	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
2985*4882a593Smuzhiyun {
2986*4882a593Smuzhiyun 	struct xfs_agf		*agf;		/* ag freelist header */
2987*4882a593Smuzhiyun 	struct xfs_perag	*pag;		/* per allocation group data */
2988*4882a593Smuzhiyun 	int			error;
2989*4882a593Smuzhiyun 
2990*4882a593Smuzhiyun 	trace_xfs_alloc_read_agf(mp, agno);
2991*4882a593Smuzhiyun 
2992*4882a593Smuzhiyun 	/* We don't support trylock when freeing. */
2993*4882a593Smuzhiyun 	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
2994*4882a593Smuzhiyun 			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
2995*4882a593Smuzhiyun 	ASSERT(agno != NULLAGNUMBER);
2996*4882a593Smuzhiyun 	error = xfs_read_agf(mp, tp, agno,
2997*4882a593Smuzhiyun 			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2998*4882a593Smuzhiyun 			bpp);
2999*4882a593Smuzhiyun 	if (error)
3000*4882a593Smuzhiyun 		return error;
3001*4882a593Smuzhiyun 	ASSERT(!(*bpp)->b_error);
3002*4882a593Smuzhiyun 
3003*4882a593Smuzhiyun 	agf = (*bpp)->b_addr;
3004*4882a593Smuzhiyun 	pag = (*bpp)->b_pag;
3005*4882a593Smuzhiyun 	if (!pag->pagf_init) {
3006*4882a593Smuzhiyun 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
3007*4882a593Smuzhiyun 		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
3008*4882a593Smuzhiyun 		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
3009*4882a593Smuzhiyun 		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
3010*4882a593Smuzhiyun 		pag->pagf_levels[XFS_BTNUM_BNOi] =
3011*4882a593Smuzhiyun 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
3012*4882a593Smuzhiyun 		pag->pagf_levels[XFS_BTNUM_CNTi] =
3013*4882a593Smuzhiyun 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
3014*4882a593Smuzhiyun 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
3015*4882a593Smuzhiyun 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
3016*4882a593Smuzhiyun 		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
3017*4882a593Smuzhiyun 		pag->pagf_init = 1;
3018*4882a593Smuzhiyun 		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
3019*4882a593Smuzhiyun 	}
3020*4882a593Smuzhiyun #ifdef DEBUG
3021*4882a593Smuzhiyun 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
3022*4882a593Smuzhiyun 		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
3023*4882a593Smuzhiyun 		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
3024*4882a593Smuzhiyun 		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3025*4882a593Smuzhiyun 		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
3026*4882a593Smuzhiyun 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
3027*4882a593Smuzhiyun 		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
3028*4882a593Smuzhiyun 		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
3029*4882a593Smuzhiyun 		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
3030*4882a593Smuzhiyun 	}
3031*4882a593Smuzhiyun #endif
3032*4882a593Smuzhiyun 	return 0;
3033*4882a593Smuzhiyun }
3034*4882a593Smuzhiyun 
3035*4882a593Smuzhiyun /*
3036*4882a593Smuzhiyun  * Allocate an extent (variable-size).
3037*4882a593Smuzhiyun  * Depending on the allocation type, we either look in a single allocation
3038*4882a593Smuzhiyun  * group or loop over the allocation groups to find the result.
3039*4882a593Smuzhiyun  */
3040*4882a593Smuzhiyun int				/* error */
3041*4882a593Smuzhiyun xfs_alloc_vextent(
3042*4882a593Smuzhiyun 	struct xfs_alloc_arg	*args)	/* allocation argument structure */
3043*4882a593Smuzhiyun {
3044*4882a593Smuzhiyun 	xfs_agblock_t		agsize;	/* allocation group size */
3045*4882a593Smuzhiyun 	int			error;
3046*4882a593Smuzhiyun 	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
3047*4882a593Smuzhiyun 	struct xfs_mount	*mp;	/* mount structure pointer */
3048*4882a593Smuzhiyun 	xfs_agnumber_t		sagno;	/* starting allocation group number */
3049*4882a593Smuzhiyun 	xfs_alloctype_t		type;	/* input allocation type */
3050*4882a593Smuzhiyun 	int			bump_rotor = 0;
3051*4882a593Smuzhiyun 	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */
3052*4882a593Smuzhiyun 
3053*4882a593Smuzhiyun 	mp = args->mp;
3054*4882a593Smuzhiyun 	type = args->otype = args->type;
3055*4882a593Smuzhiyun 	args->agbno = NULLAGBLOCK;
3056*4882a593Smuzhiyun 	/*
3057*4882a593Smuzhiyun 	 * Just fix this up, for the case where the last a.g. is shorter
3058*4882a593Smuzhiyun 	 * (or there's only one a.g.) and the caller couldn't easily figure
3059*4882a593Smuzhiyun 	 * that out (xfs_bmap_alloc).
3060*4882a593Smuzhiyun 	 */
3061*4882a593Smuzhiyun 	agsize = mp->m_sb.sb_agblocks;
3062*4882a593Smuzhiyun 	if (args->maxlen > agsize)
3063*4882a593Smuzhiyun 		args->maxlen = agsize;
3064*4882a593Smuzhiyun 	if (args->alignment == 0)
3065*4882a593Smuzhiyun 		args->alignment = 1;
3066*4882a593Smuzhiyun 	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
3067*4882a593Smuzhiyun 	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
3068*4882a593Smuzhiyun 	ASSERT(args->minlen <= args->maxlen);
3069*4882a593Smuzhiyun 	ASSERT(args->minlen <= agsize);
3070*4882a593Smuzhiyun 	ASSERT(args->mod < args->prod);
3071*4882a593Smuzhiyun 	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
3072*4882a593Smuzhiyun 	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
3073*4882a593Smuzhiyun 	    args->minlen > args->maxlen || args->minlen > agsize ||
3074*4882a593Smuzhiyun 	    args->mod >= args->prod) {
3075*4882a593Smuzhiyun 		args->fsbno = NULLFSBLOCK;
3076*4882a593Smuzhiyun 		trace_xfs_alloc_vextent_badargs(args);
3077*4882a593Smuzhiyun 		return 0;
3078*4882a593Smuzhiyun 	}
3079*4882a593Smuzhiyun 
3080*4882a593Smuzhiyun 	switch (type) {
3081*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_THIS_AG:
3082*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_NEAR_BNO:
3083*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_THIS_BNO:
3084*4882a593Smuzhiyun 		/*
3085*4882a593Smuzhiyun 		 * These three force us into a single a.g.
3086*4882a593Smuzhiyun 		 */
3087*4882a593Smuzhiyun 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3088*4882a593Smuzhiyun 		args->pag = xfs_perag_get(mp, args->agno);
3089*4882a593Smuzhiyun 		error = xfs_alloc_fix_freelist(args, 0);
3090*4882a593Smuzhiyun 		if (error) {
3091*4882a593Smuzhiyun 			trace_xfs_alloc_vextent_nofix(args);
3092*4882a593Smuzhiyun 			goto error0;
3093*4882a593Smuzhiyun 		}
3094*4882a593Smuzhiyun 		if (!args->agbp) {
3095*4882a593Smuzhiyun 			trace_xfs_alloc_vextent_noagbp(args);
3096*4882a593Smuzhiyun 			break;
3097*4882a593Smuzhiyun 		}
3098*4882a593Smuzhiyun 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3099*4882a593Smuzhiyun 		if ((error = xfs_alloc_ag_vextent(args)))
3100*4882a593Smuzhiyun 			goto error0;
3101*4882a593Smuzhiyun 		break;
3102*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_START_BNO:
3103*4882a593Smuzhiyun 		/*
3104*4882a593Smuzhiyun 		 * Try near allocation first, then anywhere-in-ag after
3105*4882a593Smuzhiyun 		 * the first a.g. fails.
3106*4882a593Smuzhiyun 		 */
3107*4882a593Smuzhiyun 		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3108*4882a593Smuzhiyun 		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
3109*4882a593Smuzhiyun 			args->fsbno = XFS_AGB_TO_FSB(mp,
3110*4882a593Smuzhiyun 					((mp->m_agfrotor / rotorstep) %
3111*4882a593Smuzhiyun 					mp->m_sb.sb_agcount), 0);
3112*4882a593Smuzhiyun 			bump_rotor = 1;
3113*4882a593Smuzhiyun 		}
3114*4882a593Smuzhiyun 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3115*4882a593Smuzhiyun 		args->type = XFS_ALLOCTYPE_NEAR_BNO;
3116*4882a593Smuzhiyun 		/* FALLTHROUGH */
3117*4882a593Smuzhiyun 	case XFS_ALLOCTYPE_FIRST_AG:
3118*4882a593Smuzhiyun 		/*
3119*4882a593Smuzhiyun 		 * Rotate through the allocation groups looking for a winner.
3120*4882a593Smuzhiyun 		 */
3121*4882a593Smuzhiyun 		if (type == XFS_ALLOCTYPE_FIRST_AG) {
3122*4882a593Smuzhiyun 			/*
3123*4882a593Smuzhiyun 			 * Start with allocation group given by bno.
3124*4882a593Smuzhiyun 			 */
3125*4882a593Smuzhiyun 			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3126*4882a593Smuzhiyun 			args->type = XFS_ALLOCTYPE_THIS_AG;
3127*4882a593Smuzhiyun 			sagno = 0;
3128*4882a593Smuzhiyun 			flags = 0;
3129*4882a593Smuzhiyun 		} else {
3130*4882a593Smuzhiyun 			/*
3131*4882a593Smuzhiyun 			 * Start with the given allocation group.
3132*4882a593Smuzhiyun 			 */
3133*4882a593Smuzhiyun 			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3134*4882a593Smuzhiyun 			flags = XFS_ALLOC_FLAG_TRYLOCK;
3135*4882a593Smuzhiyun 		}
3136*4882a593Smuzhiyun 		/*
3137*4882a593Smuzhiyun 		 * Loop over allocation groups twice; first time with
3138*4882a593Smuzhiyun 		 * trylock set, second time without.
3139*4882a593Smuzhiyun 		 */
3140*4882a593Smuzhiyun 		for (;;) {
3141*4882a593Smuzhiyun 			args->pag = xfs_perag_get(mp, args->agno);
3142*4882a593Smuzhiyun 			error = xfs_alloc_fix_freelist(args, flags);
3143*4882a593Smuzhiyun 			if (error) {
3144*4882a593Smuzhiyun 				trace_xfs_alloc_vextent_nofix(args);
3145*4882a593Smuzhiyun 				goto error0;
3146*4882a593Smuzhiyun 			}
3147*4882a593Smuzhiyun 			/*
3148*4882a593Smuzhiyun 			 * If we get a buffer back then the allocation will fly.
3149*4882a593Smuzhiyun 			 */
3150*4882a593Smuzhiyun 			if (args->agbp) {
3151*4882a593Smuzhiyun 				if ((error = xfs_alloc_ag_vextent(args)))
3152*4882a593Smuzhiyun 					goto error0;
3153*4882a593Smuzhiyun 				break;
3154*4882a593Smuzhiyun 			}
3155*4882a593Smuzhiyun 
3156*4882a593Smuzhiyun 			trace_xfs_alloc_vextent_loopfailed(args);
3157*4882a593Smuzhiyun 
3158*4882a593Smuzhiyun 			/*
3159*4882a593Smuzhiyun 			 * Didn't work, figure out the next iteration.
3160*4882a593Smuzhiyun 			 */
3161*4882a593Smuzhiyun 			if (args->agno == sagno &&
3162*4882a593Smuzhiyun 			    type == XFS_ALLOCTYPE_START_BNO)
3163*4882a593Smuzhiyun 				args->type = XFS_ALLOCTYPE_THIS_AG;
3164*4882a593Smuzhiyun 			/*
3165*4882a593Smuzhiyun 			 * For the first allocation, we can try any AG to get
3166*4882a593Smuzhiyun 			 * space.  However, if we already have allocated a
3167*4882a593Smuzhiyun 			 * block, we don't want to try AGs whose number is below
3168*4882a593Smuzhiyun 			 * sagno. Otherwise, we may end up with out-of-order
3169*4882a593Smuzhiyun 			 * locking of AGF, which might cause deadlock.
3170*4882a593Smuzhiyun 			 */
3171*4882a593Smuzhiyun 			if (++(args->agno) == mp->m_sb.sb_agcount) {
3172*4882a593Smuzhiyun 				if (args->tp->t_firstblock != NULLFSBLOCK)
3173*4882a593Smuzhiyun 					args->agno = sagno;
3174*4882a593Smuzhiyun 				else
3175*4882a593Smuzhiyun 					args->agno = 0;
3176*4882a593Smuzhiyun 			}
3177*4882a593Smuzhiyun 			/*
3178*4882a593Smuzhiyun 			 * Reached the starting a.g., must either be done
3179*4882a593Smuzhiyun 			 * or switch to non-trylock mode.
3180*4882a593Smuzhiyun 			 */
3181*4882a593Smuzhiyun 			if (args->agno == sagno) {
3182*4882a593Smuzhiyun 				if (flags == 0) {
3183*4882a593Smuzhiyun 					args->agbno = NULLAGBLOCK;
3184*4882a593Smuzhiyun 					trace_xfs_alloc_vextent_allfailed(args);
3185*4882a593Smuzhiyun 					break;
3186*4882a593Smuzhiyun 				}
3187*4882a593Smuzhiyun 
3188*4882a593Smuzhiyun 				flags = 0;
3189*4882a593Smuzhiyun 				if (type == XFS_ALLOCTYPE_START_BNO) {
3190*4882a593Smuzhiyun 					args->agbno = XFS_FSB_TO_AGBNO(mp,
3191*4882a593Smuzhiyun 						args->fsbno);
3192*4882a593Smuzhiyun 					args->type = XFS_ALLOCTYPE_NEAR_BNO;
3193*4882a593Smuzhiyun 				}
3194*4882a593Smuzhiyun 			}
3195*4882a593Smuzhiyun 			xfs_perag_put(args->pag);
3196*4882a593Smuzhiyun 		}
3197*4882a593Smuzhiyun 		if (bump_rotor) {
3198*4882a593Smuzhiyun 			if (args->agno == sagno)
3199*4882a593Smuzhiyun 				mp->m_agfrotor = (mp->m_agfrotor + 1) %
3200*4882a593Smuzhiyun 					(mp->m_sb.sb_agcount * rotorstep);
3201*4882a593Smuzhiyun 			else
3202*4882a593Smuzhiyun 				mp->m_agfrotor = (args->agno * rotorstep + 1) %
3203*4882a593Smuzhiyun 					(mp->m_sb.sb_agcount * rotorstep);
3204*4882a593Smuzhiyun 		}
3205*4882a593Smuzhiyun 		break;
3206*4882a593Smuzhiyun 	default:
3207*4882a593Smuzhiyun 		ASSERT(0);
3208*4882a593Smuzhiyun 		/* NOTREACHED */
3209*4882a593Smuzhiyun 	}
3210*4882a593Smuzhiyun 	if (args->agbno == NULLAGBLOCK)
3211*4882a593Smuzhiyun 		args->fsbno = NULLFSBLOCK;
3212*4882a593Smuzhiyun 	else {
3213*4882a593Smuzhiyun 		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3214*4882a593Smuzhiyun #ifdef DEBUG
3215*4882a593Smuzhiyun 		ASSERT(args->len >= args->minlen);
3216*4882a593Smuzhiyun 		ASSERT(args->len <= args->maxlen);
3217*4882a593Smuzhiyun 		ASSERT(args->agbno % args->alignment == 0);
3218*4882a593Smuzhiyun 		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3219*4882a593Smuzhiyun 			args->len);
3220*4882a593Smuzhiyun #endif
3221*4882a593Smuzhiyun 
3222*4882a593Smuzhiyun 	}
3223*4882a593Smuzhiyun 	xfs_perag_put(args->pag);
3224*4882a593Smuzhiyun 	return 0;
3225*4882a593Smuzhiyun error0:
3226*4882a593Smuzhiyun 	xfs_perag_put(args->pag);
3227*4882a593Smuzhiyun 	return error;
3228*4882a593Smuzhiyun }
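
/*
 * A minimal sketch of the inode32 rotor arithmetic above: with rotorstep = N,
 * N consecutive initial user-data allocations start in the same AG before
 * the rotor moves on, spreading new files across the allocation groups.
 */
static inline xfs_agnumber_t
example_rotor_start_ag(
	unsigned int	agfrotor,
	unsigned int	rotorstep,
	xfs_agnumber_t	agcount)
{
	return (agfrotor / rotorstep) % agcount;
}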
3229*4882a593Smuzhiyun 
3230*4882a593Smuzhiyun /* Ensure that the freelist is at full capacity. */
3231*4882a593Smuzhiyun int
3232*4882a593Smuzhiyun xfs_free_extent_fix_freelist(
3233*4882a593Smuzhiyun 	struct xfs_trans	*tp,
3234*4882a593Smuzhiyun 	xfs_agnumber_t		agno,
3235*4882a593Smuzhiyun 	struct xfs_buf		**agbp)
3236*4882a593Smuzhiyun {
3237*4882a593Smuzhiyun 	struct xfs_alloc_arg	args;
3238*4882a593Smuzhiyun 	int			error;
3239*4882a593Smuzhiyun 
3240*4882a593Smuzhiyun 	memset(&args, 0, sizeof(struct xfs_alloc_arg));
3241*4882a593Smuzhiyun 	args.tp = tp;
3242*4882a593Smuzhiyun 	args.mp = tp->t_mountp;
3243*4882a593Smuzhiyun 	args.agno = agno;
3244*4882a593Smuzhiyun 
3245*4882a593Smuzhiyun 	/*
3246*4882a593Smuzhiyun 	 * Validate that the AG number is legal - this enables us to detect
3247*4882a593Smuzhiyun 	 * and handle silent filesystem corruption rather than crashing.
3248*4882a593Smuzhiyun 	 */
3249*4882a593Smuzhiyun 	if (args.agno >= args.mp->m_sb.sb_agcount)
3250*4882a593Smuzhiyun 		return -EFSCORRUPTED;
3251*4882a593Smuzhiyun 
3252*4882a593Smuzhiyun 	args.pag = xfs_perag_get(args.mp, args.agno);
3253*4882a593Smuzhiyun 	ASSERT(args.pag);
3254*4882a593Smuzhiyun 
3255*4882a593Smuzhiyun 	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3256*4882a593Smuzhiyun 	if (error)
3257*4882a593Smuzhiyun 		goto out;
3258*4882a593Smuzhiyun 
3259*4882a593Smuzhiyun 	*agbp = args.agbp;
3260*4882a593Smuzhiyun out:
3261*4882a593Smuzhiyun 	xfs_perag_put(args.pag);
3262*4882a593Smuzhiyun 	return error;
3263*4882a593Smuzhiyun }
3264*4882a593Smuzhiyun 
3265*4882a593Smuzhiyun /*
3266*4882a593Smuzhiyun  * Free an extent.
3267*4882a593Smuzhiyun  * Just break up the extent address and hand off to xfs_free_ag_extent
3268*4882a593Smuzhiyun  * after fixing up the freelist.
3269*4882a593Smuzhiyun  */
3270*4882a593Smuzhiyun int
3271*4882a593Smuzhiyun __xfs_free_extent(
3272*4882a593Smuzhiyun 	struct xfs_trans		*tp,
3273*4882a593Smuzhiyun 	xfs_fsblock_t			bno,
3274*4882a593Smuzhiyun 	xfs_extlen_t			len,
3275*4882a593Smuzhiyun 	const struct xfs_owner_info	*oinfo,
3276*4882a593Smuzhiyun 	enum xfs_ag_resv_type		type,
3277*4882a593Smuzhiyun 	bool				skip_discard)
3278*4882a593Smuzhiyun {
3279*4882a593Smuzhiyun 	struct xfs_mount		*mp = tp->t_mountp;
3280*4882a593Smuzhiyun 	struct xfs_buf			*agbp;
3281*4882a593Smuzhiyun 	xfs_agnumber_t			agno = XFS_FSB_TO_AGNO(mp, bno);
3282*4882a593Smuzhiyun 	xfs_agblock_t			agbno = XFS_FSB_TO_AGBNO(mp, bno);
3283*4882a593Smuzhiyun 	struct xfs_agf			*agf;
3284*4882a593Smuzhiyun 	int				error;
3285*4882a593Smuzhiyun 	unsigned int			busy_flags = 0;
3286*4882a593Smuzhiyun 
3287*4882a593Smuzhiyun 	ASSERT(len != 0);
3288*4882a593Smuzhiyun 	ASSERT(type != XFS_AG_RESV_AGFL);
3289*4882a593Smuzhiyun 
3290*4882a593Smuzhiyun 	if (XFS_TEST_ERROR(false, mp,
3291*4882a593Smuzhiyun 			XFS_ERRTAG_FREE_EXTENT))
3292*4882a593Smuzhiyun 		return -EIO;
3293*4882a593Smuzhiyun 
3294*4882a593Smuzhiyun 	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3295*4882a593Smuzhiyun 	if (error)
3296*4882a593Smuzhiyun 		return error;
3297*4882a593Smuzhiyun 	agf = agbp->b_addr;
3298*4882a593Smuzhiyun 
3299*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3300*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
3301*4882a593Smuzhiyun 		goto err;
3302*4882a593Smuzhiyun 	}
3303*4882a593Smuzhiyun 
3304*4882a593Smuzhiyun 	/* validate the extent size is legal now we have the agf locked */
3305*4882a593Smuzhiyun 	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
3306*4882a593Smuzhiyun 		error = -EFSCORRUPTED;
3307*4882a593Smuzhiyun 		goto err;
3308*4882a593Smuzhiyun 	}
3309*4882a593Smuzhiyun 
3310*4882a593Smuzhiyun 	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
3311*4882a593Smuzhiyun 	if (error)
3312*4882a593Smuzhiyun 		goto err;
3313*4882a593Smuzhiyun 
3314*4882a593Smuzhiyun 	if (skip_discard)
3315*4882a593Smuzhiyun 		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3316*4882a593Smuzhiyun 	xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
3317*4882a593Smuzhiyun 	return 0;
3318*4882a593Smuzhiyun 
3319*4882a593Smuzhiyun err:
3320*4882a593Smuzhiyun 	xfs_trans_brelse(tp, agbp);
3321*4882a593Smuzhiyun 	return error;
3322*4882a593Smuzhiyun }
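
/*
 * Editorial sketch (not part of the upstream file): callers usually reach
 * __xfs_free_extent() through the xfs_free_extent() inline wrapper in
 * xfs_alloc.h, which passes skip_discard == false.  The owner info and
 * reservation type below are plausible choices for an extent with no
 * specific rmap owner, not a fixed rule.
 */
#if 0	/* illustrative only */
static int
example_free_data_extent(
	struct xfs_trans	*tp,
	xfs_fsblock_t		fsbno,
	xfs_extlen_t		len)
{
	/*
	 * No need to split fsbno by hand; __xfs_free_extent() derives the
	 * AG number and AG-relative block itself via XFS_FSB_TO_AGNO() and
	 * XFS_FSB_TO_AGBNO().
	 */
	return xfs_free_extent(tp, fsbno, len, &XFS_RMAP_OINFO_ANY_OWNER,
			XFS_AG_RESV_NONE);
}
#endif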
3323*4882a593Smuzhiyun 
3324*4882a593Smuzhiyun struct xfs_alloc_query_range_info {
3325*4882a593Smuzhiyun 	xfs_alloc_query_range_fn	fn;
3326*4882a593Smuzhiyun 	void				*priv;
3327*4882a593Smuzhiyun };
3328*4882a593Smuzhiyun 
3329*4882a593Smuzhiyun /* Format btree record and pass to our callback. */
3330*4882a593Smuzhiyun STATIC int
3331*4882a593Smuzhiyun xfs_alloc_query_range_helper(
3332*4882a593Smuzhiyun 	struct xfs_btree_cur		*cur,
3333*4882a593Smuzhiyun 	union xfs_btree_rec		*rec,
3334*4882a593Smuzhiyun 	void				*priv)
3335*4882a593Smuzhiyun {
3336*4882a593Smuzhiyun 	struct xfs_alloc_query_range_info	*query = priv;
3337*4882a593Smuzhiyun 	struct xfs_alloc_rec_incore		irec;
3338*4882a593Smuzhiyun 
3339*4882a593Smuzhiyun 	irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3340*4882a593Smuzhiyun 	irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3341*4882a593Smuzhiyun 	return query->fn(cur, &irec, query->priv);
3342*4882a593Smuzhiyun }
3343*4882a593Smuzhiyun 
3344*4882a593Smuzhiyun /* Find all free space within a given range of blocks. */
3345*4882a593Smuzhiyun int
3346*4882a593Smuzhiyun xfs_alloc_query_range(
3347*4882a593Smuzhiyun 	struct xfs_btree_cur			*cur,
3348*4882a593Smuzhiyun 	struct xfs_alloc_rec_incore		*low_rec,
3349*4882a593Smuzhiyun 	struct xfs_alloc_rec_incore		*high_rec,
3350*4882a593Smuzhiyun 	xfs_alloc_query_range_fn		fn,
3351*4882a593Smuzhiyun 	void					*priv)
3352*4882a593Smuzhiyun {
3353*4882a593Smuzhiyun 	union xfs_btree_irec			low_brec;
3354*4882a593Smuzhiyun 	union xfs_btree_irec			high_brec;
3355*4882a593Smuzhiyun 	struct xfs_alloc_query_range_info	query;
3356*4882a593Smuzhiyun 
3357*4882a593Smuzhiyun 	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3358*4882a593Smuzhiyun 	low_brec.a = *low_rec;
3359*4882a593Smuzhiyun 	high_brec.a = *high_rec;
3360*4882a593Smuzhiyun 	query.priv = priv;
3361*4882a593Smuzhiyun 	query.fn = fn;
3362*4882a593Smuzhiyun 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
3363*4882a593Smuzhiyun 			xfs_alloc_query_range_helper, &query);
3364*4882a593Smuzhiyun }
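
/*
 * Editorial sketch (not part of the upstream file): a minimal
 * xfs_alloc_query_range_fn callback plus call site.  The cursor is assumed
 * to be a by-bno (XFS_BTNUM_BNO) cursor; the names are hypothetical.
 * xfs_alloc_query_all() below takes the same callback, just without the
 * range bounds.
 */
#if 0	/* illustrative only */
static int
example_count_free_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_alloc_rec_incore	*rec,
	void				*priv)
{
	unsigned int			*nr = priv;

	(*nr)++;
	return 0;			/* nonzero would stop the query */
}

static int
example_count_free(
	struct xfs_btree_cur	*bno_cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned int		*nr)
{
	struct xfs_alloc_rec_incore	low = { .ar_startblock = bno };
	struct xfs_alloc_rec_incore	high = {
		.ar_startblock = bno + len - 1,
	};

	*nr = 0;
	return xfs_alloc_query_range(bno_cur, &low, &high,
			example_count_free_rec, nr);
}
#endif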
3365*4882a593Smuzhiyun 
3366*4882a593Smuzhiyun /* Find all free space records. */
3367*4882a593Smuzhiyun int
3368*4882a593Smuzhiyun xfs_alloc_query_all(
3369*4882a593Smuzhiyun 	struct xfs_btree_cur			*cur,
3370*4882a593Smuzhiyun 	xfs_alloc_query_range_fn		fn,
3371*4882a593Smuzhiyun 	void					*priv)
3372*4882a593Smuzhiyun {
3373*4882a593Smuzhiyun 	struct xfs_alloc_query_range_info	query;
3374*4882a593Smuzhiyun 
3375*4882a593Smuzhiyun 	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3376*4882a593Smuzhiyun 	query.priv = priv;
3377*4882a593Smuzhiyun 	query.fn = fn;
3378*4882a593Smuzhiyun 	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3379*4882a593Smuzhiyun }
3380*4882a593Smuzhiyun 
3381*4882a593Smuzhiyun /* Is there a free space record overlapping any part of a given extent? */
3382*4882a593Smuzhiyun int
3383*4882a593Smuzhiyun xfs_alloc_has_record(
3384*4882a593Smuzhiyun 	struct xfs_btree_cur	*cur,
3385*4882a593Smuzhiyun 	xfs_agblock_t		bno,
3386*4882a593Smuzhiyun 	xfs_extlen_t		len,
3387*4882a593Smuzhiyun 	bool			*exists)
3388*4882a593Smuzhiyun {
3389*4882a593Smuzhiyun 	union xfs_btree_irec	low;
3390*4882a593Smuzhiyun 	union xfs_btree_irec	high;
3391*4882a593Smuzhiyun 
3392*4882a593Smuzhiyun 	memset(&low, 0, sizeof(low));
3393*4882a593Smuzhiyun 	low.a.ar_startblock = bno;
3394*4882a593Smuzhiyun 	memset(&high, 0xFF, sizeof(high));
3395*4882a593Smuzhiyun 	high.a.ar_startblock = bno + len - 1;
3396*4882a593Smuzhiyun 
3397*4882a593Smuzhiyun 	return xfs_btree_has_record(cur, &low, &high, exists);
3398*4882a593Smuzhiyun }
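
/*
 * Editorial sketch (not part of the upstream file): the 0xFF memset above
 * saturates every field of the high key before the start block is pinned
 * to the end of the range, so any record starting within the range
 * matches.  A hypothetical scrub-style caller would use it like this:
 */
#if 0	/* illustrative only */
static int
example_extent_is_partly_free(
	struct xfs_btree_cur	*bno_cur,	/* XFS_BTNUM_BNO cursor */
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	bool			*partly_free)
{
	/* Sets *partly_free if any bnobt record overlaps the extent. */
	return xfs_alloc_has_record(bno_cur, agbno, len, partly_free);
}
#endif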
3399*4882a593Smuzhiyun 
3400*4882a593Smuzhiyun /*
3401*4882a593Smuzhiyun  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
3402*4882a593Smuzhiyun  * error code or XFS_ITER_*.
3403*4882a593Smuzhiyun  */
3404*4882a593Smuzhiyun int
3405*4882a593Smuzhiyun xfs_agfl_walk(
3406*4882a593Smuzhiyun 	struct xfs_mount	*mp,
3407*4882a593Smuzhiyun 	struct xfs_agf		*agf,
3408*4882a593Smuzhiyun 	struct xfs_buf		*agflbp,
3409*4882a593Smuzhiyun 	xfs_agfl_walk_fn	walk_fn,
3410*4882a593Smuzhiyun 	void			*priv)
3411*4882a593Smuzhiyun {
3412*4882a593Smuzhiyun 	__be32			*agfl_bno;
3413*4882a593Smuzhiyun 	unsigned int		i;
3414*4882a593Smuzhiyun 	int			error;
3415*4882a593Smuzhiyun 
3416*4882a593Smuzhiyun 	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
3417*4882a593Smuzhiyun 	i = be32_to_cpu(agf->agf_flfirst);
3418*4882a593Smuzhiyun 
3419*4882a593Smuzhiyun 	/* Nothing to walk in an empty AGFL. */
3420*4882a593Smuzhiyun 	if (agf->agf_flcount == cpu_to_be32(0))
3421*4882a593Smuzhiyun 		return 0;
3422*4882a593Smuzhiyun 
3423*4882a593Smuzhiyun 	/* Otherwise, walk from first to last, wrapping as needed. */
3424*4882a593Smuzhiyun 	for (;;) {
3425*4882a593Smuzhiyun 		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3426*4882a593Smuzhiyun 		if (error)
3427*4882a593Smuzhiyun 			return error;
3428*4882a593Smuzhiyun 		if (i == be32_to_cpu(agf->agf_fllast))
3429*4882a593Smuzhiyun 			break;
3430*4882a593Smuzhiyun 		if (++i == xfs_agfl_size(mp))
3431*4882a593Smuzhiyun 			i = 0;
3432*4882a593Smuzhiyun 	}
3433*4882a593Smuzhiyun 
3434*4882a593Smuzhiyun 	return 0;
3435*4882a593Smuzhiyun }
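
/*
 * Editorial sketch (not part of the upstream file): a minimal
 * xfs_agfl_walk_fn that tallies AGFL blocks.  The caller is assumed to
 * hold the locked AGF and to have read the AGFL buffer (e.g. via
 * xfs_alloc_read_agfl()); the names here are hypothetical.
 */
#if 0	/* illustrative only */
static int
example_agfl_count_fn(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	unsigned int		*count = priv;

	(*count)++;
	return 0;		/* nonzero would stop the walk */
}

static int
example_count_agfl_blocks(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf,
	struct xfs_buf		*agflbp,
	unsigned int		*count)
{
	*count = 0;
	return xfs_agfl_walk(mp, agf, agflbp, example_agfl_count_fn, count);
}
#endif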
3436