// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"

extern kmem_zone_t	*xfs_bmap_free_item_zone;

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
/*
 * Size of the AGFL. For CRC-enabled filesystems we steal a couple of slots
 * at the beginning of the block for a proper header with the location
 * information and CRC.
 */
unsigned int
xfs_agfl_size(
	struct xfs_mount	*mp)
{
	unsigned int		size = mp->m_sb.sb_sectsize;

	if (xfs_sb_version_hascrc(&mp->m_sb))
		size -= sizeof(struct xfs_agfl);

	return size / sizeof(xfs_agblock_t);
}
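
/*
 * Worked example for xfs_agfl_size() above (illustrative, not part of the
 * original source): on a v5 (CRC) filesystem with 512 byte sectors,
 * sizeof(struct xfs_agfl) is 36 bytes, so the AGFL holds
 * (512 - 36) / sizeof(xfs_agblock_t) = 119 slots.
 */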

unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}

xfs_extlen_t
xfs_prealloc_blocks(
	struct xfs_mount	*mp)
{
	if (xfs_sb_version_hasreflink(&mp->m_sb))
		return xfs_refc_block(mp) + 1;
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		return XFS_RMAP_BLOCK(mp) + 1;
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		return XFS_FIBT_BLOCK(mp) + 1;
	return XFS_IBT_BLOCK(mp) + 1;
}
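
/*
 * Note (added commentary): the fallback chains above encode the fixed AG
 * btree root ordering - inobt root first, then the finobt, rmapbt and
 * refcountbt roots in consecutive blocks as each feature is enabled.
 * xfs_prealloc_blocks() therefore returns the first block past the last
 * fixed-location metadata block in the AG.
 */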

/*
 * In order to avoid an ENOSPC-related deadlock caused by out-of-order locking
 * of the AGF buffer (PV 947395), we place constraints on the relationship
 * among actual allocations for data blocks, freelist blocks, and potential
 * file data bmap btree blocks. However, these restrictions may result in no
 * actual space being allocated for a delayed extent: for example, a data
 * block may be allocated in an AG that has no block left over for the extra
 * bmap btree block required when the file's bmap btree splits. This can lead
 * to an infinite loop when the file gets flushed to disk and all delayed
 * extents need to be actually allocated. To get around this, we explicitly
 * set aside a few blocks which will not be reserved in delayed allocation.
 *
 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
 * potential split of the file's bmap btree.
 */
unsigned int
xfs_alloc_set_aside(
	struct xfs_mount	*mp)
{
	return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
}
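
/*
 * Worked example (illustrative): XFS_ALLOC_AGFL_RESERVE is 4 blocks, so each
 * AG sets aside 4 + 4 = 8 blocks; a filesystem with 16 AGs reports 128 fewer
 * blocks as available to delayed allocation.
 */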

/*
 * When deciding how much space to allocate out of an AG, we limit the
 * allocation maximum size to the size of the AG. However, we cannot use all
 * the blocks in the AG - some are permanently used by metadata. These
 * blocks are generally:
 *	- the AG superblock, AGF, AGI and AGFL
 *	- the AGF (bno and cnt) and AGI btree root blocks, and optionally
 *	  the AGI free inode and rmap btree root blocks.
 *	- blocks on the AGFL according to xfs_alloc_set_aside() limits
 *	- the rmapbt root block
 *
 * The AG headers are sector sized, so the amount of space they take up is
 * dependent on filesystem geometry. The others are all single blocks.
 */
unsigned int
xfs_alloc_ag_max_usable(
	struct xfs_mount	*mp)
{
	unsigned int		blocks;

	blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
	blocks += XFS_ALLOC_AGFL_RESERVE;
	blocks += 3;			/* AGF, AGI btree root blocks */
	if (xfs_sb_version_hasfinobt(&mp->m_sb))
		blocks++;		/* finobt root block */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		blocks++;		/* rmap root block */
	if (xfs_sb_version_hasreflink(&mp->m_sb))
		blocks++;		/* refcount root block */

	return mp->m_sb.sb_agblocks - blocks;
}
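
/*
 * Worked example (illustrative, assuming 4k blocks and 512 byte sectors):
 * the four sector-sized AG headers round up to a single filesystem block, so
 * with finobt, rmapbt and reflink all enabled the deduction is
 * 1 + XFS_ALLOC_AGFL_RESERVE + 3 + 3 = 11 blocks per AG.
 */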

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}

static inline bool
xfs_alloc_cur_active(
	struct xfs_btree_cur	*cur)
{
	return cur && cur->bc_ag.abt.active;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_agnumber_t		agno = cur->bc_ag.agno;
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	*bno = be32_to_cpu(rec->alloc.ar_startblock);
	*len = be32_to_cpu(rec->alloc.ar_blockcount);

	if (*len == 0)
		goto out_bad_rec;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, *bno))
		goto out_bad_rec;
	if (*bno > *bno + *len)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
		goto out_bad_rec;

	return 0;

out_bad_rec:
	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", *bno, *len);
	return -EFSCORRUPTED;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
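
/*
 * Worked example for xfs_alloc_compute_aligned() (illustrative): with
 * args->alignment = 4, a found extent [bno = 10, len = 20) aligns to
 * aligned_bno = roundup(10, 4) = 12, so the result is [12, 18 blocks] and
 * the two leading blocks are lost to alignment. If diff >= len the whole
 * extent is unusable and *reslen is 0.
 */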

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1 = 0;	/* length with newbno1 */
	xfs_extlen_t	newlen2 = 0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
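
/*
 * Worked example (illustrative): wantbno = 100, wantlen = 10, alignment = 4,
 * free extent [88, 40 blocks). freebno < wantbno and freeend = 128 covers
 * wantend = 110, so the aligned candidates are newbno1 = roundup(100, 4) =
 * 100 (diff 0) and newbno2 = 96 (diff 4); both yield the full 10 blocks, so
 * newbno1 wins on the smaller distance from wantbno and diff 0 is returned.
 */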

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
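
/*
 * Worked example (illustrative): with prod = 4, mod = 1 and len = 10,
 * k = 10 % 4 = 2 > mod, so the length is trimmed to 10 - (2 - 1) = 9, which
 * satisfies 9 % 4 == 1. If trimming would drop the length below minlen it is
 * left unchanged instead.
 */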

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks. The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1 = 0;	/* first new free length */
	xfs_extlen_t	nflen2 = 0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen))
			return -EFSCORRUPTED;
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs))
			return -EFSCORRUPTED;
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1))
		return -EFSCORRUPTED;
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0))
			return -EFSCORRUPTED;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1))
			return -EFSCORRUPTED;
	}
	return 0;
}
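
/*
 * Worked example for xfs_alloc_fixup_trees() (illustrative): carving
 * [rbno = 20, rlen = 5) out of the middle of a free extent
 * [fbno = 10, flen = 30) leaves two new free extents, [10, 10 blocks] and
 * [25, 15 blocks]; carving from either end leaves one, and an exact match
 * leaves none.
 */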

static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of
	 * the AGFL is what the AGF says is active. We can't get to the AGF,
	 * so we can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * During growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of
	 * the AGFL is what the AGF says is active. We can't get to the AGF,
	 * so we can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agfl_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};

/*
 * Read in the allocation group free block array.
 */
int					/* error */
xfs_alloc_read_agfl(
	xfs_mount_t	*mp,		/* mount point structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
{
	xfs_buf_t	*bp;		/* return value */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

/*
 * Block allocation algorithm and data structures.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur		*cnt;	/* btree cursors */
	struct xfs_btree_cur		*bnolt;
	struct xfs_btree_cur		*bnogt;
	xfs_extlen_t			cur_len;/* current search length */
	xfs_agblock_t			rec_bno;/* extent startblock */
	xfs_extlen_t			rec_len;/* extent length */
	xfs_agblock_t			bno;	/* alloc bno */
	xfs_extlen_t			len;	/* alloc len */
	xfs_extlen_t			diff;	/* diff from search bno */
	unsigned int			busy_gen;/* busy state */
	bool				busy;
};
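
/*
 * Note (added commentary): for a near allocation, the cntbt cursor searches
 * by size while the two bnobt cursors walk in opposite directions away from
 * the target block; the best extent seen by any cursor so far is tracked in
 * the rec_bno/rec_len and bno/len/diff fields above.
 */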

/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);

	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->agno, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}

/*
 * Tear down the btree cursors held in the allocation cursor, preserving the
 * error state so the cursors are released correctly on failure.
 */
static void
xfs_alloc_cur_close(
	struct xfs_alloc_cur	*acur,
	bool			error)
{
	int			cur_error = XFS_BTREE_NOERROR;

	if (error)
		cur_error = XFS_BTREE_ERROR;

	if (acur->cnt)
		xfs_btree_del_cursor(acur->cnt, cur_error);
	if (acur->bnolt)
		xfs_btree_del_cursor(acur->bnolt, cur_error);
	if (acur->bnogt)
		xfs_btree_del_cursor(acur->bnogt, cur_error);
	acur->cnt = acur->bnolt = acur->bnogt = NULL;
}

/*
 * Check an extent for allocation and track the best available candidate in
 * the allocation structure. The cursor is deactivated if it has entered an
 * out of range state based on allocation arguments. Optionally return the
 * extent geometry and allocation status if requested by the caller.
 */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1))
		return -EFSCORRUPTED;

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}
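
/*
 * Candidate ordering example (illustrative): a new extent only replaces the
 * current best when it is at least as long and no farther from the target,
 * so a 16 block extent at diff 2 replaces an 8 block extent at diff 4, while
 * a 16 block extent at diff 32 would be skipped despite its length.
 */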

/*
 * Complete an allocation of a candidate extent. Remove the extent from both
 * trees and update the args structure.
 */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}

/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Bump the search key by a single block if we have yet to find an
	 * allocation candidate, or if the record we just found is already
	 * larger than the doubled key. Otherwise, double the search key to
	 * optimize the search. Efficiency is more important here than
	 * absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}

/*
 * Deal with the case where only small freespaces remain. Either return the
 * contents of the last freespace record, or allocate space from the freelist
 * if there is nothing in the tree.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_small(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
	xfs_agblock_t		*fbnop,	/* result block number */
	xfs_extlen_t		*flenp,	/* result length */
	int			*stat)	/* status: 0-freelist, 1-normal/none */
{
	struct xfs_agf		*agf = args->agbp->b_addr;
	int			error = 0;
	xfs_agblock_t		fbno = NULLAGBLOCK;
	xfs_extlen_t		flen = 0;
	int			i = 0;

	/*
	 * If a cntbt cursor is provided, try to allocate the largest record in
	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
	 * allocation. Make sure to respect minleft even when pulling from the
	 * freelist.
	 */
	if (ccur)
		error = xfs_btree_decrement(ccur, 0, &i);
	if (error)
		goto error;
	if (i) {
		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
		if (error)
			goto error;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error;
		}
		goto out;
	}

	if (args->minlen != 1 || args->alignment != 1 ||
	    args->resv == XFS_AG_RESV_AGFL ||
	    be32_to_cpu(agf->agf_flcount) <= args->minleft)
		goto out;

	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
	if (error)
		goto error;
	if (fbno == NULLAGBLOCK)
		goto out;

	xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
			      (args->datatype & XFS_ALLOC_NOBUSY));

	if (args->datatype & XFS_ALLOC_USERDATA) {
		struct xfs_buf	*bp;

		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
				args->mp->m_bsize, 0, &bp);
		if (error)
			goto error;
		xfs_trans_binval(args->tp, bp);
	}
	*fbnop = args->agbno = fbno;
	*flenp = args->len = 1;
	if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto error;
	}
	args->wasfromfl = 1;
	trace_xfs_alloc_small_freelist(args);

	/*
	 * If we're feeding an AGFL block to something that doesn't live in the
	 * free space, we need to clear out the OWN_AG rmap.
	 */
	error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
			      &XFS_RMAP_OINFO_AG);
	if (error)
		goto error;

	*stat = 0;
	return 0;

out:
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error:
	trace_xfs_alloc_small_error(args);
	return error;
}
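
/*
 * Note (added commentary): only a minlen == 1, alignment == 1 allocation
 * that is not on behalf of the AGFL itself may fall back to pulling a single
 * block off the freelist here (*stat == 0); every other caller gets the last
 * cntbt record back, possibly empty, with *stat == 1.
 */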

/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error = 0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);

	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
	ASSERT(args->agbno % args->alignment == 0);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			return error;
	}

	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					       args->agbno, args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(args->mp, xs_allocx);
	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
	return error;
}
1192*4882a593Smuzhiyun
1193*4882a593Smuzhiyun /*
1194*4882a593Smuzhiyun * Allocate a variable extent at exactly agno/bno.
1195*4882a593Smuzhiyun * Extent's length (returned in *len) will be between minlen and maxlen,
1196*4882a593Smuzhiyun * and of the form k * prod + mod unless there's nothing that large.
1197*4882a593Smuzhiyun * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1198*4882a593Smuzhiyun */
1199*4882a593Smuzhiyun STATIC int /* error */
xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t * args)1200*4882a593Smuzhiyun xfs_alloc_ag_vextent_exact(
1201*4882a593Smuzhiyun xfs_alloc_arg_t *args) /* allocation argument structure */
1202*4882a593Smuzhiyun {
1203*4882a593Smuzhiyun struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1204*4882a593Smuzhiyun xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
1205*4882a593Smuzhiyun xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
1206*4882a593Smuzhiyun int error;
1207*4882a593Smuzhiyun xfs_agblock_t fbno; /* start block of found extent */
1208*4882a593Smuzhiyun xfs_extlen_t flen; /* length of found extent */
1209*4882a593Smuzhiyun xfs_agblock_t tbno; /* start block of busy extent */
1210*4882a593Smuzhiyun xfs_extlen_t tlen; /* length of busy extent */
1211*4882a593Smuzhiyun xfs_agblock_t tend; /* end block of busy extent */
1212*4882a593Smuzhiyun int i; /* success/failure of operation */
1213*4882a593Smuzhiyun unsigned busy_gen;
1214*4882a593Smuzhiyun
1215*4882a593Smuzhiyun ASSERT(args->alignment == 1);
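	/*
	 * An exact (THIS_BNO) allocation must start at precisely args->agbno,
	 * so there is no room to honor an alignment; callers pass 1.
	 */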

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}

/*
 * Search a given number of btree records in a given direction. Check each
 * record against the good extent we've already found.
 */
STATIC int
xfs_alloc_walk_iter(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	bool			increment,
	bool			find_one, /* quit on first candidate */
	int			count, /* rec count (-1 for infinite) */
	int			*stat)
{
	int			error;
	int			i;

	*stat = 0;

	/*
	 * Search so long as the cursor is active or we find a better extent.
	 * The cursor is deactivated if it extends beyond the range of the
	 * current allocation candidate.
	 */
	while (xfs_alloc_cur_active(cur) && count) {
		error = xfs_alloc_cur_check(args, acur, cur, &i);
		if (error)
			return error;
		if (i == 1) {
			*stat = 1;
			if (find_one)
				break;
		}
		if (!xfs_alloc_cur_active(cur))
			break;

		if (increment)
			error = xfs_btree_increment(cur, 0, &i);
		else
			error = xfs_btree_decrement(cur, 0, &i);
		if (error)
			return error;
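		/* i == 0 means the cursor walked off the end of the tree */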
		if (i == 0)
			cur->bc_ag.abt.active = false;

		if (count > 0)
			count--;
	}

	return 0;
}

/*
 * Search the by-bno and by-size btrees in parallel in search of an extent with
 * ideal locality based on the NEAR mode ->agbno locality hint.
 */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)
{
	struct xfs_btree_cur	*fbcur = NULL;
	int			error;
	int			i;
	bool			fbinc;

	ASSERT(acur->len == 0);
	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);

	*stat = 0;

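	/*
	 * Position the cursors: the cntbt cursor at the first extent of the
	 * current size class at or beyond the hint, and the two bnobt cursors
	 * at the closest free extents on either side of the hint.
	 */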
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}

/* Check the last block of the cnt btree for allocations. */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (prandom_u32() & 1)
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		acur->cnt->bc_ptrs[0] = 1;
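		/* bc_ptrs[0] is 1-based; this rewinds to the block's first record */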
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1))
				return -EFSCORRUPTED;
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}

/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large relative to the free space
	 * available in this a.g., then the cursor will be pointing to a btree
	 * entry near the right edge of the tree. If it's in the last btree
	 * leaf block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			trace_xfs_alloc_near_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag,
					      acur.busy_gen);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	xfs_alloc_cur_close(&acur, error);
	return error;
}

/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	struct xfs_agf	*agf = args->agbp->b_addr;
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;
	unsigned	busy_gen;

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	busy = false;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;
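	/*
	 * Searching for maxlen + alignment - 1 blocks guarantees that a hit
	 * still contains an aligned, maxlen-sized extent after trimming the
	 * start for alignment.
	 */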

	/*
	 * If none then we have to settle for a smaller extent. In the case that
	 * there are no large extents, this will return the last entry in the
	 * tree unless the tree is empty. In the case that there are only busy
	 * large extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
				&rlen, &busy_gen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}

			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				xfs_extent_busy_flush(args->mp,
							args->pag, busy_gen);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree. Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	if (XFS_IS_CORRUPT(args->mp,
			   rlen != 0 &&
			   (rlen > flen ||
			    rbno + rlen > fbno + flen))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
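			/*
			 * The cntbt is ordered by length, so once the raw
			 * record is shorter than the best aligned candidate,
			 * nothing further to the left can beat it.
			 */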
			if (flen < bestrlen)
				break;
			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			if (XFS_IS_CORRUPT(args->mp,
					   rlen != 0 &&
					   (rlen > flen ||
					    rbno + rlen > fbno + flen))) {
				error = -EFSCORRUPTED;
				goto error0;
			}
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (busy) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	rlen = args->len;
	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	if (XFS_IS_CORRUPT(args->mp,
			   args->agbno + args->len >
			   be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}

/*
 * Free the extent starting at agno/bno for length.
 */
STATIC int
xfs_free_ag_extent(
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xfs_mount		*mp;
	struct xfs_btree_cur		*bno_cur;
	struct xfs_btree_cur		*cnt_cur;
	xfs_agblock_t			gtbno; /* start of right neighbor */
	xfs_extlen_t			gtlen; /* length of right neighbor */
	xfs_agblock_t			ltbno; /* start of left neighbor */
	xfs_extlen_t			ltlen; /* length of left neighbor */
	xfs_agblock_t			nbno; /* new starting block of freesp */
	xfs_extlen_t			nlen; /* new length of freespace */
	int				haveleft; /* have a left neighbor */
	int				haveright; /* have a right neighbor */
	int				i;
	int				error;

	bno_cur = cnt_cur = NULL;
	mp = tp->t_mountp;

	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
		error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
		if (error)
			goto error0;
	}

	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(mp,
					   i != 1 ||
					   xxbno != ltbno ||
					   xxlen != ltlen)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 0)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	error = xfs_alloc_update_counters(tp, agbp, len);
	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
	if (error)
		goto error0;

	XFS_STATS_INC(mp, xs_freex);
	XFS_STATS_ADD(mp, xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);

	return 0;

error0:
	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Visible (exported) allocation/free functions.
 * Some of these are used just by xfs_alloc_btree.c and this file.
 */

/*
 * Compute and fill in value of m_ag_maxlevels.
 */
void
xfs_alloc_compute_maxlevels(
	xfs_mount_t	*mp)	/* file system mount structure */
{
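	/*
	 * A maximally fragmented AG alternates single-block free and used
	 * extents, so the freespace btrees hold at most
	 * (sb_agblocks + 1) / 2 records.
	 */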
	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
			(mp->m_sb.sb_agblocks + 1) / 2);
}

/*
 * Find the length of the longest extent in an AG. The 'need' parameter
 * specifies how much space we're going to need for the AGFL and the
 * 'reserved' parameter tells us how many blocks in this AG are reserved for
 * other callers.
 */
xfs_extlen_t
xfs_alloc_longest_free_extent(
	struct xfs_perag	*pag,
	xfs_extlen_t		need,
	xfs_extlen_t		reserved)
{
	xfs_extlen_t		delta = 0;

	/*
	 * If the AGFL needs a recharge, we'll have to subtract that from the
	 * longest extent.
	 */
	if (need > pag->pagf_flcount)
		delta = need - pag->pagf_flcount;

	/*
	 * If we cannot maintain others' reservations with space from the
	 * not-longest freesp extents, we'll have to subtract /that/ from
	 * the longest extent too.
	 */
	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);

	/*
	 * If the longest extent is long enough to satisfy all the
	 * reservations and AGFL rules in place, we can return this extent.
	 */
	if (pag->pagf_longest > delta)
		return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
				pag->pagf_longest - delta);

	/* Otherwise, let the caller try for 1 block if there's space. */
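	/* (the boolean expression below yields a length of 0 or 1 blocks) */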
	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}

/*
 * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
 * return the largest possible minimum length.
 */
unsigned int
xfs_alloc_min_freelist(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag)
{
	/* AG btrees have at least 1 level. */
	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
	unsigned int		min_free;

	ASSERT(mp->m_ag_maxlevels > 0);

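	/*
	 * Reserve enough blocks for each freespace btree to split the full
	 * height of the tree and grow by a level: one block per current level
	 * plus one, clamped to the maximum possible tree height.
	 */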
2261*4882a593Smuzhiyun /* space needed by-bno freespace btree */
2262*4882a593Smuzhiyun min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2263*4882a593Smuzhiyun mp->m_ag_maxlevels);
2264*4882a593Smuzhiyun /* space needed by-size freespace btree */
2265*4882a593Smuzhiyun min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2266*4882a593Smuzhiyun mp->m_ag_maxlevels);
2267*4882a593Smuzhiyun /* space needed reverse mapping used space btree */
2268*4882a593Smuzhiyun if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2269*4882a593Smuzhiyun min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2270*4882a593Smuzhiyun mp->m_rmap_maxlevels);
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun return min_free;
2273*4882a593Smuzhiyun }

/*
 * Check if the operation we are fixing up the freelist for should go ahead or
 * not. If we are freeing blocks, we always allow it, otherwise the allocation
 * is dependent on whether the size and shape of free space available will
 * permit the requested allocation to take place.
 */
static bool
xfs_alloc_space_available(
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		min_free,
	int			flags)
{
	struct xfs_perag	*pag = args->pag;
	xfs_extlen_t		alloc_len, longest;
	xfs_extlen_t		reservation; /* blocks that are still reserved */
	int			available;
	xfs_extlen_t		agflcount;

	if (flags & XFS_ALLOC_FLAG_FREEING)
		return true;

	reservation = xfs_ag_resv_needed(pag, args->resv);

	/* do we have enough contiguous free space for the allocation? */
	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
	if (longest < alloc_len)
		return false;

	/*
	 * Do we have enough free space remaining for the allocation? Don't
	 * account extra agfl blocks because we are about to defer freeing
	 * them, making them unavailable until the current transaction commits.
	 */
	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
	available = (int)(pag->pagf_freeblks + agflcount -
			  reservation - min_free - args->minleft);
	if (available < (int)max(args->total, alloc_len))
		return false;

	/*
	 * Clamp maxlen to the amount of free space available for the actual
	 * extent allocation.
	 */
	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
	}

	return true;
}
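
/*
 * Worked example (hypothetical numbers, for illustration only): with
 * pagf_freeblks = 100, pagf_flcount = 4, min_free = 10, no reservation
 * and minleft = 0, the usable headroom computed above is
 * 100 + min(4, 10) - 10 = 94 blocks. An allocation with
 * max(total, alloc_len) <= 94 passes the check, and maxlen is clamped
 * down to 94 if it was larger.
 */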

int
xfs_free_agfl_block(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	struct xfs_buf		*agbp,
	struct xfs_owner_info	*oinfo)
{
	int			error;
	struct xfs_buf		*bp;

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
				   XFS_AG_RESV_AGFL);
	if (error)
		return error;

	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
			tp->t_mountp->m_bsize, 0, &bp);
	if (error)
		return error;
	xfs_trans_binval(tp, bp);

	return 0;
}

/*
 * Check the agfl fields of the agf for inconsistency or corruption. The
 * purpose is to detect an agfl header padding mismatch between current and
 * early v5 kernels. This problem manifests as a 1-slot size difference
 * between the on-disk flcount and the active [first, last] range of a
 * wrapped agfl. This may also catch variants of agfl count corruption
 * unrelated to padding. Either way, we'll reset the agfl and warn the user.
 *
 * Return true if a reset is required before the agfl can be used, false
 * otherwise.
 */
static bool
xfs_agfl_needs_reset(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf)
{
	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
	uint32_t		l = be32_to_cpu(agf->agf_fllast);
	uint32_t		c = be32_to_cpu(agf->agf_flcount);
	int			agfl_size = xfs_agfl_size(mp);
	int			active;

	/* no agfl header on v4 supers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return false;

	/*
	 * The agf read verifier catches severe corruption of these fields.
	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
	 * the verifier allows it.
	 */
	if (f >= agfl_size || l >= agfl_size)
		return true;
	if (c > agfl_size)
		return true;

	/*
	 * Check consistency between the on-disk count and the active range. An
	 * agfl padding mismatch manifests as an inconsistent flcount.
	 */
	if (c && l >= f)
		active = l - f + 1;
	else if (c)
		active = agfl_size - f + l + 1;
	else
		active = 0;

	return active != c;
}
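
/*
 * Wrap-around example (hypothetical numbers, for illustration only): on
 * an agfl with 119 slots, flfirst = 117 and fllast = 1 describe an
 * active range that wraps past the end of the array (slots 117, 118, 0
 * and 1), so the active slot count is 119 - 117 + 1 + 1 = 4. An on-disk
 * flcount of anything other than 4 would trigger a reset here.
 */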

/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;

	ASSERT(pag->pagf_agflreset);
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
			  XFS_AGF_FLCOUNT);

	pag->pagf_flcount = 0;
	pag->pagf_agflreset = false;
}

/*
 * Defer an AGFL block free. This is effectively equivalent to
 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
 *
 * Deferring AGFL frees helps prevent log reservation overruns due to too many
 * allocation operations in a transaction. AGFL frees are prone to this
 * problem because, for one, they are always freed one block at a time.
 * Further, an immediate AGFL block free can cause a btree join and require
 * another block free before the real allocation can proceed. Deferring the
 * free disconnects freeing up the AGFL slot from freeing the block.
 */
STATIC void
xfs_defer_agfl_block(
	struct xfs_trans		*tp,
	xfs_agnumber_t			agno,
	xfs_fsblock_t			agbno,
	struct xfs_owner_info		*oinfo)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_extent_free_item	*new;	/* new element */

	ASSERT(xfs_bmap_free_item_zone != NULL);
	ASSERT(oinfo != NULL);

	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
			       GFP_KERNEL | __GFP_NOFAIL);
	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
	new->xefi_blockcount = 1;
	new->xefi_oinfo = *oinfo;
	new->xefi_skip_discard = false;

	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
}
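
/*
 * A minimal sketch of the deferral pattern above, assuming the usual
 * deferred-ops flow: work items queued on the transaction are only
 * completed when the deferred operations are finished, e.g.
 *
 *	xfs_defer_agfl_block(tp, agno, bno, &oinfo);	// queue the free
 *	...
 *	error = xfs_trans_commit(tp);	// deferred frees run in follow-up
 *					// transaction rolls
 *
 * so the block leaves the AGFL immediately but only returns to the free
 * space btrees once the deferred free item is processed.
 */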

/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data, then try
	 * somewhere else if we are not being asked to try harder at this
	 * point.
	 */
	if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/*
	 * If there isn't enough total space or there isn't a long enough
	 * single extent, reject it.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
	}

	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out. Won't happen if callers are obeying
		 * the restrictions correctly. Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
						       agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
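
/*
 * A minimal caller sketch, mirroring how this function is used later in
 * this file (error handling elided): the caller grabs the perag, asks
 * for the freelist to be fixed up, and treats a NULL args->agbp as
 * "this AG was skipped, try another one".
 *
 *	args->pag = xfs_perag_get(mp, args->agno);
 *	error = xfs_alloc_fix_freelist(args, flags);
 *	if (!error && args->agbp) {
 *		// AGF is locked; the allocation can proceed in this AG
 *		error = xfs_alloc_ag_vextent(args);
 *	}
 *	xfs_perag_put(args->pag);
 */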

/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is an AGF btree */
{
	struct xfs_agf	*agf = agbp->b_addr;
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;

	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	pag = agbp->b_pag;
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
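
/*
 * The AGFL is managed as a ring buffer over xfs_agfl_size(mp) slots, so
 * the pop above amounts to (hypothetical numbers, for illustration only):
 *
 *	bno = agfl[flfirst];		// e.g. take slot 118 of 119
 *	flfirst = (flfirst + 1) % size;	// e.g. 119 wraps back to 0
 *	flcount--;
 *
 * with the wrap written out as an explicit compare because the fields
 * live on disk in big-endian form.
 */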

/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int		first;	/* first byte offset */
	int		last;	/* last byte offset */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}

/*
 * Interface for inode allocation to force the pag data to be initialized.
 */
int					/* error */
xfs_alloc_pagf_init(
	xfs_mount_t		*mp,	/* file system mount structure */
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags)	/* XFS_ALLOC_FLAGS_... */
{
	xfs_buf_t		*bp;
	int			error;

	error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
	if (!error)
		xfs_trans_brelse(tp, bp);
	return error;
}

/*
 * Put the block on the freelist for the allocation group.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from an AGF btree */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_perag_t		*pag;	/* per allocation group data */
	__be32			*agfl_bno;
	int			startoff;

	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	pag = agbp->b_pag;
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
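
/*
 * The push above mirrors the pop in xfs_alloc_get_freelist() on the
 * same ring buffer (hypothetical numbers, for illustration only):
 *
 *	fllast = (fllast + 1) % size;	// e.g. 119 wraps back to 0
 *	agfl[fllast] = bno;
 *	flcount++;
 *
 * Note that only the single modified agfl slot is logged, via the byte
 * range [startoff, startoff + sizeof(xfs_agblock_t) - 1].
 */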

static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
		return __this_address;

	if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
		return __this_address;

	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
		return __this_address;

	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	/*
	 * During growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_refcount_blocks) >
	    be32_to_cpu(agf->agf_length))
		return __this_address;

	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	return NULL;
}

static void
xfs_agf_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	xfs_failaddr_t	fa;

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_agf_verify(bp);
		if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}
}

static void
xfs_agf_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_agf_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}

const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
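
/*
 * A minimal sketch of how this verifier table is exercised, assuming
 * the standard xfs_buf_ops contract: ->verify_read runs when a buffer
 * read completes, ->verify_write runs just before the buffer is written
 * back, and ->verify_struct checks the in-memory structure without
 * signalling I/O errors, e.g.
 *
 *	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr,
 *				   XFS_FSS_TO_BB(mp, 1), 0, &bp,
 *				   &xfs_agf_buf_ops);	// read verify here
 *
 * which is how xfs_read_agf() below attaches these ops.
 */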

/*
 * Read in the allocation group header (free/alloc section).
 */
int					/* error */
xfs_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_BUF_ */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	int			error;

	trace_xfs_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
	if (error)
		return error;

	ASSERT(!(*bpp)->b_error);
	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
	return 0;
}

/*
 * Read in the allocation group header (free/alloc section) and initialise
 * the in-core perag data on the first read.
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;	/* ag freelist header */
	struct xfs_perag	*pag;	/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	ASSERT(!(*bpp)->b_error);

	agf = (*bpp)->b_addr;
	pag = (*bpp)->b_pag;
	if (!pag->pagf_init) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		pag->pagf_init = 1;
		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	return 0;
}
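
/*
 * A minimal sketch of the priming pattern this enables, assuming the
 * perag reference APIs used elsewhere in this file: a caller that only
 * needs the cached per-AG geometry can force initialisation and then
 * read the pagf_* fields without holding the AGF buffer, e.g.
 *
 *	error = xfs_alloc_pagf_init(mp, tp, agno, 0);
 *	if (!error) {
 *		struct xfs_perag *pag = xfs_perag_get(mp, agno);
 *		longest = pag->pagf_longest;
 *		xfs_perag_put(pag);
 *	}
 */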

/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 */
int				/* error */
xfs_alloc_vextent(
	struct xfs_alloc_arg	*args)	/* allocation argument structure */
{
	xfs_agblock_t		agsize;	/* allocation group size */
	int			error;
	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
	struct xfs_mount	*mp;	/* mount structure pointer */
	xfs_agnumber_t		sagno;	/* starting allocation group number */
	xfs_alloctype_t		type;	/* input allocation type */
	int			bump_rotor = 0;
	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		error = xfs_alloc_fix_freelist(args, 0);
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			error = xfs_alloc_fix_freelist(args, flags);
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space. However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->tp->t_firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor) {
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif
	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
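
/*
 * A minimal caller sketch, assuming only the argument conventions
 * visible in this function (error handling elided): ask for 1-16 blocks
 * near a target fsbno, falling back to anywhere in the filesystem.
 *
 *	struct xfs_alloc_arg	args = {
 *		.tp = tp, .mp = mp,
 *		.type = XFS_ALLOCTYPE_START_BNO,
 *		.fsbno = target_fsbno,		// hypothetical target
 *		.minlen = 1, .maxlen = 16,
 *		.prod = 1, .alignment = 1,
 *		.oinfo = XFS_RMAP_OINFO_AG,	// hypothetical owner
 *		.resv = XFS_AG_RESV_NONE,
 *	};
 *	error = xfs_alloc_vextent(&args);
 *	if (!error && args.fsbno != NULLFSBLOCK)
 *		// got args.len blocks starting at args.fsbno
 */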

/* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = agno;

	/*
	 * Validate that the block number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	args.pag = xfs_perag_get(args.mp, args.agno);
	ASSERT(args.pag);

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		goto out;

	*agbp = args.agbp;
out:
	xfs_perag_put(args.pag);
	return error;
}

/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 */
int
__xfs_free_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	bool				skip_discard)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
	struct xfs_agf		*agf;
	int			error;
	unsigned int		busy_flags = 0;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (XFS_TEST_ERROR(false, mp,
			   XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
	if (error)
		return error;
	agf = agbp->b_addr;

	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		error = -EFSCORRUPTED;
		goto err;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto err;
	}

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
	if (error)
		goto err;

	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
	return 0;

err:
	xfs_trans_brelse(tp, agbp);
	return error;
}
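
/*
 * A minimal caller sketch, assuming only the signature above (error
 * handling elided): free a previously allocated extent and let the
 * busy-extent tracking prevent its reuse until the log commits.
 *
 *	error = __xfs_free_extent(tp, fsbno, len, &XFS_RMAP_OINFO_AG,
 *				  XFS_AG_RESV_NONE, false);
 *
 * The final argument chooses whether the freed range is excluded from
 * online discard; passing false keeps it eligible.
 */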

struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;
	void				*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_alloc_query_range_helper(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	void			*priv)
{
	struct xfs_alloc_query_range_info	*query = priv;
	struct xfs_alloc_rec_incore		irec;

	irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
	return query->fn(cur, &irec, query->priv);
}

/* Find all free space within a given range of blocks. */
int
xfs_alloc_query_range(
	struct xfs_btree_cur		*cur,
	struct xfs_alloc_rec_incore	*low_rec,
	struct xfs_alloc_rec_incore	*high_rec,
	xfs_alloc_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_irec		low_brec;
	union xfs_btree_irec		high_brec;
	struct xfs_alloc_query_range_info	query;

	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
	low_brec.a = *low_rec;
	high_brec.a = *high_rec;
	query.priv = priv;
	query.fn = fn;
	return xfs_btree_query_range(cur, &low_brec, &high_brec,
			xfs_alloc_query_range_helper, &query);
}
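
/*
 * A minimal callback sketch, assuming the xfs_alloc_query_range_fn
 * signature implied by the helper above (the callback name and priv
 * payload are hypothetical):
 *
 *	static int
 *	count_free_records(
 *		struct xfs_btree_cur		*cur,
 *		struct xfs_alloc_rec_incore	*rec,
 *		void				*priv)
 *	{
 *		unsigned int	*nr = priv;
 *
 *		(*nr)++;
 *		return 0;
 *	}
 *
 * A nonzero return from the callback ends the iteration and is
 * propagated back to the xfs_alloc_query_range() caller.
 */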

/* Find all free space records. */
int
xfs_alloc_query_all(
	struct xfs_btree_cur		*cur,
	xfs_alloc_query_range_fn	fn,
	void				*priv)
{
	struct xfs_alloc_query_range_info	query;

	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
	query.priv = priv;
	query.fn = fn;
	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}

/* Is there a record covering a given extent? */
int
xfs_alloc_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.a.ar_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.a.ar_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}

/*
 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
 * error code or XFS_ITER_*.
 */
int
xfs_agfl_walk(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf,
	struct xfs_buf		*agflbp,
	xfs_agfl_walk_fn	walk_fn,
	void			*priv)
{
	__be32			*agfl_bno;
	unsigned int		i;
	int			error;

	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	i = be32_to_cpu(agf->agf_flfirst);

	/* Nothing to walk in an empty AGFL. */
	if (agf->agf_flcount == cpu_to_be32(0))
		return 0;

	/* Otherwise, walk from first to last, wrapping as needed. */
	for (;;) {
		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
		if (error)
			return error;
		if (i == be32_to_cpu(agf->agf_fllast))
			break;
		if (++i == xfs_agfl_size(mp))
			i = 0;
	}

	return 0;
}
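
/*
 * A minimal walk_fn sketch, assuming the xfs_agfl_walk_fn signature
 * implied by the call above (the function name and priv payload are
 * hypothetical):
 *
 *	static int
 *	print_agfl_block(
 *		struct xfs_mount	*mp,
 *		xfs_agblock_t		agbno,
 *		void			*priv)
 *	{
 *		xfs_agnumber_t	*agno = priv;
 *
 *		xfs_notice(mp, "AGFL block %u:%u", *agno, agbno);
 *		return 0;
 *	}
 */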