// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub inode btrees.
 * If we detect a discrepancy between the inobt and the inode,
 * try again after forcing logged inode cores out to disk.
 */
int
xchk_setup_ag_iallocbt(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	return xchk_setup_ag_btree(sc, ip, sc->flags & XCHK_TRY_HARDER);
}

/* Inode btree scrubber. */

struct xchk_iallocbt {
	/* Number of inodes we see while scanning inobt. */
	unsigned long long	inodes;

	/* Expected next startino, for big block filesystems. */
	xfs_agino_t		next_startino;

	/* Expected end of the current inode cluster. */
	xfs_agino_t		next_cluster_ino;
};

/*
 * If we're checking the finobt, cross-reference with the inobt.
 * Otherwise we're checking the inobt; if there is a finobt, make sure
 * we have a record or not depending on freecount: a chunk with free
 * inodes must have a finobt record, and a fully allocated chunk must
 * not.
 */
static inline void
xchk_iallocbt_chunk_xref_other(
	struct xfs_scrub		*sc,
	struct xfs_inobt_rec_incore	*irec,
	xfs_agino_t			agino)
{
	struct xfs_btree_cur		**pcur;
	bool				has_irec;
	int				error;

	if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
		pcur = &sc->sa.ino_cur;
	else
		pcur = &sc->sa.fino_cur;
	if (!(*pcur))
		return;
	error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
	if (!xchk_should_check_xref(sc, &error, pcur))
		return;
	if (((irec->ir_freecount > 0 && !has_irec) ||
	     (irec->ir_freecount == 0 && has_irec)))
		xchk_btree_xref_set_corrupt(sc, *pcur, 0);
}

/* Cross-reference with the other btrees. */
STATIC void
xchk_iallocbt_chunk_xref(
	struct xfs_scrub		*sc,
	struct xfs_inobt_rec_incore	*irec,
	xfs_agino_t			agino,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	xchk_xref_is_used_space(sc, agbno, len);
	xchk_iallocbt_chunk_xref_other(sc, irec, agino);
	xchk_xref_is_owned_by(sc, agbno, len, &XFS_RMAP_OINFO_INODES);
	xchk_xref_is_not_shared(sc, agbno, len);
}

/* Check an inode chunk's bounds and cross-reference it with other metadata. */
STATIC bool
xchk_iallocbt_chunk(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	xfs_agino_t			agino,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	xfs_agnumber_t			agno = bs->cur->bc_ag.agno;
	xfs_agblock_t			bno;

	bno = XFS_AGINO_TO_AGBNO(mp, agino);
	if (bno + len <= bno ||
	    !xfs_verify_agbno(mp, agno, bno) ||
	    !xfs_verify_agbno(mp, agno, bno + len - 1))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);

	return true;
}

/* Count the number of free inodes. */
static unsigned int
xchk_iallocbt_freecount(
	xfs_inofree_t			freemask)
{
	BUILD_BUG_ON(sizeof(freemask) != sizeof(__u64));
	return hweight64(freemask);
}

/*
 * Check that an inode's allocation status matches ir_free in the inobt
 * record.  First we try querying the in-core inode state, and if the inode
 * isn't loaded we examine the on-disk inode directly.
 *
 * Since there can be 1:M and M:1 mappings between inobt records and inode
 * clusters, we pass in the inode location information as an inobt record;
 * the index of an inode cluster within the inobt record (as well as the
 * cluster buffer itself); and the index of the inode within the cluster.
 *
 * @irec is the inobt record.
 * @irec_ino is the inode offset from the start of the record.
 * @dip is the on-disk inode.
 */
STATIC int
xchk_iallocbt_check_cluster_ifree(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			irec_ino,
	struct xfs_dinode		*dip)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	xfs_ino_t			fsino;
	xfs_agino_t			agino;
	bool				irec_free;
	bool				ino_inuse;
	bool				freemask_ok;
	int				error = 0;

	if (xchk_should_terminate(bs->sc, &error))
		return error;

	/*
	 * Given an inobt record and the offset of an inode from the start of
	 * the record, compute which fs inode we're talking about.
	 */
	agino = irec->ir_startino + irec_ino;
	fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_ag.agno, agino);
	irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino));

	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
	    (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		goto out;
	}

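	/*
	 * Compare the inobt's free bit against the inode's actual state:
	 * freemask_ok is the XOR of irec_free and "inode in use", so a
	 * false result means the two disagree.  For example, irec_free set
	 * with a zero di_mode is consistent, but irec_free set on an inode
	 * that is in use marks the record corrupt.
	 */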
	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
			&ino_inuse);
	if (error == -ENODATA) {
		/* Not cached, just read the disk buffer */
		freemask_ok = irec_free ^ !!(dip->di_mode);
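		/*
		 * The on-disk copy can be stale if the inode core is still
		 * sitting in the log.  Per the setup comment at the top of
		 * this file, returning -EDEADLOCK asks the scrub core to
		 * retry with XCHK_TRY_HARDER set, which forces logged inode
		 * cores out to disk before we get here again.
		 */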
		if (!(bs->sc->flags & XCHK_TRY_HARDER) && !freemask_ok)
			return -EDEADLOCK;
	} else if (error < 0) {
		/*
		 * Inode is only half assembled, or there was an IO error,
		 * or the verifier failed, so don't bother trying to check.
		 * The inode scrubber can deal with this.
		 */
		goto out;
	} else {
		/* Inode is all there. */
		freemask_ok = irec_free ^ ino_inuse;
	}
	if (!freemask_ok)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
out:
	return 0;
}

/*
 * Check that the holemask and freemask of a hypothetical inode cluster match
 * what's actually on disk.  If sparse inodes are enabled, the cluster does
 * not actually have to map to inodes if the corresponding holemask bit is set.
 *
 * @cluster_base is the first inode in the cluster within the @irec.
 */
STATIC int
xchk_iallocbt_check_cluster(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			cluster_base)
{
	struct xfs_imap			imap;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_dinode		*dip;
	struct xfs_buf			*cluster_bp;
	unsigned int			nr_inodes;
	xfs_agnumber_t			agno = bs->cur->bc_ag.agno;
	xfs_agblock_t			agbno;
	unsigned int			cluster_index;
	uint16_t			cluster_mask = 0;
	uint16_t			ir_holemask;
	int				error = 0;

	nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
			M_IGEO(mp)->inodes_per_cluster);

	/* Map this inode cluster */
	agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);

	/* Compute a bitmask for this cluster that can be used for holemask. */
	for (cluster_index = 0;
	     cluster_index < nr_inodes;
	     cluster_index += XFS_INODES_PER_HOLEMASK_BIT)
		cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) /
				XFS_INODES_PER_HOLEMASK_BIT);
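
	/*
	 * For example (hypothetical geometry with 32 inodes per cluster):
	 * the cluster at cluster_base 32 covers holemask bits
	 * (32 + 0..28) / XFS_INODES_PER_HOLEMASK_BIT = 8..15, so
	 * cluster_mask ends up as 0xFF00.
	 */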

	/*
	 * Map the first inode of this cluster to a buffer and offset.
	 * Be careful about inobt records that don't align with the start of
	 * the inode buffer when block sizes are large enough to hold multiple
	 * inode chunks.  When this happens, cluster_base will be zero but
	 * ir_startino can be large enough to make im_boffset nonzero.
	 */
	ir_holemask = (irec->ir_holemask & cluster_mask);
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
	imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
	imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
			mp->m_sb.sb_inodelog;

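	/*
	 * A concrete instance of the misaligned case described above
	 * (hypothetical geometry): with 64k blocks and 512-byte inodes, one
	 * block holds two 64-inode chunks, and the record for the second
	 * chunk has cluster_base == 0 but im_boffset == 64 * 512 bytes.
	 */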
	if (imap.im_boffset != 0 && cluster_base != 0) {
		ASSERT(imap.im_boffset == 0 || cluster_base == 0);
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
			imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
			cluster_mask, ir_holemask,
			XFS_INO_TO_OFFSET(mp, irec->ir_startino +
					  cluster_base));

	/* The whole cluster must be a hole or not a hole. */
	if (ir_holemask != cluster_mask && ir_holemask != 0) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	/* If any part of this is a hole, skip it. */
	if (ir_holemask) {
		xchk_xref_is_not_owned_by(bs->sc, agbno,
				M_IGEO(mp)->blocks_per_cluster,
				&XFS_RMAP_OINFO_INODES);
		return 0;
	}

	xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
			&XFS_RMAP_OINFO_INODES);

	/* Grab the inode cluster buffer. */
	error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp, 0);
	if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
		return error;

	/* Check free status of each inode within this cluster. */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		struct xfs_dinode	*dip;

		if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) {
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
			break;
		}

		dip = xfs_buf_offset(cluster_bp, imap.im_boffset);
		error = xchk_iallocbt_check_cluster_ifree(bs, irec,
				cluster_base + cluster_index, dip);
		if (error)
			break;
		imap.im_boffset += mp->m_sb.sb_inodesize;
	}

	xfs_trans_brelse(bs->cur->bc_tp, cluster_bp);
	return error;
}

/*
 * For all the inode clusters that could map to this inobt record, make sure
 * that the holemask makes sense and that the allocation status of each inode
 * matches the freemask.
 */
STATIC int
xchk_iallocbt_check_clusters(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	unsigned int			cluster_base;
	int				error = 0;

	/*
	 * For the common case where this inobt record maps to multiple inode
	 * clusters this will call _check_cluster for each cluster.
	 *
	 * For the case that multiple inobt records map to a single cluster,
	 * this will call _check_cluster once.
	 */
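	/*
	 * For example (hypothetical geometries): with 16 inodes per cluster,
	 * a 64-inode record is checked as four clusters at cluster_base 0,
	 * 16, 32, and 48; with 128 inodes per cluster, the single call at
	 * cluster_base 0 covers this record's portion of the cluster.
	 */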
	for (cluster_base = 0;
	     cluster_base < XFS_INODES_PER_CHUNK;
	     cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
		error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
		if (error)
			break;
	}

	return error;
}

/*
 * Make sure this inode btree record is aligned properly.  Because a fs block
 * contains multiple inodes, we check that the inobt record is aligned to the
 * correct inode, not just the correct block on disk.  This results in a finer
 * grained corruption check.
 */
STATIC void
xchk_iallocbt_rec_alignment(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	struct xfs_mount		*mp = bs->sc->mp;
	struct xchk_iallocbt		*iabt = bs->private;
	struct xfs_ino_geometry		*igeo = M_IGEO(mp);

	/*
	 * finobt records have different positioning requirements than inobt
	 * records: each finobt record must have a corresponding inobt record.
	 * That is checked in the xref function, so for now we only catch the
	 * obvious case where the record isn't at all aligned properly.
	 *
	 * Note that if a fs block contains more than a single chunk of inodes,
	 * we will have finobt records only for those chunks containing free
	 * inodes, and therefore expect chunk alignment of finobt records.
	 * Otherwise, we expect that the finobt record is aligned to the
	 * cluster alignment as told by the superblock.
	 */
	if (bs->cur->bc_btnum == XFS_BTNUM_FINO) {
		unsigned int	imask;

		imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
				igeo->cluster_align_inodes) - 1;
		if (irec->ir_startino & imask)
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (iabt->next_startino != NULLAGINO) {
		/*
		 * We're midway through a cluster of inodes that is mapped by
		 * multiple inobt records.  Did we get the record for the next
		 * irec in the sequence?
		 */
		if (irec->ir_startino != iabt->next_startino) {
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
			return;
		}

		iabt->next_startino += XFS_INODES_PER_CHUNK;

		/* Are we done with the cluster? */
		if (iabt->next_startino >= iabt->next_cluster_ino) {
			iabt->next_startino = NULLAGINO;
			iabt->next_cluster_ino = NULLAGINO;
		}
		return;
	}

	/* inobt records must be aligned to cluster and inoalignment size. */
	if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
		return;

	/*
	 * If this is the start of an inode cluster that can be mapped by
	 * multiple inobt records, the next inobt record must follow exactly
	 * after this one.
	 */
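	/*
	 * For example (hypothetical geometry with 128 inodes per cluster):
	 * the record at ir_startino 0 sets next_startino to 64 and
	 * next_cluster_ino to 128, so the next record we see must start at
	 * agino 64.
	 */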
	iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
	iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
}

/* Scrub an inobt/finobt record. */
STATIC int
xchk_iallocbt_rec(
	struct xchk_btree		*bs,
	union xfs_btree_rec		*rec)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xchk_iallocbt		*iabt = bs->private;
	struct xfs_inobt_rec_incore	irec;
	uint64_t			holes;
	xfs_agnumber_t			agno = bs->cur->bc_ag.agno;
	xfs_agino_t			agino;
	xfs_extlen_t			len;
	int				holecount;
	int				i;
	int				error = 0;
	unsigned int			real_freecount;
	uint16_t			holemask;

	xfs_inobt_btrec_to_irec(mp, rec, &irec);

	if (irec.ir_count > XFS_INODES_PER_CHUNK ||
	    irec.ir_freecount > XFS_INODES_PER_CHUNK)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

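	/*
	 * A sparse record tracks only ir_count inodes, but the hole inodes
	 * are also marked free in ir_free, so the expected population count
	 * of ir_free is ir_freecount plus the (XFS_INODES_PER_CHUNK -
	 * ir_count) holes.
	 */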
	real_freecount = irec.ir_freecount +
			(XFS_INODES_PER_CHUNK - irec.ir_count);
	if (real_freecount != xchk_iallocbt_freecount(irec.ir_free))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	agino = irec.ir_startino;
	/* Record has to be properly aligned within the AG. */
	if (!xfs_verify_agino(mp, agno, agino) ||
	    !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		goto out;
	}

	xchk_iallocbt_rec_alignment(bs, &irec);
	if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	iabt->inodes += irec.ir_count;

	/* Handle non-sparse inodes */
	if (!xfs_inobt_issparse(irec.ir_holemask)) {
		len = XFS_B_TO_FSB(mp,
				XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize);
		if (irec.ir_count != XFS_INODES_PER_CHUNK)
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

		if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
			goto out;
		goto check_clusters;
	}

	/* Check each chunk of a sparse inode cluster. */
	holemask = irec.ir_holemask;
	holecount = 0;
	len = XFS_B_TO_FSB(mp,
			XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize);
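	/*
	 * xfs_inobt_irec_to_allocmask() sets one bit per inode that is
	 * backed by disk space, so its complement is the holemask expanded
	 * to inode granularity; every hole inode must also be marked free
	 * in ir_free.
	 */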
	holes = ~xfs_inobt_irec_to_allocmask(&irec);
	if ((holes & irec.ir_free) != holes ||
	    irec.ir_freecount > irec.ir_count)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
		if (holemask & 1)
			holecount += XFS_INODES_PER_HOLEMASK_BIT;
		else if (!xchk_iallocbt_chunk(bs, &irec, agino, len))
			break;
		holemask >>= 1;
		agino += XFS_INODES_PER_HOLEMASK_BIT;
	}

	if (holecount > XFS_INODES_PER_CHUNK ||
	    holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

check_clusters:
	error = xchk_iallocbt_check_clusters(bs, &irec);
	if (error)
		goto out;

out:
	return error;
}

/*
 * Make sure the inode btrees are as large as the rmap thinks they are.
 * Don't bother if we're missing btree cursors, as we're already corrupt.
 */
STATIC void
xchk_iallocbt_xref_rmap_btreeblks(
	struct xfs_scrub	*sc,
	int			which)
{
	xfs_filblks_t		blocks;
	xfs_extlen_t		inobt_blocks = 0;
	xfs_extlen_t		finobt_blocks = 0;
	int			error;

	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) ||
	    xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many inobt blocks as the rmap says. */
	error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
	if (!xchk_process_error(sc, 0, 0, &error))
		return;

	if (sc->sa.fino_cur) {
		error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
		if (!xchk_process_error(sc, 0, 0, &error))
			return;
	}

	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_INOBT, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != inobt_blocks + finobt_blocks)
		xchk_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
}

/*
 * Make sure that the inobt records point to the same number of blocks as
 * the rmap says are owned by inodes.
 */
STATIC void
xchk_iallocbt_xref_rmap_inodes(
	struct xfs_scrub	*sc,
	int			which,
	unsigned long long	inodes)
{
	xfs_filblks_t		blocks;
	xfs_filblks_t		inode_blocks;
	int			error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many inode blocks as the rmap knows about. */
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_INODES, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	inode_blocks = XFS_B_TO_FSB(sc->mp, inodes * sc->mp->m_sb.sb_inodesize);
	if (blocks != inode_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub the inode btrees for some AG. */
STATIC int
xchk_iallocbt(
	struct xfs_scrub	*sc,
	xfs_btnum_t		which)
{
	struct xfs_btree_cur	*cur;
	struct xchk_iallocbt	iabt = {
		.inodes		= 0,
		.next_startino	= NULLAGINO,
		.next_cluster_ino = NULLAGINO,
	};
	int			error;

	cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
	error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT,
			&iabt);
	if (error)
		return error;

	xchk_iallocbt_xref_rmap_btreeblks(sc, which);

	/*
	 * If we're scrubbing the inode btree, inode_blocks is the number of
	 * blocks pointed to by all the inode chunk records.  Therefore, we
	 * should compare to the number of inode chunk blocks that the rmap
	 * knows about.  We can't do this for the finobt since it only points
	 * to inode chunks with free inodes.
	 */
	if (which == XFS_BTNUM_INO)
		xchk_iallocbt_xref_rmap_inodes(sc, which, iabt.inodes);

	return error;
}

int
xchk_inobt(
	struct xfs_scrub	*sc)
{
	return xchk_iallocbt(sc, XFS_BTNUM_INO);
}

int
xchk_finobt(
	struct xfs_scrub	*sc)
{
	return xchk_iallocbt(sc, XFS_BTNUM_FINO);
}

/* See if an inode btree has (or doesn't have) an inode chunk record. */
static inline void
xchk_xref_inode_check(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	struct xfs_btree_cur	**icur,
	bool			should_have_inodes)
{
	bool			has_inodes;
	int			error;

	if (!(*icur) || xchk_skip_xref(sc->sm))
		return;

	error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
	if (!xchk_should_check_xref(sc, &error, icur))
		return;
	if (has_inodes != should_have_inodes)
		xchk_btree_xref_set_corrupt(sc, *icur, 0);
}

/* xref check that the extent is not covered by inodes */
void
xchk_xref_is_not_inode_chunk(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
}

/* xref check that the extent is covered by inodes */
void
xchk_xref_is_inode_chunk(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
}