1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0+
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * linux/fs/jbd2/recovery.c
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * Journal recovery routines for the generic filesystem journaling code;
10*4882a593Smuzhiyun * part of the ext2fs journaling system.
11*4882a593Smuzhiyun */
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #ifndef __KERNEL__
14*4882a593Smuzhiyun #include "jfs_user.h"
15*4882a593Smuzhiyun #else
16*4882a593Smuzhiyun #include <linux/time.h>
17*4882a593Smuzhiyun #include <linux/fs.h>
18*4882a593Smuzhiyun #include <linux/jbd2.h>
19*4882a593Smuzhiyun #include <linux/errno.h>
20*4882a593Smuzhiyun #include <linux/crc32.h>
21*4882a593Smuzhiyun #include <linux/blkdev.h>
22*4882a593Smuzhiyun #endif
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun /*
25*4882a593Smuzhiyun * Maintain information about the progress of the recovery job, so that
26*4882a593Smuzhiyun * the different passes can carry information between them.
27*4882a593Smuzhiyun */
28*4882a593Smuzhiyun struct recovery_info
29*4882a593Smuzhiyun {
30*4882a593Smuzhiyun tid_t start_transaction;
31*4882a593Smuzhiyun tid_t end_transaction;
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun int nr_replays;
34*4882a593Smuzhiyun int nr_revokes;
35*4882a593Smuzhiyun int nr_revoke_hits;
36*4882a593Smuzhiyun };
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun static int do_one_pass(journal_t *journal,
39*4882a593Smuzhiyun struct recovery_info *info, enum passtype pass);
40*4882a593Smuzhiyun static int scan_revoke_records(journal_t *, struct buffer_head *,
41*4882a593Smuzhiyun tid_t, struct recovery_info *);
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun #ifdef __KERNEL__
44*4882a593Smuzhiyun
/*
 * Drop our reference on each of the first @n readahead buffers in @b,
 * walking from the highest index down to index 0.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int i;

	for (i = n - 1; i >= 0; i--)
		brelse(b[i]);
}
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun
53*4882a593Smuzhiyun /*
54*4882a593Smuzhiyun * When reading from the journal, we are going through the block device
55*4882a593Smuzhiyun * layer directly and so there is no readahead being done for us. We
56*4882a593Smuzhiyun * need to implement any readahead ourselves if we want it to happen at
57*4882a593Smuzhiyun * all. Recovery is basically one long sequential read, so make sure we
58*4882a593Smuzhiyun * do the IO in reasonably large chunks.
59*4882a593Smuzhiyun *
60*4882a593Smuzhiyun * This is not so critical that we need to be enormously clever about
61*4882a593Smuzhiyun * the readahead size, though. 128K is a purely arbitrary, good-enough
62*4882a593Smuzhiyun * fixed value.
63*4882a593Smuzhiyun */
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun #define MAXBUF 8
do_readahead(journal_t * journal,unsigned int start)66*4882a593Smuzhiyun static int do_readahead(journal_t *journal, unsigned int start)
67*4882a593Smuzhiyun {
68*4882a593Smuzhiyun int err;
69*4882a593Smuzhiyun unsigned int max, nbufs, next;
70*4882a593Smuzhiyun unsigned long long blocknr;
71*4882a593Smuzhiyun struct buffer_head *bh;
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun struct buffer_head * bufs[MAXBUF];
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun /* Do up to 128K of readahead */
76*4882a593Smuzhiyun max = start + (128 * 1024 / journal->j_blocksize);
77*4882a593Smuzhiyun if (max > journal->j_total_len)
78*4882a593Smuzhiyun max = journal->j_total_len;
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
81*4882a593Smuzhiyun * a time to the block device IO layer. */
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun nbufs = 0;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun for (next = start; next < max; next++) {
86*4882a593Smuzhiyun err = jbd2_journal_bmap(journal, next, &blocknr);
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun if (err) {
89*4882a593Smuzhiyun printk(KERN_ERR "JBD2: bad block at offset %u\n",
90*4882a593Smuzhiyun next);
91*4882a593Smuzhiyun goto failed;
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
95*4882a593Smuzhiyun if (!bh) {
96*4882a593Smuzhiyun err = -ENOMEM;
97*4882a593Smuzhiyun goto failed;
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
101*4882a593Smuzhiyun bufs[nbufs++] = bh;
102*4882a593Smuzhiyun if (nbufs == MAXBUF) {
103*4882a593Smuzhiyun ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
104*4882a593Smuzhiyun journal_brelse_array(bufs, nbufs);
105*4882a593Smuzhiyun nbufs = 0;
106*4882a593Smuzhiyun }
107*4882a593Smuzhiyun } else
108*4882a593Smuzhiyun brelse(bh);
109*4882a593Smuzhiyun }
110*4882a593Smuzhiyun
111*4882a593Smuzhiyun if (nbufs)
112*4882a593Smuzhiyun ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
113*4882a593Smuzhiyun err = 0;
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun failed:
116*4882a593Smuzhiyun if (nbufs)
117*4882a593Smuzhiyun journal_brelse_array(bufs, nbufs);
118*4882a593Smuzhiyun return err;
119*4882a593Smuzhiyun }
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun #endif /* __KERNEL__ */
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun /*
125*4882a593Smuzhiyun * Read a block from the journal
126*4882a593Smuzhiyun */
127*4882a593Smuzhiyun
jread(struct buffer_head ** bhp,journal_t * journal,unsigned int offset)128*4882a593Smuzhiyun static int jread(struct buffer_head **bhp, journal_t *journal,
129*4882a593Smuzhiyun unsigned int offset)
130*4882a593Smuzhiyun {
131*4882a593Smuzhiyun int err;
132*4882a593Smuzhiyun unsigned long long blocknr;
133*4882a593Smuzhiyun struct buffer_head *bh;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun *bhp = NULL;
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun if (offset >= journal->j_total_len) {
138*4882a593Smuzhiyun printk(KERN_ERR "JBD2: corrupted journal superblock\n");
139*4882a593Smuzhiyun return -EFSCORRUPTED;
140*4882a593Smuzhiyun }
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun err = jbd2_journal_bmap(journal, offset, &blocknr);
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun if (err) {
145*4882a593Smuzhiyun printk(KERN_ERR "JBD2: bad block at offset %u\n",
146*4882a593Smuzhiyun offset);
147*4882a593Smuzhiyun return err;
148*4882a593Smuzhiyun }
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
151*4882a593Smuzhiyun if (!bh)
152*4882a593Smuzhiyun return -ENOMEM;
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun if (!buffer_uptodate(bh)) {
155*4882a593Smuzhiyun /* If this is a brand new buffer, start readahead.
156*4882a593Smuzhiyun Otherwise, we assume we are already reading it. */
157*4882a593Smuzhiyun if (!buffer_req(bh))
158*4882a593Smuzhiyun do_readahead(journal, offset);
159*4882a593Smuzhiyun wait_on_buffer(bh);
160*4882a593Smuzhiyun }
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun if (!buffer_uptodate(bh)) {
163*4882a593Smuzhiyun printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
164*4882a593Smuzhiyun offset);
165*4882a593Smuzhiyun brelse(bh);
166*4882a593Smuzhiyun return -EIO;
167*4882a593Smuzhiyun }
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun *bhp = bh;
170*4882a593Smuzhiyun return 0;
171*4882a593Smuzhiyun }
172*4882a593Smuzhiyun
/*
 * Verify the checksum kept in the tail of a descriptor block.
 *
 * @j:   journal the block belongs to
 * @buf: block contents, j->j_blocksize bytes
 *
 * Returns 1 if the journal does not use v2/v3 checksums or the stored
 * checksum matches, 0 on mismatch.  The checksum field is zeroed while
 * the checksum is computed and restored afterwards, so @buf is left
 * unchanged on return.
 */
static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
	struct jbd2_journal_block_tail *tail;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	/* The tail occupies the last bytes of the block. */
	tail = (struct jbd2_journal_block_tail *)
		(buf + j->j_blocksize - sizeof(*tail));

	stored = tail->t_checksum;
	tail->t_checksum = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	tail->t_checksum = stored;

	return stored == cpu_to_be32(computed);
}
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun /*
193*4882a593Smuzhiyun * Count the number of in-use tags in a journal descriptor block.
194*4882a593Smuzhiyun */
195*4882a593Smuzhiyun
count_tags(journal_t * journal,struct buffer_head * bh)196*4882a593Smuzhiyun static int count_tags(journal_t *journal, struct buffer_head *bh)
197*4882a593Smuzhiyun {
198*4882a593Smuzhiyun char * tagp;
199*4882a593Smuzhiyun journal_block_tag_t * tag;
200*4882a593Smuzhiyun int nr = 0, size = journal->j_blocksize;
201*4882a593Smuzhiyun int tag_bytes = journal_tag_bytes(journal);
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun if (jbd2_journal_has_csum_v2or3(journal))
204*4882a593Smuzhiyun size -= sizeof(struct jbd2_journal_block_tail);
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun tagp = &bh->b_data[sizeof(journal_header_t)];
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun while ((tagp - bh->b_data + tag_bytes) <= size) {
209*4882a593Smuzhiyun tag = (journal_block_tag_t *) tagp;
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun nr++;
212*4882a593Smuzhiyun tagp += tag_bytes;
213*4882a593Smuzhiyun if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
214*4882a593Smuzhiyun tagp += 16;
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
217*4882a593Smuzhiyun break;
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun return nr;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun
/*
 * Make sure we wrap around the log correctly!
 *
 * If @var has reached or passed the last block of the log (j_fc_last
 * when the fast commit feature is enabled, j_last otherwise), pull it
 * back by the distance between that last block and j_first.
 *
 * @var must be a modifiable lvalue.  It is evaluated more than once, so
 * it must not have side effects.
 */
#define wrap(journal, var)						\
do {									\
	unsigned long _wrap_last =					\
		jbd2_has_feature_fast_commit(journal) ?			\
		(journal)->j_fc_last : (journal)->j_last;		\
									\
	if ((var) >= _wrap_last)					\
		(var) -= (_wrap_last - (journal)->j_first);		\
} while (0)
234*4882a593Smuzhiyun
/*
 * Run one recovery pass over the fast commit area of the journal.
 *
 * @journal: journal being recovered
 * @info:    recovery state; info->end_transaction is handed to the
 *           callback as the expected commit ID
 * @pass:    which recovery pass is running
 *
 * Every block from j_fc_first through j_fc_last is read and passed to
 * journal->j_fc_replay_callback together with its offset relative to
 * j_fc_first.  The walk ends early when a read fails, or when the
 * callback returns a negative value or JBD2_FC_REPLAY_STOP.
 *
 * Returns 0 if no callback is registered or every block was processed;
 * otherwise the jread() error or the callback return value that ended
 * the walk.
 */
static int fc_do_one_pass(journal_t *journal,
			  struct recovery_info *info, enum passtype pass)
{
	unsigned int expected_commit_id = info->end_transaction;
	unsigned long next_fc_block;
	struct buffer_head *bh;
	int err = 0;

	if (!journal->j_fc_replay_callback)
		return 0;

	for (next_fc_block = journal->j_fc_first;
	     next_fc_block <= journal->j_fc_last; ) {
		/* next_fc_block is unsigned long, hence %lu. */
		jbd_debug(3, "Fast commit replay: next block %lu\n",
			  next_fc_block);
		err = jread(&bh, journal, next_fc_block);
		if (err) {
			jbd_debug(3, "Fast commit replay: read error\n");
			break;
		}

		err = journal->j_fc_replay_callback(journal, bh, pass,
					next_fc_block - journal->j_fc_first,
					expected_commit_id);
		brelse(bh);
		next_fc_block++;
		if (err < 0 || err == JBD2_FC_REPLAY_STOP)
			break;
		/* Any other callback result means "keep going". */
		err = 0;
	}

	if (err)
		jbd_debug(3, "Fast commit replay failed, err = %d\n", err);

	return err;
}
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun /**
273*4882a593Smuzhiyun * jbd2_journal_recover - recovers a on-disk journal
274*4882a593Smuzhiyun * @journal: the journal to recover
275*4882a593Smuzhiyun *
276*4882a593Smuzhiyun * The primary function for recovering the log contents when mounting a
277*4882a593Smuzhiyun * journaled device.
278*4882a593Smuzhiyun *
279*4882a593Smuzhiyun * Recovery is done in three passes. In the first pass, we look for the
280*4882a593Smuzhiyun * end of the log. In the second, we assemble the list of revoke
281*4882a593Smuzhiyun * blocks. In the third and final pass, we replay any un-revoked blocks
282*4882a593Smuzhiyun * in the log.
283*4882a593Smuzhiyun */
jbd2_journal_recover(journal_t * journal)284*4882a593Smuzhiyun int jbd2_journal_recover(journal_t *journal)
285*4882a593Smuzhiyun {
286*4882a593Smuzhiyun int err, err2;
287*4882a593Smuzhiyun journal_superblock_t * sb;
288*4882a593Smuzhiyun
289*4882a593Smuzhiyun struct recovery_info info;
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun memset(&info, 0, sizeof(info));
292*4882a593Smuzhiyun sb = journal->j_superblock;
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun /*
295*4882a593Smuzhiyun * The journal superblock's s_start field (the current log head)
296*4882a593Smuzhiyun * is always zero if, and only if, the journal was cleanly
297*4882a593Smuzhiyun * unmounted.
298*4882a593Smuzhiyun */
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun if (!sb->s_start) {
301*4882a593Smuzhiyun jbd_debug(1, "No recovery required, last transaction %d\n",
302*4882a593Smuzhiyun be32_to_cpu(sb->s_sequence));
303*4882a593Smuzhiyun journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
304*4882a593Smuzhiyun return 0;
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
307*4882a593Smuzhiyun err = do_one_pass(journal, &info, PASS_SCAN);
308*4882a593Smuzhiyun if (!err)
309*4882a593Smuzhiyun err = do_one_pass(journal, &info, PASS_REVOKE);
310*4882a593Smuzhiyun if (!err)
311*4882a593Smuzhiyun err = do_one_pass(journal, &info, PASS_REPLAY);
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun jbd_debug(1, "JBD2: recovery, exit status %d, "
314*4882a593Smuzhiyun "recovered transactions %u to %u\n",
315*4882a593Smuzhiyun err, info.start_transaction, info.end_transaction);
316*4882a593Smuzhiyun jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
317*4882a593Smuzhiyun info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun /* Restart the log at the next transaction ID, thus invalidating
320*4882a593Smuzhiyun * any existing commit records in the log. */
321*4882a593Smuzhiyun journal->j_transaction_sequence = ++info.end_transaction;
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun jbd2_journal_clear_revoke(journal);
324*4882a593Smuzhiyun err2 = sync_blockdev(journal->j_fs_dev);
325*4882a593Smuzhiyun if (!err)
326*4882a593Smuzhiyun err = err2;
327*4882a593Smuzhiyun /* Make sure all replayed data is on permanent storage */
328*4882a593Smuzhiyun if (journal->j_flags & JBD2_BARRIER) {
329*4882a593Smuzhiyun err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL);
330*4882a593Smuzhiyun if (!err)
331*4882a593Smuzhiyun err = err2;
332*4882a593Smuzhiyun }
333*4882a593Smuzhiyun return err;
334*4882a593Smuzhiyun }
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun /**
337*4882a593Smuzhiyun * jbd2_journal_skip_recovery - Start journal and wipe exiting records
338*4882a593Smuzhiyun * @journal: journal to startup
339*4882a593Smuzhiyun *
340*4882a593Smuzhiyun * Locate any valid recovery information from the journal and set up the
341*4882a593Smuzhiyun * journal structures in memory to ignore it (presumably because the
342*4882a593Smuzhiyun * caller has evidence that it is out of date).
343*4882a593Smuzhiyun * This function doesn't appear to be exported..
344*4882a593Smuzhiyun *
345*4882a593Smuzhiyun * We perform one pass over the journal to allow us to tell the user how
346*4882a593Smuzhiyun * much recovery information is being erased, and to let us initialise
347*4882a593Smuzhiyun * the journal transaction sequence numbers to the next unused ID.
348*4882a593Smuzhiyun */
jbd2_journal_skip_recovery(journal_t * journal)349*4882a593Smuzhiyun int jbd2_journal_skip_recovery(journal_t *journal)
350*4882a593Smuzhiyun {
351*4882a593Smuzhiyun int err;
352*4882a593Smuzhiyun
353*4882a593Smuzhiyun struct recovery_info info;
354*4882a593Smuzhiyun
355*4882a593Smuzhiyun memset (&info, 0, sizeof(info));
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun err = do_one_pass(journal, &info, PASS_SCAN);
358*4882a593Smuzhiyun
359*4882a593Smuzhiyun if (err) {
360*4882a593Smuzhiyun printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
361*4882a593Smuzhiyun ++journal->j_transaction_sequence;
362*4882a593Smuzhiyun } else {
363*4882a593Smuzhiyun #ifdef CONFIG_JBD2_DEBUG
364*4882a593Smuzhiyun int dropped = info.end_transaction -
365*4882a593Smuzhiyun be32_to_cpu(journal->j_superblock->s_sequence);
366*4882a593Smuzhiyun jbd_debug(1,
367*4882a593Smuzhiyun "JBD2: ignoring %d transaction%s from the journal.\n",
368*4882a593Smuzhiyun dropped, (dropped == 1) ? "" : "s");
369*4882a593Smuzhiyun #endif
370*4882a593Smuzhiyun journal->j_transaction_sequence = ++info.end_transaction;
371*4882a593Smuzhiyun }
372*4882a593Smuzhiyun
373*4882a593Smuzhiyun journal->j_tail = 0;
374*4882a593Smuzhiyun return err;
375*4882a593Smuzhiyun }
376*4882a593Smuzhiyun
/*
 * Decode the block number stored in a descriptor tag.
 *
 * The low 32 bits always come from t_blocknr.  The high 32 bits are
 * taken from t_blocknr_high only when the journal has the 64bit
 * feature; otherwise they are zero.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	unsigned long long low = be32_to_cpu(tag->t_blocknr);

	if (!jbd2_has_feature_64bit(journal))
		return low;

	return low | ((u64)be32_to_cpu(tag->t_blocknr_high) << 32);
}
385*4882a593Smuzhiyun
386*4882a593Smuzhiyun /*
387*4882a593Smuzhiyun * calc_chksums calculates the checksums for the blocks described in the
388*4882a593Smuzhiyun * descriptor block.
389*4882a593Smuzhiyun */
calc_chksums(journal_t * journal,struct buffer_head * bh,unsigned long * next_log_block,__u32 * crc32_sum)390*4882a593Smuzhiyun static int calc_chksums(journal_t *journal, struct buffer_head *bh,
391*4882a593Smuzhiyun unsigned long *next_log_block, __u32 *crc32_sum)
392*4882a593Smuzhiyun {
393*4882a593Smuzhiyun int i, num_blks, err;
394*4882a593Smuzhiyun unsigned long io_block;
395*4882a593Smuzhiyun struct buffer_head *obh;
396*4882a593Smuzhiyun
397*4882a593Smuzhiyun num_blks = count_tags(journal, bh);
398*4882a593Smuzhiyun /* Calculate checksum of the descriptor block. */
399*4882a593Smuzhiyun *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun for (i = 0; i < num_blks; i++) {
402*4882a593Smuzhiyun io_block = (*next_log_block)++;
403*4882a593Smuzhiyun wrap(journal, *next_log_block);
404*4882a593Smuzhiyun err = jread(&obh, journal, io_block);
405*4882a593Smuzhiyun if (err) {
406*4882a593Smuzhiyun printk(KERN_ERR "JBD2: IO error %d recovering block "
407*4882a593Smuzhiyun "%lu in log\n", err, io_block);
408*4882a593Smuzhiyun return 1;
409*4882a593Smuzhiyun } else {
410*4882a593Smuzhiyun *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
411*4882a593Smuzhiyun obh->b_size);
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun put_bh(obh);
414*4882a593Smuzhiyun }
415*4882a593Smuzhiyun return 0;
416*4882a593Smuzhiyun }
417*4882a593Smuzhiyun
/*
 * Verify the checksum stored in a commit block.
 *
 * @j:   journal the block belongs to
 * @buf: commit block contents, j->j_blocksize bytes
 *
 * Returns 1 if the journal does not use v2/v3 checksums or the stored
 * checksum matches, 0 on mismatch.  h_chksum[0] is zeroed while the
 * checksum is computed and restored afterwards.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *hdr = buf;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	stored = hdr->h_chksum[0];
	hdr->h_chksum[0] = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	hdr->h_chksum[0] = stored;

	return stored == cpu_to_be32(computed);
}
435*4882a593Smuzhiyun
/*
 * Verify a logged data block against the checksum stored in its tag.
 *
 * @j:        journal the block belongs to
 * @tag:      descriptor tag for the block
 * @buf:      block contents, j->j_blocksize bytes
 * @sequence: transaction sequence number, mixed into the checksum
 *
 * The checksum covers the big-endian sequence number followed by the
 * block data.  With the csum3 feature it is compared as a 32-bit value
 * against the tag3 layout; otherwise it is compared against the tag's
 * t_checksum after conversion with cpu_to_be16().
 *
 * Returns 1 if the journal does not use v2/v3 checksums or the
 * checksum matches, 0 on mismatch.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      void *buf, __u32 sequence)
{
	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
	__be32 seq_be;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	seq_be = cpu_to_be32(sequence);
	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq_be, sizeof(seq_be));
	csum = jbd2_chksum(j, csum, buf, j->j_blocksize);

	if (jbd2_has_feature_csum3(j))
		return tag3->t_checksum == cpu_to_be32(csum);

	return tag->t_checksum == cpu_to_be16(csum);
}
455*4882a593Smuzhiyun
do_one_pass(journal_t * journal,struct recovery_info * info,enum passtype pass)456*4882a593Smuzhiyun static int do_one_pass(journal_t *journal,
457*4882a593Smuzhiyun struct recovery_info *info, enum passtype pass)
458*4882a593Smuzhiyun {
459*4882a593Smuzhiyun unsigned int first_commit_ID, next_commit_ID;
460*4882a593Smuzhiyun unsigned long next_log_block;
461*4882a593Smuzhiyun int err, success = 0;
462*4882a593Smuzhiyun journal_superblock_t * sb;
463*4882a593Smuzhiyun journal_header_t * tmp;
464*4882a593Smuzhiyun struct buffer_head * bh;
465*4882a593Smuzhiyun unsigned int sequence;
466*4882a593Smuzhiyun int blocktype;
467*4882a593Smuzhiyun int tag_bytes = journal_tag_bytes(journal);
468*4882a593Smuzhiyun __u32 crc32_sum = ~0; /* Transactional Checksums */
469*4882a593Smuzhiyun int descr_csum_size = 0;
470*4882a593Smuzhiyun int block_error = 0;
471*4882a593Smuzhiyun bool need_check_commit_time = false;
472*4882a593Smuzhiyun __u64 last_trans_commit_time = 0, commit_time;
473*4882a593Smuzhiyun
474*4882a593Smuzhiyun /*
475*4882a593Smuzhiyun * First thing is to establish what we expect to find in the log
476*4882a593Smuzhiyun * (in terms of transaction IDs), and where (in terms of log
477*4882a593Smuzhiyun * block offsets): query the superblock.
478*4882a593Smuzhiyun */
479*4882a593Smuzhiyun
480*4882a593Smuzhiyun sb = journal->j_superblock;
481*4882a593Smuzhiyun next_commit_ID = be32_to_cpu(sb->s_sequence);
482*4882a593Smuzhiyun next_log_block = be32_to_cpu(sb->s_start);
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun first_commit_ID = next_commit_ID;
485*4882a593Smuzhiyun if (pass == PASS_SCAN)
486*4882a593Smuzhiyun info->start_transaction = first_commit_ID;
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun jbd_debug(1, "Starting recovery pass %d\n", pass);
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun /*
491*4882a593Smuzhiyun * Now we walk through the log, transaction by transaction,
492*4882a593Smuzhiyun * making sure that each transaction has a commit block in the
493*4882a593Smuzhiyun * expected place. Each complete transaction gets replayed back
494*4882a593Smuzhiyun * into the main filesystem.
495*4882a593Smuzhiyun */
496*4882a593Smuzhiyun
497*4882a593Smuzhiyun while (1) {
498*4882a593Smuzhiyun int flags;
499*4882a593Smuzhiyun char * tagp;
500*4882a593Smuzhiyun journal_block_tag_t * tag;
501*4882a593Smuzhiyun struct buffer_head * obh;
502*4882a593Smuzhiyun struct buffer_head * nbh;
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun cond_resched();
505*4882a593Smuzhiyun
506*4882a593Smuzhiyun /* If we already know where to stop the log traversal,
507*4882a593Smuzhiyun * check right now that we haven't gone past the end of
508*4882a593Smuzhiyun * the log. */
509*4882a593Smuzhiyun
510*4882a593Smuzhiyun if (pass != PASS_SCAN)
511*4882a593Smuzhiyun if (tid_geq(next_commit_ID, info->end_transaction))
512*4882a593Smuzhiyun break;
513*4882a593Smuzhiyun
514*4882a593Smuzhiyun jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
515*4882a593Smuzhiyun next_commit_ID, next_log_block,
516*4882a593Smuzhiyun jbd2_has_feature_fast_commit(journal) ?
517*4882a593Smuzhiyun journal->j_fc_last : journal->j_last);
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun /* Skip over each chunk of the transaction looking
520*4882a593Smuzhiyun * either the next descriptor block or the final commit
521*4882a593Smuzhiyun * record. */
522*4882a593Smuzhiyun
523*4882a593Smuzhiyun jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
524*4882a593Smuzhiyun err = jread(&bh, journal, next_log_block);
525*4882a593Smuzhiyun if (err)
526*4882a593Smuzhiyun goto failed;
527*4882a593Smuzhiyun
528*4882a593Smuzhiyun next_log_block++;
529*4882a593Smuzhiyun wrap(journal, next_log_block);
530*4882a593Smuzhiyun
531*4882a593Smuzhiyun /* What kind of buffer is it?
532*4882a593Smuzhiyun *
533*4882a593Smuzhiyun * If it is a descriptor block, check that it has the
534*4882a593Smuzhiyun * expected sequence number. Otherwise, we're all done
535*4882a593Smuzhiyun * here. */
536*4882a593Smuzhiyun
537*4882a593Smuzhiyun tmp = (journal_header_t *)bh->b_data;
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
540*4882a593Smuzhiyun brelse(bh);
541*4882a593Smuzhiyun break;
542*4882a593Smuzhiyun }
543*4882a593Smuzhiyun
544*4882a593Smuzhiyun blocktype = be32_to_cpu(tmp->h_blocktype);
545*4882a593Smuzhiyun sequence = be32_to_cpu(tmp->h_sequence);
546*4882a593Smuzhiyun jbd_debug(3, "Found magic %d, sequence %d\n",
547*4882a593Smuzhiyun blocktype, sequence);
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun if (sequence != next_commit_ID) {
550*4882a593Smuzhiyun brelse(bh);
551*4882a593Smuzhiyun break;
552*4882a593Smuzhiyun }
553*4882a593Smuzhiyun
554*4882a593Smuzhiyun /* OK, we have a valid descriptor block which matches
555*4882a593Smuzhiyun * all of the sequence number checks. What are we going
556*4882a593Smuzhiyun * to do with it? That depends on the pass... */
557*4882a593Smuzhiyun
558*4882a593Smuzhiyun switch(blocktype) {
559*4882a593Smuzhiyun case JBD2_DESCRIPTOR_BLOCK:
560*4882a593Smuzhiyun /* Verify checksum first */
561*4882a593Smuzhiyun if (jbd2_journal_has_csum_v2or3(journal))
562*4882a593Smuzhiyun descr_csum_size =
563*4882a593Smuzhiyun sizeof(struct jbd2_journal_block_tail);
564*4882a593Smuzhiyun if (descr_csum_size > 0 &&
565*4882a593Smuzhiyun !jbd2_descriptor_block_csum_verify(journal,
566*4882a593Smuzhiyun bh->b_data)) {
567*4882a593Smuzhiyun /*
568*4882a593Smuzhiyun * PASS_SCAN can see stale blocks due to lazy
569*4882a593Smuzhiyun * journal init. Don't error out on those yet.
570*4882a593Smuzhiyun */
571*4882a593Smuzhiyun if (pass != PASS_SCAN) {
572*4882a593Smuzhiyun pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
573*4882a593Smuzhiyun next_log_block);
574*4882a593Smuzhiyun err = -EFSBADCRC;
575*4882a593Smuzhiyun brelse(bh);
576*4882a593Smuzhiyun goto failed;
577*4882a593Smuzhiyun }
578*4882a593Smuzhiyun need_check_commit_time = true;
579*4882a593Smuzhiyun jbd_debug(1,
580*4882a593Smuzhiyun "invalid descriptor block found in %lu\n",
581*4882a593Smuzhiyun next_log_block);
582*4882a593Smuzhiyun }
583*4882a593Smuzhiyun
584*4882a593Smuzhiyun /* If it is a valid descriptor block, replay it
585*4882a593Smuzhiyun * in pass REPLAY; if journal_checksums enabled, then
586*4882a593Smuzhiyun * calculate checksums in PASS_SCAN, otherwise,
587*4882a593Smuzhiyun * just skip over the blocks it describes. */
588*4882a593Smuzhiyun if (pass != PASS_REPLAY) {
589*4882a593Smuzhiyun if (pass == PASS_SCAN &&
590*4882a593Smuzhiyun jbd2_has_feature_checksum(journal) &&
591*4882a593Smuzhiyun !need_check_commit_time &&
592*4882a593Smuzhiyun !info->end_transaction) {
593*4882a593Smuzhiyun if (calc_chksums(journal, bh,
594*4882a593Smuzhiyun &next_log_block,
595*4882a593Smuzhiyun &crc32_sum)) {
596*4882a593Smuzhiyun put_bh(bh);
597*4882a593Smuzhiyun break;
598*4882a593Smuzhiyun }
599*4882a593Smuzhiyun put_bh(bh);
600*4882a593Smuzhiyun continue;
601*4882a593Smuzhiyun }
602*4882a593Smuzhiyun next_log_block += count_tags(journal, bh);
603*4882a593Smuzhiyun wrap(journal, next_log_block);
604*4882a593Smuzhiyun put_bh(bh);
605*4882a593Smuzhiyun continue;
606*4882a593Smuzhiyun }
607*4882a593Smuzhiyun
608*4882a593Smuzhiyun /* A descriptor block: we can now write all of
609*4882a593Smuzhiyun * the data blocks. Yay, useful work is finally
610*4882a593Smuzhiyun * getting done here! */
611*4882a593Smuzhiyun
612*4882a593Smuzhiyun tagp = &bh->b_data[sizeof(journal_header_t)];
613*4882a593Smuzhiyun while ((tagp - bh->b_data + tag_bytes)
614*4882a593Smuzhiyun <= journal->j_blocksize - descr_csum_size) {
615*4882a593Smuzhiyun unsigned long io_block;
616*4882a593Smuzhiyun
617*4882a593Smuzhiyun tag = (journal_block_tag_t *) tagp;
618*4882a593Smuzhiyun flags = be16_to_cpu(tag->t_flags);
619*4882a593Smuzhiyun
620*4882a593Smuzhiyun io_block = next_log_block++;
621*4882a593Smuzhiyun wrap(journal, next_log_block);
622*4882a593Smuzhiyun err = jread(&obh, journal, io_block);
623*4882a593Smuzhiyun if (err) {
624*4882a593Smuzhiyun /* Recover what we can, but
625*4882a593Smuzhiyun * report failure at the end. */
626*4882a593Smuzhiyun success = err;
627*4882a593Smuzhiyun printk(KERN_ERR
628*4882a593Smuzhiyun "JBD2: IO error %d recovering "
629*4882a593Smuzhiyun "block %ld in log\n",
630*4882a593Smuzhiyun err, io_block);
631*4882a593Smuzhiyun } else {
632*4882a593Smuzhiyun unsigned long long blocknr;
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun J_ASSERT(obh != NULL);
635*4882a593Smuzhiyun blocknr = read_tag_block(journal,
636*4882a593Smuzhiyun tag);
637*4882a593Smuzhiyun
638*4882a593Smuzhiyun /* If the block has been
639*4882a593Smuzhiyun * revoked, then we're all done
640*4882a593Smuzhiyun * here. */
641*4882a593Smuzhiyun if (jbd2_journal_test_revoke
642*4882a593Smuzhiyun (journal, blocknr,
643*4882a593Smuzhiyun next_commit_ID)) {
644*4882a593Smuzhiyun brelse(obh);
645*4882a593Smuzhiyun ++info->nr_revoke_hits;
646*4882a593Smuzhiyun goto skip_write;
647*4882a593Smuzhiyun }
648*4882a593Smuzhiyun
649*4882a593Smuzhiyun /* Look for block corruption */
650*4882a593Smuzhiyun if (!jbd2_block_tag_csum_verify(
651*4882a593Smuzhiyun journal, tag, obh->b_data,
652*4882a593Smuzhiyun be32_to_cpu(tmp->h_sequence))) {
653*4882a593Smuzhiyun brelse(obh);
654*4882a593Smuzhiyun success = -EFSBADCRC;
655*4882a593Smuzhiyun printk(KERN_ERR "JBD2: Invalid "
656*4882a593Smuzhiyun "checksum recovering "
657*4882a593Smuzhiyun "data block %llu in "
658*4882a593Smuzhiyun "log\n", blocknr);
659*4882a593Smuzhiyun block_error = 1;
660*4882a593Smuzhiyun goto skip_write;
661*4882a593Smuzhiyun }
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun /* Find a buffer for the new
664*4882a593Smuzhiyun * data being restored */
665*4882a593Smuzhiyun nbh = __getblk(journal->j_fs_dev,
666*4882a593Smuzhiyun blocknr,
667*4882a593Smuzhiyun journal->j_blocksize);
668*4882a593Smuzhiyun if (nbh == NULL) {
669*4882a593Smuzhiyun printk(KERN_ERR
670*4882a593Smuzhiyun "JBD2: Out of memory "
671*4882a593Smuzhiyun "during recovery.\n");
672*4882a593Smuzhiyun err = -ENOMEM;
673*4882a593Smuzhiyun brelse(bh);
674*4882a593Smuzhiyun brelse(obh);
675*4882a593Smuzhiyun goto failed;
676*4882a593Smuzhiyun }
677*4882a593Smuzhiyun
678*4882a593Smuzhiyun lock_buffer(nbh);
679*4882a593Smuzhiyun memcpy(nbh->b_data, obh->b_data,
680*4882a593Smuzhiyun journal->j_blocksize);
681*4882a593Smuzhiyun if (flags & JBD2_FLAG_ESCAPE) {
682*4882a593Smuzhiyun *((__be32 *)nbh->b_data) =
683*4882a593Smuzhiyun cpu_to_be32(JBD2_MAGIC_NUMBER);
684*4882a593Smuzhiyun }
685*4882a593Smuzhiyun
686*4882a593Smuzhiyun BUFFER_TRACE(nbh, "marking dirty");
687*4882a593Smuzhiyun set_buffer_uptodate(nbh);
688*4882a593Smuzhiyun mark_buffer_dirty(nbh);
689*4882a593Smuzhiyun BUFFER_TRACE(nbh, "marking uptodate");
690*4882a593Smuzhiyun ++info->nr_replays;
691*4882a593Smuzhiyun /* ll_rw_block(WRITE, 1, &nbh); */
692*4882a593Smuzhiyun unlock_buffer(nbh);
693*4882a593Smuzhiyun brelse(obh);
694*4882a593Smuzhiyun brelse(nbh);
695*4882a593Smuzhiyun }
696*4882a593Smuzhiyun
697*4882a593Smuzhiyun skip_write:
698*4882a593Smuzhiyun tagp += tag_bytes;
699*4882a593Smuzhiyun if (!(flags & JBD2_FLAG_SAME_UUID))
700*4882a593Smuzhiyun tagp += 16;
701*4882a593Smuzhiyun
702*4882a593Smuzhiyun if (flags & JBD2_FLAG_LAST_TAG)
703*4882a593Smuzhiyun break;
704*4882a593Smuzhiyun }
705*4882a593Smuzhiyun
706*4882a593Smuzhiyun brelse(bh);
707*4882a593Smuzhiyun continue;
708*4882a593Smuzhiyun
709*4882a593Smuzhiyun case JBD2_COMMIT_BLOCK:
710*4882a593Smuzhiyun /* How to differentiate between interrupted commit
711*4882a593Smuzhiyun * and journal corruption ?
712*4882a593Smuzhiyun *
713*4882a593Smuzhiyun * {nth transaction}
714*4882a593Smuzhiyun * Checksum Verification Failed
715*4882a593Smuzhiyun * |
716*4882a593Smuzhiyun * ____________________
717*4882a593Smuzhiyun * | |
718*4882a593Smuzhiyun * async_commit sync_commit
719*4882a593Smuzhiyun * | |
720*4882a593Smuzhiyun * | GO TO NEXT "Journal Corruption"
721*4882a593Smuzhiyun * | TRANSACTION
722*4882a593Smuzhiyun * |
723*4882a593Smuzhiyun * {(n+1)th transaction}
724*4882a593Smuzhiyun * |
725*4882a593Smuzhiyun * _______|______________
726*4882a593Smuzhiyun * | |
727*4882a593Smuzhiyun * Commit block found Commit block not found
728*4882a593Smuzhiyun * | |
729*4882a593Smuzhiyun * "Journal Corruption" |
730*4882a593Smuzhiyun * _____________|_________
731*4882a593Smuzhiyun * | |
732*4882a593Smuzhiyun * nth trans corrupt OR nth trans
733*4882a593Smuzhiyun * and (n+1)th interrupted interrupted
734*4882a593Smuzhiyun * before commit block
735*4882a593Smuzhiyun * could reach the disk.
736*4882a593Smuzhiyun * (Cannot find the difference in above
737*4882a593Smuzhiyun * mentioned conditions. Hence assume
738*4882a593Smuzhiyun * "Interrupted Commit".)
739*4882a593Smuzhiyun */
740*4882a593Smuzhiyun commit_time = be64_to_cpu(
741*4882a593Smuzhiyun ((struct commit_header *)bh->b_data)->h_commit_sec);
742*4882a593Smuzhiyun /*
743*4882a593Smuzhiyun * If need_check_commit_time is set, it means we are in
744*4882a593Smuzhiyun * PASS_SCAN and csum verify failed before. If
745*4882a593Smuzhiyun * commit_time is increasing, it's the same journal,
746*4882a593Smuzhiyun * otherwise it is stale journal block, just end this
747*4882a593Smuzhiyun * recovery.
748*4882a593Smuzhiyun */
749*4882a593Smuzhiyun if (need_check_commit_time) {
750*4882a593Smuzhiyun if (commit_time >= last_trans_commit_time) {
751*4882a593Smuzhiyun pr_err("JBD2: Invalid checksum found in transaction %u\n",
752*4882a593Smuzhiyun next_commit_ID);
753*4882a593Smuzhiyun err = -EFSBADCRC;
754*4882a593Smuzhiyun brelse(bh);
755*4882a593Smuzhiyun goto failed;
756*4882a593Smuzhiyun }
757*4882a593Smuzhiyun ignore_crc_mismatch:
758*4882a593Smuzhiyun /*
759*4882a593Smuzhiyun * It likely does not belong to same journal,
760*4882a593Smuzhiyun * just end this recovery with success.
761*4882a593Smuzhiyun */
762*4882a593Smuzhiyun jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
763*4882a593Smuzhiyun next_commit_ID);
764*4882a593Smuzhiyun err = 0;
765*4882a593Smuzhiyun brelse(bh);
766*4882a593Smuzhiyun goto done;
767*4882a593Smuzhiyun }
768*4882a593Smuzhiyun
769*4882a593Smuzhiyun /*
770*4882a593Smuzhiyun * Found an expected commit block: if checksums
771*4882a593Smuzhiyun * are present, verify them in PASS_SCAN; else not
772*4882a593Smuzhiyun * much to do other than move on to the next sequence
773*4882a593Smuzhiyun * number.
774*4882a593Smuzhiyun */
775*4882a593Smuzhiyun if (pass == PASS_SCAN &&
776*4882a593Smuzhiyun jbd2_has_feature_checksum(journal)) {
777*4882a593Smuzhiyun struct commit_header *cbh =
778*4882a593Smuzhiyun (struct commit_header *)bh->b_data;
779*4882a593Smuzhiyun unsigned found_chksum =
780*4882a593Smuzhiyun be32_to_cpu(cbh->h_chksum[0]);
781*4882a593Smuzhiyun
782*4882a593Smuzhiyun if (info->end_transaction) {
783*4882a593Smuzhiyun journal->j_failed_commit =
784*4882a593Smuzhiyun info->end_transaction;
785*4882a593Smuzhiyun brelse(bh);
786*4882a593Smuzhiyun break;
787*4882a593Smuzhiyun }
788*4882a593Smuzhiyun
789*4882a593Smuzhiyun /* Neither checksum match nor unused? */
790*4882a593Smuzhiyun if (!((crc32_sum == found_chksum &&
791*4882a593Smuzhiyun cbh->h_chksum_type ==
792*4882a593Smuzhiyun JBD2_CRC32_CHKSUM &&
793*4882a593Smuzhiyun cbh->h_chksum_size ==
794*4882a593Smuzhiyun JBD2_CRC32_CHKSUM_SIZE) ||
795*4882a593Smuzhiyun (cbh->h_chksum_type == 0 &&
796*4882a593Smuzhiyun cbh->h_chksum_size == 0 &&
797*4882a593Smuzhiyun found_chksum == 0)))
798*4882a593Smuzhiyun goto chksum_error;
799*4882a593Smuzhiyun
800*4882a593Smuzhiyun crc32_sum = ~0;
801*4882a593Smuzhiyun }
802*4882a593Smuzhiyun if (pass == PASS_SCAN &&
803*4882a593Smuzhiyun !jbd2_commit_block_csum_verify(journal,
804*4882a593Smuzhiyun bh->b_data)) {
805*4882a593Smuzhiyun chksum_error:
806*4882a593Smuzhiyun if (commit_time < last_trans_commit_time)
807*4882a593Smuzhiyun goto ignore_crc_mismatch;
808*4882a593Smuzhiyun info->end_transaction = next_commit_ID;
809*4882a593Smuzhiyun
810*4882a593Smuzhiyun if (!jbd2_has_feature_async_commit(journal)) {
811*4882a593Smuzhiyun journal->j_failed_commit =
812*4882a593Smuzhiyun next_commit_ID;
813*4882a593Smuzhiyun brelse(bh);
814*4882a593Smuzhiyun break;
815*4882a593Smuzhiyun }
816*4882a593Smuzhiyun }
817*4882a593Smuzhiyun if (pass == PASS_SCAN)
818*4882a593Smuzhiyun last_trans_commit_time = commit_time;
819*4882a593Smuzhiyun brelse(bh);
820*4882a593Smuzhiyun next_commit_ID++;
821*4882a593Smuzhiyun continue;
822*4882a593Smuzhiyun
823*4882a593Smuzhiyun case JBD2_REVOKE_BLOCK:
824*4882a593Smuzhiyun /*
825*4882a593Smuzhiyun * Check revoke block crc in pass_scan, if csum verify
826*4882a593Smuzhiyun * failed, check commit block time later.
827*4882a593Smuzhiyun */
828*4882a593Smuzhiyun if (pass == PASS_SCAN &&
829*4882a593Smuzhiyun !jbd2_descriptor_block_csum_verify(journal,
830*4882a593Smuzhiyun bh->b_data)) {
831*4882a593Smuzhiyun jbd_debug(1, "JBD2: invalid revoke block found in %lu\n",
832*4882a593Smuzhiyun next_log_block);
833*4882a593Smuzhiyun need_check_commit_time = true;
834*4882a593Smuzhiyun }
835*4882a593Smuzhiyun /* If we aren't in the REVOKE pass, then we can
836*4882a593Smuzhiyun * just skip over this block. */
837*4882a593Smuzhiyun if (pass != PASS_REVOKE) {
838*4882a593Smuzhiyun brelse(bh);
839*4882a593Smuzhiyun continue;
840*4882a593Smuzhiyun }
841*4882a593Smuzhiyun
842*4882a593Smuzhiyun err = scan_revoke_records(journal, bh,
843*4882a593Smuzhiyun next_commit_ID, info);
844*4882a593Smuzhiyun brelse(bh);
845*4882a593Smuzhiyun if (err)
846*4882a593Smuzhiyun goto failed;
847*4882a593Smuzhiyun continue;
848*4882a593Smuzhiyun
849*4882a593Smuzhiyun default:
850*4882a593Smuzhiyun jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
851*4882a593Smuzhiyun blocktype);
852*4882a593Smuzhiyun brelse(bh);
853*4882a593Smuzhiyun goto done;
854*4882a593Smuzhiyun }
855*4882a593Smuzhiyun }
856*4882a593Smuzhiyun
857*4882a593Smuzhiyun done:
858*4882a593Smuzhiyun /*
859*4882a593Smuzhiyun * We broke out of the log scan loop: either we came to the
860*4882a593Smuzhiyun * known end of the log or we found an unexpected block in the
861*4882a593Smuzhiyun * log. If the latter happened, then we know that the "current"
862*4882a593Smuzhiyun * transaction marks the end of the valid log.
863*4882a593Smuzhiyun */
864*4882a593Smuzhiyun
865*4882a593Smuzhiyun if (pass == PASS_SCAN) {
866*4882a593Smuzhiyun if (!info->end_transaction)
867*4882a593Smuzhiyun info->end_transaction = next_commit_ID;
868*4882a593Smuzhiyun } else {
869*4882a593Smuzhiyun /* It's really bad news if different passes end up at
870*4882a593Smuzhiyun * different places (but possible due to IO errors). */
871*4882a593Smuzhiyun if (info->end_transaction != next_commit_ID) {
872*4882a593Smuzhiyun printk(KERN_ERR "JBD2: recovery pass %d ended at "
873*4882a593Smuzhiyun "transaction %u, expected %u\n",
874*4882a593Smuzhiyun pass, next_commit_ID, info->end_transaction);
875*4882a593Smuzhiyun if (!success)
876*4882a593Smuzhiyun success = -EIO;
877*4882a593Smuzhiyun }
878*4882a593Smuzhiyun }
879*4882a593Smuzhiyun
880*4882a593Smuzhiyun if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
881*4882a593Smuzhiyun err = fc_do_one_pass(journal, info, pass);
882*4882a593Smuzhiyun if (err)
883*4882a593Smuzhiyun success = err;
884*4882a593Smuzhiyun }
885*4882a593Smuzhiyun
886*4882a593Smuzhiyun if (block_error && success == 0)
887*4882a593Smuzhiyun success = -EIO;
888*4882a593Smuzhiyun return success;
889*4882a593Smuzhiyun
890*4882a593Smuzhiyun failed:
891*4882a593Smuzhiyun return err;
892*4882a593Smuzhiyun }
893*4882a593Smuzhiyun
894*4882a593Smuzhiyun /* Scan a revoke record, marking all blocks mentioned as revoked. */
895*4882a593Smuzhiyun
scan_revoke_records(journal_t * journal,struct buffer_head * bh,tid_t sequence,struct recovery_info * info)896*4882a593Smuzhiyun static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
897*4882a593Smuzhiyun tid_t sequence, struct recovery_info *info)
898*4882a593Smuzhiyun {
899*4882a593Smuzhiyun jbd2_journal_revoke_header_t *header;
900*4882a593Smuzhiyun int offset, max;
901*4882a593Smuzhiyun int csum_size = 0;
902*4882a593Smuzhiyun __u32 rcount;
903*4882a593Smuzhiyun int record_len = 4;
904*4882a593Smuzhiyun
905*4882a593Smuzhiyun header = (jbd2_journal_revoke_header_t *) bh->b_data;
906*4882a593Smuzhiyun offset = sizeof(jbd2_journal_revoke_header_t);
907*4882a593Smuzhiyun rcount = be32_to_cpu(header->r_count);
908*4882a593Smuzhiyun
909*4882a593Smuzhiyun if (jbd2_journal_has_csum_v2or3(journal))
910*4882a593Smuzhiyun csum_size = sizeof(struct jbd2_journal_block_tail);
911*4882a593Smuzhiyun if (rcount > journal->j_blocksize - csum_size)
912*4882a593Smuzhiyun return -EINVAL;
913*4882a593Smuzhiyun max = rcount;
914*4882a593Smuzhiyun
915*4882a593Smuzhiyun if (jbd2_has_feature_64bit(journal))
916*4882a593Smuzhiyun record_len = 8;
917*4882a593Smuzhiyun
918*4882a593Smuzhiyun while (offset + record_len <= max) {
919*4882a593Smuzhiyun unsigned long long blocknr;
920*4882a593Smuzhiyun int err;
921*4882a593Smuzhiyun
922*4882a593Smuzhiyun if (record_len == 4)
923*4882a593Smuzhiyun blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
924*4882a593Smuzhiyun else
925*4882a593Smuzhiyun blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
926*4882a593Smuzhiyun offset += record_len;
927*4882a593Smuzhiyun err = jbd2_journal_set_revoke(journal, blocknr, sequence);
928*4882a593Smuzhiyun if (err)
929*4882a593Smuzhiyun return err;
930*4882a593Smuzhiyun ++info->nr_revokes;
931*4882a593Smuzhiyun }
932*4882a593Smuzhiyun return 0;
933*4882a593Smuzhiyun }
934