1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * This file is part of UBIFS.
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * Copyright (C) 2006-2008 Nokia Corporation.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * SPDX-License-Identifier: GPL-2.0+
7*4882a593Smuzhiyun *
8*4882a593Smuzhiyun * Authors: Adrian Hunter
9*4882a593Smuzhiyun * Artem Bityutskiy (Битюцкий Артём)
10*4882a593Smuzhiyun */
11*4882a593Smuzhiyun
/*
 * This file contains journal replay code. It runs when the file-system is being
 * mounted and requires no locking.
 *
 * The larger the journal, the longer it takes to scan it, and so the longer it
 * takes to mount UBIFS. This is why the journal has a limited size, which may
 * be changed depending on the system requirements. On the other hand, a larger
 * journal gives faster I/O because the index is written less frequently, so
 * this is a trade-off. Also, the journal is indexed by the in-memory index
 * (TNC), so the larger the journal, the more memory its index may consume.
 */
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun #ifdef __UBOOT__
25*4882a593Smuzhiyun #include <linux/compat.h>
26*4882a593Smuzhiyun #include <linux/err.h>
27*4882a593Smuzhiyun #endif
28*4882a593Smuzhiyun #include "ubifs.h"
29*4882a593Smuzhiyun #include <linux/bug.h>
30*4882a593Smuzhiyun #include <linux/list_sort.h>
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun /**
33*4882a593Smuzhiyun * struct replay_entry - replay list entry.
34*4882a593Smuzhiyun * @lnum: logical eraseblock number of the node
35*4882a593Smuzhiyun * @offs: node offset
36*4882a593Smuzhiyun * @len: node length
37*4882a593Smuzhiyun * @deletion: non-zero if this entry corresponds to a node deletion
38*4882a593Smuzhiyun * @sqnum: node sequence number
39*4882a593Smuzhiyun * @list: links the replay list
40*4882a593Smuzhiyun * @key: node key
41*4882a593Smuzhiyun * @nm: directory entry name
42*4882a593Smuzhiyun * @old_size: truncation old size
43*4882a593Smuzhiyun * @new_size: truncation new size
44*4882a593Smuzhiyun *
45*4882a593Smuzhiyun * The replay process first scans all buds and builds the replay list, then
46*4882a593Smuzhiyun * sorts the replay list in nodes sequence number order, and then inserts all
47*4882a593Smuzhiyun * the replay entries to the TNC.
48*4882a593Smuzhiyun */
49*4882a593Smuzhiyun struct replay_entry {
50*4882a593Smuzhiyun int lnum;
51*4882a593Smuzhiyun int offs;
52*4882a593Smuzhiyun int len;
53*4882a593Smuzhiyun unsigned int deletion:1;
54*4882a593Smuzhiyun unsigned long long sqnum;
55*4882a593Smuzhiyun struct list_head list;
56*4882a593Smuzhiyun union ubifs_key key;
57*4882a593Smuzhiyun union {
58*4882a593Smuzhiyun struct qstr nm;
59*4882a593Smuzhiyun struct {
60*4882a593Smuzhiyun loff_t old_size;
61*4882a593Smuzhiyun loff_t new_size;
62*4882a593Smuzhiyun };
63*4882a593Smuzhiyun };
64*4882a593Smuzhiyun };
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun /**
67*4882a593Smuzhiyun * struct bud_entry - entry in the list of buds to replay.
68*4882a593Smuzhiyun * @list: next bud in the list
69*4882a593Smuzhiyun * @bud: bud description object
70*4882a593Smuzhiyun * @sqnum: reference node sequence number
71*4882a593Smuzhiyun * @free: free bytes in the bud
72*4882a593Smuzhiyun * @dirty: dirty bytes in the bud
73*4882a593Smuzhiyun */
74*4882a593Smuzhiyun struct bud_entry {
75*4882a593Smuzhiyun struct list_head list;
76*4882a593Smuzhiyun struct ubifs_bud *bud;
77*4882a593Smuzhiyun unsigned long long sqnum;
78*4882a593Smuzhiyun int free;
79*4882a593Smuzhiyun int dirty;
80*4882a593Smuzhiyun };
81*4882a593Smuzhiyun
82*4882a593Smuzhiyun /**
83*4882a593Smuzhiyun * set_bud_lprops - set free and dirty space used by a bud.
84*4882a593Smuzhiyun * @c: UBIFS file-system description object
85*4882a593Smuzhiyun * @b: bud entry which describes the bud
86*4882a593Smuzhiyun *
87*4882a593Smuzhiyun * This function makes sure the LEB properties of bud @b are set correctly
88*4882a593Smuzhiyun * after the replay. Returns zero in case of success and a negative error code
89*4882a593Smuzhiyun * in case of failure.
90*4882a593Smuzhiyun */
set_bud_lprops(struct ubifs_info * c,struct bud_entry * b)91*4882a593Smuzhiyun static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
92*4882a593Smuzhiyun {
93*4882a593Smuzhiyun const struct ubifs_lprops *lp;
94*4882a593Smuzhiyun int err = 0, dirty;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun ubifs_get_lprops(c);
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
99*4882a593Smuzhiyun if (IS_ERR(lp)) {
100*4882a593Smuzhiyun err = PTR_ERR(lp);
101*4882a593Smuzhiyun goto out;
102*4882a593Smuzhiyun }
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun dirty = lp->dirty;
105*4882a593Smuzhiyun if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
106*4882a593Smuzhiyun /*
107*4882a593Smuzhiyun * The LEB was added to the journal with a starting offset of
108*4882a593Smuzhiyun * zero which means the LEB must have been empty. The LEB
109*4882a593Smuzhiyun * property values should be @lp->free == @c->leb_size and
110*4882a593Smuzhiyun * @lp->dirty == 0, but that is not the case. The reason is that
111*4882a593Smuzhiyun * the LEB had been garbage collected before it became the bud,
112*4882a593Smuzhiyun * and there was not commit inbetween. The garbage collector
113*4882a593Smuzhiyun * resets the free and dirty space without recording it
114*4882a593Smuzhiyun * anywhere except lprops, so if there was no commit then
115*4882a593Smuzhiyun * lprops does not have that information.
116*4882a593Smuzhiyun *
117*4882a593Smuzhiyun * We do not need to adjust free space because the scan has told
118*4882a593Smuzhiyun * us the exact value which is recorded in the replay entry as
119*4882a593Smuzhiyun * @b->free.
120*4882a593Smuzhiyun *
121*4882a593Smuzhiyun * However we do need to subtract from the dirty space the
122*4882a593Smuzhiyun * amount of space that the garbage collector reclaimed, which
123*4882a593Smuzhiyun * is the whole LEB minus the amount of space that was free.
124*4882a593Smuzhiyun */
125*4882a593Smuzhiyun dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
126*4882a593Smuzhiyun lp->free, lp->dirty);
127*4882a593Smuzhiyun dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
128*4882a593Smuzhiyun lp->free, lp->dirty);
129*4882a593Smuzhiyun dirty -= c->leb_size - lp->free;
130*4882a593Smuzhiyun /*
131*4882a593Smuzhiyun * If the replay order was perfect the dirty space would now be
132*4882a593Smuzhiyun * zero. The order is not perfect because the journal heads
133*4882a593Smuzhiyun * race with each other. This is not a problem but is does mean
134*4882a593Smuzhiyun * that the dirty space may temporarily exceed c->leb_size
135*4882a593Smuzhiyun * during the replay.
136*4882a593Smuzhiyun */
137*4882a593Smuzhiyun if (dirty != 0)
138*4882a593Smuzhiyun dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
139*4882a593Smuzhiyun b->bud->lnum, lp->free, lp->dirty, b->free,
140*4882a593Smuzhiyun b->dirty);
141*4882a593Smuzhiyun }
142*4882a593Smuzhiyun lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
143*4882a593Smuzhiyun lp->flags | LPROPS_TAKEN, 0);
144*4882a593Smuzhiyun if (IS_ERR(lp)) {
145*4882a593Smuzhiyun err = PTR_ERR(lp);
146*4882a593Smuzhiyun goto out;
147*4882a593Smuzhiyun }
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun /* Make sure the journal head points to the latest bud */
150*4882a593Smuzhiyun err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
151*4882a593Smuzhiyun b->bud->lnum, c->leb_size - b->free);
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun out:
154*4882a593Smuzhiyun ubifs_release_lprops(c);
155*4882a593Smuzhiyun return err;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /**
159*4882a593Smuzhiyun * set_buds_lprops - set free and dirty space for all replayed buds.
160*4882a593Smuzhiyun * @c: UBIFS file-system description object
161*4882a593Smuzhiyun *
162*4882a593Smuzhiyun * This function sets LEB properties for all replayed buds. Returns zero in
163*4882a593Smuzhiyun * case of success and a negative error code in case of failure.
164*4882a593Smuzhiyun */
set_buds_lprops(struct ubifs_info * c)165*4882a593Smuzhiyun static int set_buds_lprops(struct ubifs_info *c)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun struct bud_entry *b;
168*4882a593Smuzhiyun int err;
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun list_for_each_entry(b, &c->replay_buds, list) {
171*4882a593Smuzhiyun err = set_bud_lprops(c, b);
172*4882a593Smuzhiyun if (err)
173*4882a593Smuzhiyun return err;
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun return 0;
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun /**
180*4882a593Smuzhiyun * trun_remove_range - apply a replay entry for a truncation to the TNC.
181*4882a593Smuzhiyun * @c: UBIFS file-system description object
182*4882a593Smuzhiyun * @r: replay entry of truncation
183*4882a593Smuzhiyun */
trun_remove_range(struct ubifs_info * c,struct replay_entry * r)184*4882a593Smuzhiyun static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
185*4882a593Smuzhiyun {
186*4882a593Smuzhiyun unsigned min_blk, max_blk;
187*4882a593Smuzhiyun union ubifs_key min_key, max_key;
188*4882a593Smuzhiyun ino_t ino;
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun min_blk = r->new_size / UBIFS_BLOCK_SIZE;
191*4882a593Smuzhiyun if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
192*4882a593Smuzhiyun min_blk += 1;
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun max_blk = r->old_size / UBIFS_BLOCK_SIZE;
195*4882a593Smuzhiyun if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
196*4882a593Smuzhiyun max_blk -= 1;
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun ino = key_inum(c, &r->key);
199*4882a593Smuzhiyun
200*4882a593Smuzhiyun data_key_init(c, &min_key, ino, min_blk);
201*4882a593Smuzhiyun data_key_init(c, &max_key, ino, max_blk);
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun return ubifs_tnc_remove_range(c, &min_key, &max_key);
204*4882a593Smuzhiyun }
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun /**
207*4882a593Smuzhiyun * apply_replay_entry - apply a replay entry to the TNC.
208*4882a593Smuzhiyun * @c: UBIFS file-system description object
209*4882a593Smuzhiyun * @r: replay entry to apply
210*4882a593Smuzhiyun *
211*4882a593Smuzhiyun * Apply a replay entry to the TNC.
212*4882a593Smuzhiyun */
apply_replay_entry(struct ubifs_info * c,struct replay_entry * r)213*4882a593Smuzhiyun static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun int err;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
218*4882a593Smuzhiyun r->lnum, r->offs, r->len, r->deletion, r->sqnum);
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun /* Set c->replay_sqnum to help deal with dangling branches. */
221*4882a593Smuzhiyun c->replay_sqnum = r->sqnum;
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun if (is_hash_key(c, &r->key)) {
224*4882a593Smuzhiyun if (r->deletion)
225*4882a593Smuzhiyun err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
226*4882a593Smuzhiyun else
227*4882a593Smuzhiyun err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
228*4882a593Smuzhiyun r->len, &r->nm);
229*4882a593Smuzhiyun } else {
230*4882a593Smuzhiyun if (r->deletion)
231*4882a593Smuzhiyun switch (key_type(c, &r->key)) {
232*4882a593Smuzhiyun case UBIFS_INO_KEY:
233*4882a593Smuzhiyun {
234*4882a593Smuzhiyun ino_t inum = key_inum(c, &r->key);
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun err = ubifs_tnc_remove_ino(c, inum);
237*4882a593Smuzhiyun break;
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun case UBIFS_TRUN_KEY:
240*4882a593Smuzhiyun err = trun_remove_range(c, r);
241*4882a593Smuzhiyun break;
242*4882a593Smuzhiyun default:
243*4882a593Smuzhiyun err = ubifs_tnc_remove(c, &r->key);
244*4882a593Smuzhiyun break;
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun else
247*4882a593Smuzhiyun err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
248*4882a593Smuzhiyun r->len);
249*4882a593Smuzhiyun if (err)
250*4882a593Smuzhiyun return err;
251*4882a593Smuzhiyun
252*4882a593Smuzhiyun if (c->need_recovery)
253*4882a593Smuzhiyun err = ubifs_recover_size_accum(c, &r->key, r->deletion,
254*4882a593Smuzhiyun r->new_size);
255*4882a593Smuzhiyun }
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun return err;
258*4882a593Smuzhiyun }
259*4882a593Smuzhiyun
260*4882a593Smuzhiyun /**
261*4882a593Smuzhiyun * replay_entries_cmp - compare 2 replay entries.
262*4882a593Smuzhiyun * @priv: UBIFS file-system description object
263*4882a593Smuzhiyun * @a: first replay entry
264*4882a593Smuzhiyun * @a: second replay entry
265*4882a593Smuzhiyun *
266*4882a593Smuzhiyun * This is a comparios function for 'list_sort()' which compares 2 replay
267*4882a593Smuzhiyun * entries @a and @b by comparing their sequence numer. Returns %1 if @a has
268*4882a593Smuzhiyun * greater sequence number and %-1 otherwise.
269*4882a593Smuzhiyun */
replay_entries_cmp(void * priv,struct list_head * a,struct list_head * b)270*4882a593Smuzhiyun static int replay_entries_cmp(void *priv, struct list_head *a,
271*4882a593Smuzhiyun struct list_head *b)
272*4882a593Smuzhiyun {
273*4882a593Smuzhiyun struct replay_entry *ra, *rb;
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun cond_resched();
276*4882a593Smuzhiyun if (a == b)
277*4882a593Smuzhiyun return 0;
278*4882a593Smuzhiyun
279*4882a593Smuzhiyun ra = list_entry(a, struct replay_entry, list);
280*4882a593Smuzhiyun rb = list_entry(b, struct replay_entry, list);
281*4882a593Smuzhiyun ubifs_assert(ra->sqnum != rb->sqnum);
282*4882a593Smuzhiyun if (ra->sqnum > rb->sqnum)
283*4882a593Smuzhiyun return 1;
284*4882a593Smuzhiyun return -1;
285*4882a593Smuzhiyun }
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun /**
288*4882a593Smuzhiyun * apply_replay_list - apply the replay list to the TNC.
289*4882a593Smuzhiyun * @c: UBIFS file-system description object
290*4882a593Smuzhiyun *
291*4882a593Smuzhiyun * Apply all entries in the replay list to the TNC. Returns zero in case of
292*4882a593Smuzhiyun * success and a negative error code in case of failure.
293*4882a593Smuzhiyun */
apply_replay_list(struct ubifs_info * c)294*4882a593Smuzhiyun static int apply_replay_list(struct ubifs_info *c)
295*4882a593Smuzhiyun {
296*4882a593Smuzhiyun struct replay_entry *r;
297*4882a593Smuzhiyun int err;
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun list_sort(c, &c->replay_list, &replay_entries_cmp);
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun list_for_each_entry(r, &c->replay_list, list) {
302*4882a593Smuzhiyun cond_resched();
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun err = apply_replay_entry(c, r);
305*4882a593Smuzhiyun if (err)
306*4882a593Smuzhiyun return err;
307*4882a593Smuzhiyun }
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun return 0;
310*4882a593Smuzhiyun }
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun /**
313*4882a593Smuzhiyun * destroy_replay_list - destroy the replay.
314*4882a593Smuzhiyun * @c: UBIFS file-system description object
315*4882a593Smuzhiyun *
316*4882a593Smuzhiyun * Destroy the replay list.
317*4882a593Smuzhiyun */
destroy_replay_list(struct ubifs_info * c)318*4882a593Smuzhiyun static void destroy_replay_list(struct ubifs_info *c)
319*4882a593Smuzhiyun {
320*4882a593Smuzhiyun struct replay_entry *r, *tmp;
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
323*4882a593Smuzhiyun if (is_hash_key(c, &r->key))
324*4882a593Smuzhiyun kfree(r->nm.name);
325*4882a593Smuzhiyun list_del(&r->list);
326*4882a593Smuzhiyun kfree(r);
327*4882a593Smuzhiyun }
328*4882a593Smuzhiyun }
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun /**
331*4882a593Smuzhiyun * insert_node - insert a node to the replay list
332*4882a593Smuzhiyun * @c: UBIFS file-system description object
333*4882a593Smuzhiyun * @lnum: node logical eraseblock number
334*4882a593Smuzhiyun * @offs: node offset
335*4882a593Smuzhiyun * @len: node length
336*4882a593Smuzhiyun * @key: node key
337*4882a593Smuzhiyun * @sqnum: sequence number
338*4882a593Smuzhiyun * @deletion: non-zero if this is a deletion
339*4882a593Smuzhiyun * @used: number of bytes in use in a LEB
340*4882a593Smuzhiyun * @old_size: truncation old size
341*4882a593Smuzhiyun * @new_size: truncation new size
342*4882a593Smuzhiyun *
343*4882a593Smuzhiyun * This function inserts a scanned non-direntry node to the replay list. The
344*4882a593Smuzhiyun * replay list contains @struct replay_entry elements, and we sort this list in
345*4882a593Smuzhiyun * sequence number order before applying it. The replay list is applied at the
346*4882a593Smuzhiyun * very end of the replay process. Since the list is sorted in sequence number
347*4882a593Smuzhiyun * order, the older modifications are applied first. This function returns zero
348*4882a593Smuzhiyun * in case of success and a negative error code in case of failure.
349*4882a593Smuzhiyun */
insert_node(struct ubifs_info * c,int lnum,int offs,int len,union ubifs_key * key,unsigned long long sqnum,int deletion,int * used,loff_t old_size,loff_t new_size)350*4882a593Smuzhiyun static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
351*4882a593Smuzhiyun union ubifs_key *key, unsigned long long sqnum,
352*4882a593Smuzhiyun int deletion, int *used, loff_t old_size,
353*4882a593Smuzhiyun loff_t new_size)
354*4882a593Smuzhiyun {
355*4882a593Smuzhiyun struct replay_entry *r;
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
358*4882a593Smuzhiyun
359*4882a593Smuzhiyun if (key_inum(c, key) >= c->highest_inum)
360*4882a593Smuzhiyun c->highest_inum = key_inum(c, key);
361*4882a593Smuzhiyun
362*4882a593Smuzhiyun r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
363*4882a593Smuzhiyun if (!r)
364*4882a593Smuzhiyun return -ENOMEM;
365*4882a593Smuzhiyun
366*4882a593Smuzhiyun if (!deletion)
367*4882a593Smuzhiyun *used += ALIGN(len, 8);
368*4882a593Smuzhiyun r->lnum = lnum;
369*4882a593Smuzhiyun r->offs = offs;
370*4882a593Smuzhiyun r->len = len;
371*4882a593Smuzhiyun r->deletion = !!deletion;
372*4882a593Smuzhiyun r->sqnum = sqnum;
373*4882a593Smuzhiyun key_copy(c, key, &r->key);
374*4882a593Smuzhiyun r->old_size = old_size;
375*4882a593Smuzhiyun r->new_size = new_size;
376*4882a593Smuzhiyun
377*4882a593Smuzhiyun list_add_tail(&r->list, &c->replay_list);
378*4882a593Smuzhiyun return 0;
379*4882a593Smuzhiyun }
380*4882a593Smuzhiyun
381*4882a593Smuzhiyun /**
382*4882a593Smuzhiyun * insert_dent - insert a directory entry node into the replay list.
383*4882a593Smuzhiyun * @c: UBIFS file-system description object
384*4882a593Smuzhiyun * @lnum: node logical eraseblock number
385*4882a593Smuzhiyun * @offs: node offset
386*4882a593Smuzhiyun * @len: node length
387*4882a593Smuzhiyun * @key: node key
388*4882a593Smuzhiyun * @name: directory entry name
389*4882a593Smuzhiyun * @nlen: directory entry name length
390*4882a593Smuzhiyun * @sqnum: sequence number
391*4882a593Smuzhiyun * @deletion: non-zero if this is a deletion
392*4882a593Smuzhiyun * @used: number of bytes in use in a LEB
393*4882a593Smuzhiyun *
394*4882a593Smuzhiyun * This function inserts a scanned directory entry node or an extended
395*4882a593Smuzhiyun * attribute entry to the replay list. Returns zero in case of success and a
396*4882a593Smuzhiyun * negative error code in case of failure.
397*4882a593Smuzhiyun */
insert_dent(struct ubifs_info * c,int lnum,int offs,int len,union ubifs_key * key,const char * name,int nlen,unsigned long long sqnum,int deletion,int * used)398*4882a593Smuzhiyun static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
399*4882a593Smuzhiyun union ubifs_key *key, const char *name, int nlen,
400*4882a593Smuzhiyun unsigned long long sqnum, int deletion, int *used)
401*4882a593Smuzhiyun {
402*4882a593Smuzhiyun struct replay_entry *r;
403*4882a593Smuzhiyun char *nbuf;
404*4882a593Smuzhiyun
405*4882a593Smuzhiyun dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
406*4882a593Smuzhiyun if (key_inum(c, key) >= c->highest_inum)
407*4882a593Smuzhiyun c->highest_inum = key_inum(c, key);
408*4882a593Smuzhiyun
409*4882a593Smuzhiyun r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
410*4882a593Smuzhiyun if (!r)
411*4882a593Smuzhiyun return -ENOMEM;
412*4882a593Smuzhiyun
413*4882a593Smuzhiyun nbuf = kmalloc(nlen + 1, GFP_KERNEL);
414*4882a593Smuzhiyun if (!nbuf) {
415*4882a593Smuzhiyun kfree(r);
416*4882a593Smuzhiyun return -ENOMEM;
417*4882a593Smuzhiyun }
418*4882a593Smuzhiyun
419*4882a593Smuzhiyun if (!deletion)
420*4882a593Smuzhiyun *used += ALIGN(len, 8);
421*4882a593Smuzhiyun r->lnum = lnum;
422*4882a593Smuzhiyun r->offs = offs;
423*4882a593Smuzhiyun r->len = len;
424*4882a593Smuzhiyun r->deletion = !!deletion;
425*4882a593Smuzhiyun r->sqnum = sqnum;
426*4882a593Smuzhiyun key_copy(c, key, &r->key);
427*4882a593Smuzhiyun r->nm.len = nlen;
428*4882a593Smuzhiyun memcpy(nbuf, name, nlen);
429*4882a593Smuzhiyun nbuf[nlen] = '\0';
430*4882a593Smuzhiyun r->nm.name = nbuf;
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun list_add_tail(&r->list, &c->replay_list);
433*4882a593Smuzhiyun return 0;
434*4882a593Smuzhiyun }
435*4882a593Smuzhiyun
436*4882a593Smuzhiyun /**
437*4882a593Smuzhiyun * ubifs_validate_entry - validate directory or extended attribute entry node.
438*4882a593Smuzhiyun * @c: UBIFS file-system description object
439*4882a593Smuzhiyun * @dent: the node to validate
440*4882a593Smuzhiyun *
441*4882a593Smuzhiyun * This function validates directory or extended attribute entry node @dent.
442*4882a593Smuzhiyun * Returns zero if the node is all right and a %-EINVAL if not.
443*4882a593Smuzhiyun */
ubifs_validate_entry(struct ubifs_info * c,const struct ubifs_dent_node * dent)444*4882a593Smuzhiyun int ubifs_validate_entry(struct ubifs_info *c,
445*4882a593Smuzhiyun const struct ubifs_dent_node *dent)
446*4882a593Smuzhiyun {
447*4882a593Smuzhiyun int key_type = key_type_flash(c, dent->key);
448*4882a593Smuzhiyun int nlen = le16_to_cpu(dent->nlen);
449*4882a593Smuzhiyun
450*4882a593Smuzhiyun if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
451*4882a593Smuzhiyun dent->type >= UBIFS_ITYPES_CNT ||
452*4882a593Smuzhiyun nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
453*4882a593Smuzhiyun strnlen(dent->name, nlen) != nlen ||
454*4882a593Smuzhiyun le64_to_cpu(dent->inum) > MAX_INUM) {
455*4882a593Smuzhiyun ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
456*4882a593Smuzhiyun "directory entry" : "extended attribute entry");
457*4882a593Smuzhiyun return -EINVAL;
458*4882a593Smuzhiyun }
459*4882a593Smuzhiyun
460*4882a593Smuzhiyun if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
461*4882a593Smuzhiyun ubifs_err(c, "bad key type %d", key_type);
462*4882a593Smuzhiyun return -EINVAL;
463*4882a593Smuzhiyun }
464*4882a593Smuzhiyun
465*4882a593Smuzhiyun return 0;
466*4882a593Smuzhiyun }
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun /**
469*4882a593Smuzhiyun * is_last_bud - check if the bud is the last in the journal head.
470*4882a593Smuzhiyun * @c: UBIFS file-system description object
471*4882a593Smuzhiyun * @bud: bud description object
472*4882a593Smuzhiyun *
473*4882a593Smuzhiyun * This function checks if bud @bud is the last bud in its journal head. This
474*4882a593Smuzhiyun * information is then used by 'replay_bud()' to decide whether the bud can
475*4882a593Smuzhiyun * have corruptions or not. Indeed, only last buds can be corrupted by power
476*4882a593Smuzhiyun * cuts. Returns %1 if this is the last bud, and %0 if not.
477*4882a593Smuzhiyun */
is_last_bud(struct ubifs_info * c,struct ubifs_bud * bud)478*4882a593Smuzhiyun static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
479*4882a593Smuzhiyun {
480*4882a593Smuzhiyun struct ubifs_jhead *jh = &c->jheads[bud->jhead];
481*4882a593Smuzhiyun struct ubifs_bud *next;
482*4882a593Smuzhiyun uint32_t data;
483*4882a593Smuzhiyun int err;
484*4882a593Smuzhiyun
485*4882a593Smuzhiyun if (list_is_last(&bud->list, &jh->buds_list))
486*4882a593Smuzhiyun return 1;
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun /*
489*4882a593Smuzhiyun * The following is a quirk to make sure we work correctly with UBIFS
490*4882a593Smuzhiyun * images used with older UBIFS.
491*4882a593Smuzhiyun *
492*4882a593Smuzhiyun * Normally, the last bud will be the last in the journal head's list
493*4882a593Smuzhiyun * of bud. However, there is one exception if the UBIFS image belongs
494*4882a593Smuzhiyun * to older UBIFS. This is fairly unlikely: one would need to use old
495*4882a593Smuzhiyun * UBIFS, then have a power cut exactly at the right point, and then
496*4882a593Smuzhiyun * try to mount this image with new UBIFS.
497*4882a593Smuzhiyun *
498*4882a593Smuzhiyun * The exception is: it is possible to have 2 buds A and B, A goes
499*4882a593Smuzhiyun * before B, and B is the last, bud B is contains no data, and bud A is
500*4882a593Smuzhiyun * corrupted at the end. The reason is that in older versions when the
501*4882a593Smuzhiyun * journal code switched the next bud (from A to B), it first added a
502*4882a593Smuzhiyun * log reference node for the new bud (B), and only after this it
503*4882a593Smuzhiyun * synchronized the write-buffer of current bud (A). But later this was
504*4882a593Smuzhiyun * changed and UBIFS started to always synchronize the write-buffer of
505*4882a593Smuzhiyun * the bud (A) before writing the log reference for the new bud (B).
506*4882a593Smuzhiyun *
507*4882a593Smuzhiyun * But because older UBIFS always synchronized A's write-buffer before
508*4882a593Smuzhiyun * writing to B, we can recognize this exceptional situation but
509*4882a593Smuzhiyun * checking the contents of bud B - if it is empty, then A can be
510*4882a593Smuzhiyun * treated as the last and we can recover it.
511*4882a593Smuzhiyun *
512*4882a593Smuzhiyun * TODO: remove this piece of code in a couple of years (today it is
513*4882a593Smuzhiyun * 16.05.2011).
514*4882a593Smuzhiyun */
515*4882a593Smuzhiyun next = list_entry(bud->list.next, struct ubifs_bud, list);
516*4882a593Smuzhiyun if (!list_is_last(&next->list, &jh->buds_list))
517*4882a593Smuzhiyun return 0;
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
520*4882a593Smuzhiyun if (err)
521*4882a593Smuzhiyun return 0;
522*4882a593Smuzhiyun
523*4882a593Smuzhiyun return data == 0xFFFFFFFF;
524*4882a593Smuzhiyun }
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun /**
527*4882a593Smuzhiyun * replay_bud - replay a bud logical eraseblock.
528*4882a593Smuzhiyun * @c: UBIFS file-system description object
529*4882a593Smuzhiyun * @b: bud entry which describes the bud
530*4882a593Smuzhiyun *
531*4882a593Smuzhiyun * This function replays bud @bud, recovers it if needed, and adds all nodes
532*4882a593Smuzhiyun * from this bud to the replay list. Returns zero in case of success and a
533*4882a593Smuzhiyun * negative error code in case of failure.
534*4882a593Smuzhiyun */
replay_bud(struct ubifs_info * c,struct bud_entry * b)535*4882a593Smuzhiyun static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
536*4882a593Smuzhiyun {
537*4882a593Smuzhiyun int is_last = is_last_bud(c, b->bud);
538*4882a593Smuzhiyun int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
539*4882a593Smuzhiyun struct ubifs_scan_leb *sleb;
540*4882a593Smuzhiyun struct ubifs_scan_node *snod;
541*4882a593Smuzhiyun
542*4882a593Smuzhiyun dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
543*4882a593Smuzhiyun lnum, b->bud->jhead, offs, is_last);
544*4882a593Smuzhiyun
545*4882a593Smuzhiyun if (c->need_recovery && is_last)
546*4882a593Smuzhiyun /*
547*4882a593Smuzhiyun * Recover only last LEBs in the journal heads, because power
548*4882a593Smuzhiyun * cuts may cause corruptions only in these LEBs, because only
549*4882a593Smuzhiyun * these LEBs could possibly be written to at the power cut
550*4882a593Smuzhiyun * time.
551*4882a593Smuzhiyun */
552*4882a593Smuzhiyun sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
553*4882a593Smuzhiyun else
554*4882a593Smuzhiyun sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
555*4882a593Smuzhiyun if (IS_ERR(sleb))
556*4882a593Smuzhiyun return PTR_ERR(sleb);
557*4882a593Smuzhiyun
558*4882a593Smuzhiyun /*
559*4882a593Smuzhiyun * The bud does not have to start from offset zero - the beginning of
560*4882a593Smuzhiyun * the 'lnum' LEB may contain previously committed data. One of the
561*4882a593Smuzhiyun * things we have to do in replay is to correctly update lprops with
562*4882a593Smuzhiyun * newer information about this LEB.
563*4882a593Smuzhiyun *
564*4882a593Smuzhiyun * At this point lprops thinks that this LEB has 'c->leb_size - offs'
565*4882a593Smuzhiyun * bytes of free space because it only contain information about
566*4882a593Smuzhiyun * committed data.
567*4882a593Smuzhiyun *
568*4882a593Smuzhiyun * But we know that real amount of free space is 'c->leb_size -
569*4882a593Smuzhiyun * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
570*4882a593Smuzhiyun * 'sleb->endpt' is used by bud data. We have to correctly calculate
571*4882a593Smuzhiyun * how much of these data are dirty and update lprops with this
572*4882a593Smuzhiyun * information.
573*4882a593Smuzhiyun *
574*4882a593Smuzhiyun * The dirt in that LEB region is comprised of padding nodes, deletion
575*4882a593Smuzhiyun * nodes, truncation nodes and nodes which are obsoleted by subsequent
576*4882a593Smuzhiyun * nodes in this LEB. So instead of calculating clean space, we
577*4882a593Smuzhiyun * calculate used space ('used' variable).
578*4882a593Smuzhiyun */
579*4882a593Smuzhiyun
580*4882a593Smuzhiyun list_for_each_entry(snod, &sleb->nodes, list) {
581*4882a593Smuzhiyun int deletion = 0;
582*4882a593Smuzhiyun
583*4882a593Smuzhiyun cond_resched();
584*4882a593Smuzhiyun
585*4882a593Smuzhiyun if (snod->sqnum >= SQNUM_WATERMARK) {
586*4882a593Smuzhiyun ubifs_err(c, "file system's life ended");
587*4882a593Smuzhiyun goto out_dump;
588*4882a593Smuzhiyun }
589*4882a593Smuzhiyun
590*4882a593Smuzhiyun if (snod->sqnum > c->max_sqnum)
591*4882a593Smuzhiyun c->max_sqnum = snod->sqnum;
592*4882a593Smuzhiyun
593*4882a593Smuzhiyun switch (snod->type) {
594*4882a593Smuzhiyun case UBIFS_INO_NODE:
595*4882a593Smuzhiyun {
596*4882a593Smuzhiyun struct ubifs_ino_node *ino = snod->node;
597*4882a593Smuzhiyun loff_t new_size = le64_to_cpu(ino->size);
598*4882a593Smuzhiyun
599*4882a593Smuzhiyun if (le32_to_cpu(ino->nlink) == 0)
600*4882a593Smuzhiyun deletion = 1;
601*4882a593Smuzhiyun err = insert_node(c, lnum, snod->offs, snod->len,
602*4882a593Smuzhiyun &snod->key, snod->sqnum, deletion,
603*4882a593Smuzhiyun &used, 0, new_size);
604*4882a593Smuzhiyun break;
605*4882a593Smuzhiyun }
606*4882a593Smuzhiyun case UBIFS_DATA_NODE:
607*4882a593Smuzhiyun {
608*4882a593Smuzhiyun struct ubifs_data_node *dn = snod->node;
609*4882a593Smuzhiyun loff_t new_size = le32_to_cpu(dn->size) +
610*4882a593Smuzhiyun key_block(c, &snod->key) *
611*4882a593Smuzhiyun UBIFS_BLOCK_SIZE;
612*4882a593Smuzhiyun
613*4882a593Smuzhiyun err = insert_node(c, lnum, snod->offs, snod->len,
614*4882a593Smuzhiyun &snod->key, snod->sqnum, deletion,
615*4882a593Smuzhiyun &used, 0, new_size);
616*4882a593Smuzhiyun break;
617*4882a593Smuzhiyun }
618*4882a593Smuzhiyun case UBIFS_DENT_NODE:
619*4882a593Smuzhiyun case UBIFS_XENT_NODE:
620*4882a593Smuzhiyun {
621*4882a593Smuzhiyun struct ubifs_dent_node *dent = snod->node;
622*4882a593Smuzhiyun
623*4882a593Smuzhiyun err = ubifs_validate_entry(c, dent);
624*4882a593Smuzhiyun if (err)
625*4882a593Smuzhiyun goto out_dump;
626*4882a593Smuzhiyun
627*4882a593Smuzhiyun err = insert_dent(c, lnum, snod->offs, snod->len,
628*4882a593Smuzhiyun &snod->key, dent->name,
629*4882a593Smuzhiyun le16_to_cpu(dent->nlen), snod->sqnum,
630*4882a593Smuzhiyun !le64_to_cpu(dent->inum), &used);
631*4882a593Smuzhiyun break;
632*4882a593Smuzhiyun }
633*4882a593Smuzhiyun case UBIFS_TRUN_NODE:
634*4882a593Smuzhiyun {
635*4882a593Smuzhiyun struct ubifs_trun_node *trun = snod->node;
636*4882a593Smuzhiyun loff_t old_size = le64_to_cpu(trun->old_size);
637*4882a593Smuzhiyun loff_t new_size = le64_to_cpu(trun->new_size);
638*4882a593Smuzhiyun union ubifs_key key;
639*4882a593Smuzhiyun
640*4882a593Smuzhiyun /* Validate truncation node */
641*4882a593Smuzhiyun if (old_size < 0 || old_size > c->max_inode_sz ||
642*4882a593Smuzhiyun new_size < 0 || new_size > c->max_inode_sz ||
643*4882a593Smuzhiyun old_size <= new_size) {
644*4882a593Smuzhiyun ubifs_err(c, "bad truncation node");
645*4882a593Smuzhiyun goto out_dump;
646*4882a593Smuzhiyun }
647*4882a593Smuzhiyun
648*4882a593Smuzhiyun /*
649*4882a593Smuzhiyun * Create a fake truncation key just to use the same
650*4882a593Smuzhiyun * functions which expect nodes to have keys.
651*4882a593Smuzhiyun */
652*4882a593Smuzhiyun trun_key_init(c, &key, le32_to_cpu(trun->inum));
653*4882a593Smuzhiyun err = insert_node(c, lnum, snod->offs, snod->len,
654*4882a593Smuzhiyun &key, snod->sqnum, 1, &used,
655*4882a593Smuzhiyun old_size, new_size);
656*4882a593Smuzhiyun break;
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun default:
659*4882a593Smuzhiyun ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
660*4882a593Smuzhiyun snod->type, lnum, snod->offs);
661*4882a593Smuzhiyun err = -EINVAL;
662*4882a593Smuzhiyun goto out_dump;
663*4882a593Smuzhiyun }
664*4882a593Smuzhiyun if (err)
665*4882a593Smuzhiyun goto out;
666*4882a593Smuzhiyun }
667*4882a593Smuzhiyun
668*4882a593Smuzhiyun ubifs_assert(ubifs_search_bud(c, lnum));
669*4882a593Smuzhiyun ubifs_assert(sleb->endpt - offs >= used);
670*4882a593Smuzhiyun ubifs_assert(sleb->endpt % c->min_io_size == 0);
671*4882a593Smuzhiyun
672*4882a593Smuzhiyun b->dirty = sleb->endpt - offs - used;
673*4882a593Smuzhiyun b->free = c->leb_size - sleb->endpt;
674*4882a593Smuzhiyun dbg_mnt("bud LEB %d replied: dirty %d, free %d",
675*4882a593Smuzhiyun lnum, b->dirty, b->free);
676*4882a593Smuzhiyun
677*4882a593Smuzhiyun out:
678*4882a593Smuzhiyun ubifs_scan_destroy(sleb);
679*4882a593Smuzhiyun return err;
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun out_dump:
682*4882a593Smuzhiyun ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
683*4882a593Smuzhiyun ubifs_dump_node(c, snod->node);
684*4882a593Smuzhiyun ubifs_scan_destroy(sleb);
685*4882a593Smuzhiyun return -EINVAL;
686*4882a593Smuzhiyun }
687*4882a593Smuzhiyun
688*4882a593Smuzhiyun /**
689*4882a593Smuzhiyun * replay_buds - replay all buds.
690*4882a593Smuzhiyun * @c: UBIFS file-system description object
691*4882a593Smuzhiyun *
692*4882a593Smuzhiyun * This function returns zero in case of success and a negative error code in
693*4882a593Smuzhiyun * case of failure.
694*4882a593Smuzhiyun */
replay_buds(struct ubifs_info * c)695*4882a593Smuzhiyun static int replay_buds(struct ubifs_info *c)
696*4882a593Smuzhiyun {
697*4882a593Smuzhiyun struct bud_entry *b;
698*4882a593Smuzhiyun int err;
699*4882a593Smuzhiyun unsigned long long prev_sqnum = 0;
700*4882a593Smuzhiyun
701*4882a593Smuzhiyun list_for_each_entry(b, &c->replay_buds, list) {
702*4882a593Smuzhiyun err = replay_bud(c, b);
703*4882a593Smuzhiyun if (err)
704*4882a593Smuzhiyun return err;
705*4882a593Smuzhiyun
706*4882a593Smuzhiyun ubifs_assert(b->sqnum > prev_sqnum);
707*4882a593Smuzhiyun prev_sqnum = b->sqnum;
708*4882a593Smuzhiyun }
709*4882a593Smuzhiyun
710*4882a593Smuzhiyun return 0;
711*4882a593Smuzhiyun }
712*4882a593Smuzhiyun
713*4882a593Smuzhiyun /**
714*4882a593Smuzhiyun * destroy_bud_list - destroy the list of buds to replay.
715*4882a593Smuzhiyun * @c: UBIFS file-system description object
716*4882a593Smuzhiyun */
destroy_bud_list(struct ubifs_info * c)717*4882a593Smuzhiyun static void destroy_bud_list(struct ubifs_info *c)
718*4882a593Smuzhiyun {
719*4882a593Smuzhiyun struct bud_entry *b;
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun while (!list_empty(&c->replay_buds)) {
722*4882a593Smuzhiyun b = list_entry(c->replay_buds.next, struct bud_entry, list);
723*4882a593Smuzhiyun list_del(&b->list);
724*4882a593Smuzhiyun kfree(b);
725*4882a593Smuzhiyun }
726*4882a593Smuzhiyun }
727*4882a593Smuzhiyun
728*4882a593Smuzhiyun /**
729*4882a593Smuzhiyun * add_replay_bud - add a bud to the list of buds to replay.
730*4882a593Smuzhiyun * @c: UBIFS file-system description object
731*4882a593Smuzhiyun * @lnum: bud logical eraseblock number to replay
732*4882a593Smuzhiyun * @offs: bud start offset
733*4882a593Smuzhiyun * @jhead: journal head to which this bud belongs
734*4882a593Smuzhiyun * @sqnum: reference node sequence number
735*4882a593Smuzhiyun *
736*4882a593Smuzhiyun * This function returns zero in case of success and a negative error code in
737*4882a593Smuzhiyun * case of failure.
738*4882a593Smuzhiyun */
add_replay_bud(struct ubifs_info * c,int lnum,int offs,int jhead,unsigned long long sqnum)739*4882a593Smuzhiyun static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
740*4882a593Smuzhiyun unsigned long long sqnum)
741*4882a593Smuzhiyun {
742*4882a593Smuzhiyun struct ubifs_bud *bud;
743*4882a593Smuzhiyun struct bud_entry *b;
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
746*4882a593Smuzhiyun
747*4882a593Smuzhiyun bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
748*4882a593Smuzhiyun if (!bud)
749*4882a593Smuzhiyun return -ENOMEM;
750*4882a593Smuzhiyun
751*4882a593Smuzhiyun b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
752*4882a593Smuzhiyun if (!b) {
753*4882a593Smuzhiyun kfree(bud);
754*4882a593Smuzhiyun return -ENOMEM;
755*4882a593Smuzhiyun }
756*4882a593Smuzhiyun
757*4882a593Smuzhiyun bud->lnum = lnum;
758*4882a593Smuzhiyun bud->start = offs;
759*4882a593Smuzhiyun bud->jhead = jhead;
760*4882a593Smuzhiyun ubifs_add_bud(c, bud);
761*4882a593Smuzhiyun
762*4882a593Smuzhiyun b->bud = bud;
763*4882a593Smuzhiyun b->sqnum = sqnum;
764*4882a593Smuzhiyun list_add_tail(&b->list, &c->replay_buds);
765*4882a593Smuzhiyun
766*4882a593Smuzhiyun return 0;
767*4882a593Smuzhiyun }
768*4882a593Smuzhiyun
769*4882a593Smuzhiyun /**
770*4882a593Smuzhiyun * validate_ref - validate a reference node.
771*4882a593Smuzhiyun * @c: UBIFS file-system description object
772*4882a593Smuzhiyun * @ref: the reference node to validate
773*4882a593Smuzhiyun * @ref_lnum: LEB number of the reference node
774*4882a593Smuzhiyun * @ref_offs: reference node offset
775*4882a593Smuzhiyun *
776*4882a593Smuzhiyun * This function returns %1 if a bud reference already exists for the LEB. %0 is
777*4882a593Smuzhiyun * returned if the reference node is new, otherwise %-EINVAL is returned if
778*4882a593Smuzhiyun * validation failed.
779*4882a593Smuzhiyun */
validate_ref(struct ubifs_info * c,const struct ubifs_ref_node * ref)780*4882a593Smuzhiyun static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
781*4882a593Smuzhiyun {
782*4882a593Smuzhiyun struct ubifs_bud *bud;
783*4882a593Smuzhiyun int lnum = le32_to_cpu(ref->lnum);
784*4882a593Smuzhiyun unsigned int offs = le32_to_cpu(ref->offs);
785*4882a593Smuzhiyun unsigned int jhead = le32_to_cpu(ref->jhead);
786*4882a593Smuzhiyun
787*4882a593Smuzhiyun /*
788*4882a593Smuzhiyun * ref->offs may point to the end of LEB when the journal head points
789*4882a593Smuzhiyun * to the end of LEB and we write reference node for it during commit.
790*4882a593Smuzhiyun * So this is why we require 'offs > c->leb_size'.
791*4882a593Smuzhiyun */
792*4882a593Smuzhiyun if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
793*4882a593Smuzhiyun lnum < c->main_first || offs > c->leb_size ||
794*4882a593Smuzhiyun offs & (c->min_io_size - 1))
795*4882a593Smuzhiyun return -EINVAL;
796*4882a593Smuzhiyun
797*4882a593Smuzhiyun /* Make sure we have not already looked at this bud */
798*4882a593Smuzhiyun bud = ubifs_search_bud(c, lnum);
799*4882a593Smuzhiyun if (bud) {
800*4882a593Smuzhiyun if (bud->jhead == jhead && bud->start <= offs)
801*4882a593Smuzhiyun return 1;
802*4882a593Smuzhiyun ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
803*4882a593Smuzhiyun return -EINVAL;
804*4882a593Smuzhiyun }
805*4882a593Smuzhiyun
806*4882a593Smuzhiyun return 0;
807*4882a593Smuzhiyun }
808*4882a593Smuzhiyun
809*4882a593Smuzhiyun /**
810*4882a593Smuzhiyun * replay_log_leb - replay a log logical eraseblock.
811*4882a593Smuzhiyun * @c: UBIFS file-system description object
812*4882a593Smuzhiyun * @lnum: log logical eraseblock to replay
813*4882a593Smuzhiyun * @offs: offset to start replaying from
814*4882a593Smuzhiyun * @sbuf: scan buffer
815*4882a593Smuzhiyun *
816*4882a593Smuzhiyun * This function replays a log LEB and returns zero in case of success, %1 if
817*4882a593Smuzhiyun * this is the last LEB in the log, and a negative error code in case of
818*4882a593Smuzhiyun * failure.
819*4882a593Smuzhiyun */
replay_log_leb(struct ubifs_info * c,int lnum,int offs,void * sbuf)820*4882a593Smuzhiyun static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
821*4882a593Smuzhiyun {
822*4882a593Smuzhiyun int err;
823*4882a593Smuzhiyun struct ubifs_scan_leb *sleb;
824*4882a593Smuzhiyun struct ubifs_scan_node *snod;
825*4882a593Smuzhiyun const struct ubifs_cs_node *node;
826*4882a593Smuzhiyun
827*4882a593Smuzhiyun dbg_mnt("replay log LEB %d:%d", lnum, offs);
828*4882a593Smuzhiyun sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
829*4882a593Smuzhiyun if (IS_ERR(sleb)) {
830*4882a593Smuzhiyun if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
831*4882a593Smuzhiyun return PTR_ERR(sleb);
832*4882a593Smuzhiyun /*
833*4882a593Smuzhiyun * Note, the below function will recover this log LEB only if
834*4882a593Smuzhiyun * it is the last, because unclean reboots can possibly corrupt
835*4882a593Smuzhiyun * only the tail of the log.
836*4882a593Smuzhiyun */
837*4882a593Smuzhiyun sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
838*4882a593Smuzhiyun if (IS_ERR(sleb))
839*4882a593Smuzhiyun return PTR_ERR(sleb);
840*4882a593Smuzhiyun }
841*4882a593Smuzhiyun
842*4882a593Smuzhiyun if (sleb->nodes_cnt == 0) {
843*4882a593Smuzhiyun err = 1;
844*4882a593Smuzhiyun goto out;
845*4882a593Smuzhiyun }
846*4882a593Smuzhiyun
847*4882a593Smuzhiyun node = sleb->buf;
848*4882a593Smuzhiyun snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
849*4882a593Smuzhiyun if (c->cs_sqnum == 0) {
850*4882a593Smuzhiyun /*
851*4882a593Smuzhiyun * This is the first log LEB we are looking at, make sure that
852*4882a593Smuzhiyun * the first node is a commit start node. Also record its
853*4882a593Smuzhiyun * sequence number so that UBIFS can determine where the log
854*4882a593Smuzhiyun * ends, because all nodes which were have higher sequence
855*4882a593Smuzhiyun * numbers.
856*4882a593Smuzhiyun */
857*4882a593Smuzhiyun if (snod->type != UBIFS_CS_NODE) {
858*4882a593Smuzhiyun ubifs_err(c, "first log node at LEB %d:%d is not CS node",
859*4882a593Smuzhiyun lnum, offs);
860*4882a593Smuzhiyun goto out_dump;
861*4882a593Smuzhiyun }
862*4882a593Smuzhiyun if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
863*4882a593Smuzhiyun ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
864*4882a593Smuzhiyun lnum, offs,
865*4882a593Smuzhiyun (unsigned long long)le64_to_cpu(node->cmt_no),
866*4882a593Smuzhiyun c->cmt_no);
867*4882a593Smuzhiyun goto out_dump;
868*4882a593Smuzhiyun }
869*4882a593Smuzhiyun
870*4882a593Smuzhiyun c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
871*4882a593Smuzhiyun dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
872*4882a593Smuzhiyun }
873*4882a593Smuzhiyun
874*4882a593Smuzhiyun if (snod->sqnum < c->cs_sqnum) {
875*4882a593Smuzhiyun /*
876*4882a593Smuzhiyun * This means that we reached end of log and now
877*4882a593Smuzhiyun * look to the older log data, which was already
878*4882a593Smuzhiyun * committed but the eraseblock was not erased (UBIFS
879*4882a593Smuzhiyun * only un-maps it). So this basically means we have to
880*4882a593Smuzhiyun * exit with "end of log" code.
881*4882a593Smuzhiyun */
882*4882a593Smuzhiyun err = 1;
883*4882a593Smuzhiyun goto out;
884*4882a593Smuzhiyun }
885*4882a593Smuzhiyun
886*4882a593Smuzhiyun /* Make sure the first node sits at offset zero of the LEB */
887*4882a593Smuzhiyun if (snod->offs != 0) {
888*4882a593Smuzhiyun ubifs_err(c, "first node is not at zero offset");
889*4882a593Smuzhiyun goto out_dump;
890*4882a593Smuzhiyun }
891*4882a593Smuzhiyun
892*4882a593Smuzhiyun list_for_each_entry(snod, &sleb->nodes, list) {
893*4882a593Smuzhiyun cond_resched();
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun if (snod->sqnum >= SQNUM_WATERMARK) {
896*4882a593Smuzhiyun ubifs_err(c, "file system's life ended");
897*4882a593Smuzhiyun goto out_dump;
898*4882a593Smuzhiyun }
899*4882a593Smuzhiyun
900*4882a593Smuzhiyun if (snod->sqnum < c->cs_sqnum) {
901*4882a593Smuzhiyun ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
902*4882a593Smuzhiyun snod->sqnum, c->cs_sqnum);
903*4882a593Smuzhiyun goto out_dump;
904*4882a593Smuzhiyun }
905*4882a593Smuzhiyun
906*4882a593Smuzhiyun if (snod->sqnum > c->max_sqnum)
907*4882a593Smuzhiyun c->max_sqnum = snod->sqnum;
908*4882a593Smuzhiyun
909*4882a593Smuzhiyun switch (snod->type) {
910*4882a593Smuzhiyun case UBIFS_REF_NODE: {
911*4882a593Smuzhiyun const struct ubifs_ref_node *ref = snod->node;
912*4882a593Smuzhiyun
913*4882a593Smuzhiyun err = validate_ref(c, ref);
914*4882a593Smuzhiyun if (err == 1)
915*4882a593Smuzhiyun break; /* Already have this bud */
916*4882a593Smuzhiyun if (err)
917*4882a593Smuzhiyun goto out_dump;
918*4882a593Smuzhiyun
919*4882a593Smuzhiyun err = add_replay_bud(c, le32_to_cpu(ref->lnum),
920*4882a593Smuzhiyun le32_to_cpu(ref->offs),
921*4882a593Smuzhiyun le32_to_cpu(ref->jhead),
922*4882a593Smuzhiyun snod->sqnum);
923*4882a593Smuzhiyun if (err)
924*4882a593Smuzhiyun goto out;
925*4882a593Smuzhiyun
926*4882a593Smuzhiyun break;
927*4882a593Smuzhiyun }
928*4882a593Smuzhiyun case UBIFS_CS_NODE:
929*4882a593Smuzhiyun /* Make sure it sits at the beginning of LEB */
930*4882a593Smuzhiyun if (snod->offs != 0) {
931*4882a593Smuzhiyun ubifs_err(c, "unexpected node in log");
932*4882a593Smuzhiyun goto out_dump;
933*4882a593Smuzhiyun }
934*4882a593Smuzhiyun break;
935*4882a593Smuzhiyun default:
936*4882a593Smuzhiyun ubifs_err(c, "unexpected node in log");
937*4882a593Smuzhiyun goto out_dump;
938*4882a593Smuzhiyun }
939*4882a593Smuzhiyun }
940*4882a593Smuzhiyun
941*4882a593Smuzhiyun if (sleb->endpt || c->lhead_offs >= c->leb_size) {
942*4882a593Smuzhiyun c->lhead_lnum = lnum;
943*4882a593Smuzhiyun c->lhead_offs = sleb->endpt;
944*4882a593Smuzhiyun }
945*4882a593Smuzhiyun
946*4882a593Smuzhiyun err = !sleb->endpt;
947*4882a593Smuzhiyun out:
948*4882a593Smuzhiyun ubifs_scan_destroy(sleb);
949*4882a593Smuzhiyun return err;
950*4882a593Smuzhiyun
951*4882a593Smuzhiyun out_dump:
952*4882a593Smuzhiyun ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
953*4882a593Smuzhiyun lnum, offs + snod->offs);
954*4882a593Smuzhiyun ubifs_dump_node(c, snod->node);
955*4882a593Smuzhiyun ubifs_scan_destroy(sleb);
956*4882a593Smuzhiyun return -EINVAL;
957*4882a593Smuzhiyun }
958*4882a593Smuzhiyun
959*4882a593Smuzhiyun /**
960*4882a593Smuzhiyun * take_ihead - update the status of the index head in lprops to 'taken'.
961*4882a593Smuzhiyun * @c: UBIFS file-system description object
962*4882a593Smuzhiyun *
963*4882a593Smuzhiyun * This function returns the amount of free space in the index head LEB or a
964*4882a593Smuzhiyun * negative error code.
965*4882a593Smuzhiyun */
take_ihead(struct ubifs_info * c)966*4882a593Smuzhiyun static int take_ihead(struct ubifs_info *c)
967*4882a593Smuzhiyun {
968*4882a593Smuzhiyun const struct ubifs_lprops *lp;
969*4882a593Smuzhiyun int err, free;
970*4882a593Smuzhiyun
971*4882a593Smuzhiyun ubifs_get_lprops(c);
972*4882a593Smuzhiyun
973*4882a593Smuzhiyun lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
974*4882a593Smuzhiyun if (IS_ERR(lp)) {
975*4882a593Smuzhiyun err = PTR_ERR(lp);
976*4882a593Smuzhiyun goto out;
977*4882a593Smuzhiyun }
978*4882a593Smuzhiyun
979*4882a593Smuzhiyun free = lp->free;
980*4882a593Smuzhiyun
981*4882a593Smuzhiyun lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
982*4882a593Smuzhiyun lp->flags | LPROPS_TAKEN, 0);
983*4882a593Smuzhiyun if (IS_ERR(lp)) {
984*4882a593Smuzhiyun err = PTR_ERR(lp);
985*4882a593Smuzhiyun goto out;
986*4882a593Smuzhiyun }
987*4882a593Smuzhiyun
988*4882a593Smuzhiyun err = free;
989*4882a593Smuzhiyun out:
990*4882a593Smuzhiyun ubifs_release_lprops(c);
991*4882a593Smuzhiyun return err;
992*4882a593Smuzhiyun }
993*4882a593Smuzhiyun
994*4882a593Smuzhiyun /**
995*4882a593Smuzhiyun * ubifs_replay_journal - replay journal.
996*4882a593Smuzhiyun * @c: UBIFS file-system description object
997*4882a593Smuzhiyun *
998*4882a593Smuzhiyun * This function scans the journal, replays and cleans it up. It makes sure all
999*4882a593Smuzhiyun * memory data structures related to uncommitted journal are built (dirty TNC
1000*4882a593Smuzhiyun * tree, tree of buds, modified lprops, etc).
1001*4882a593Smuzhiyun */
ubifs_replay_journal(struct ubifs_info * c)1002*4882a593Smuzhiyun int ubifs_replay_journal(struct ubifs_info *c)
1003*4882a593Smuzhiyun {
1004*4882a593Smuzhiyun int err, lnum, free;
1005*4882a593Smuzhiyun
1006*4882a593Smuzhiyun BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1007*4882a593Smuzhiyun
1008*4882a593Smuzhiyun /* Update the status of the index head in lprops to 'taken' */
1009*4882a593Smuzhiyun free = take_ihead(c);
1010*4882a593Smuzhiyun if (free < 0)
1011*4882a593Smuzhiyun return free; /* Error code */
1012*4882a593Smuzhiyun
1013*4882a593Smuzhiyun if (c->ihead_offs != c->leb_size - free) {
1014*4882a593Smuzhiyun ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
1015*4882a593Smuzhiyun c->ihead_offs);
1016*4882a593Smuzhiyun return -EINVAL;
1017*4882a593Smuzhiyun }
1018*4882a593Smuzhiyun
1019*4882a593Smuzhiyun dbg_mnt("start replaying the journal");
1020*4882a593Smuzhiyun c->replaying = 1;
1021*4882a593Smuzhiyun lnum = c->ltail_lnum = c->lhead_lnum;
1022*4882a593Smuzhiyun
1023*4882a593Smuzhiyun do {
1024*4882a593Smuzhiyun err = replay_log_leb(c, lnum, 0, c->sbuf);
1025*4882a593Smuzhiyun if (err == 1) {
1026*4882a593Smuzhiyun if (lnum != c->lhead_lnum)
1027*4882a593Smuzhiyun /* We hit the end of the log */
1028*4882a593Smuzhiyun break;
1029*4882a593Smuzhiyun
1030*4882a593Smuzhiyun /*
1031*4882a593Smuzhiyun * The head of the log must always start with the
1032*4882a593Smuzhiyun * "commit start" node on a properly formatted UBIFS.
1033*4882a593Smuzhiyun * But we found no nodes at all, which means that
1034*4882a593Smuzhiyun * someting went wrong and we cannot proceed mounting
1035*4882a593Smuzhiyun * the file-system.
1036*4882a593Smuzhiyun */
1037*4882a593Smuzhiyun ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
1038*4882a593Smuzhiyun lnum, 0);
1039*4882a593Smuzhiyun err = -EINVAL;
1040*4882a593Smuzhiyun }
1041*4882a593Smuzhiyun if (err)
1042*4882a593Smuzhiyun goto out;
1043*4882a593Smuzhiyun lnum = ubifs_next_log_lnum(c, lnum);
1044*4882a593Smuzhiyun } while (lnum != c->ltail_lnum);
1045*4882a593Smuzhiyun
1046*4882a593Smuzhiyun err = replay_buds(c);
1047*4882a593Smuzhiyun if (err)
1048*4882a593Smuzhiyun goto out;
1049*4882a593Smuzhiyun
1050*4882a593Smuzhiyun err = apply_replay_list(c);
1051*4882a593Smuzhiyun if (err)
1052*4882a593Smuzhiyun goto out;
1053*4882a593Smuzhiyun
1054*4882a593Smuzhiyun err = set_buds_lprops(c);
1055*4882a593Smuzhiyun if (err)
1056*4882a593Smuzhiyun goto out;
1057*4882a593Smuzhiyun
1058*4882a593Smuzhiyun /*
1059*4882a593Smuzhiyun * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1060*4882a593Smuzhiyun * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1061*4882a593Smuzhiyun * depend on it. This means we have to initialize it to make sure
1062*4882a593Smuzhiyun * budgeting works properly.
1063*4882a593Smuzhiyun */
1064*4882a593Smuzhiyun c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1065*4882a593Smuzhiyun c->bi.uncommitted_idx *= c->max_idx_node_sz;
1066*4882a593Smuzhiyun
1067*4882a593Smuzhiyun ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1068*4882a593Smuzhiyun dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1069*4882a593Smuzhiyun c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1070*4882a593Smuzhiyun (unsigned long)c->highest_inum);
1071*4882a593Smuzhiyun out:
1072*4882a593Smuzhiyun destroy_replay_list(c);
1073*4882a593Smuzhiyun destroy_bud_list(c);
1074*4882a593Smuzhiyun c->replaying = 0;
1075*4882a593Smuzhiyun return err;
1076*4882a593Smuzhiyun }
1077