1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * fs/ext4/fast_commit.c
5  *
6  * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
7  *
8  * Ext4 fast commits routines.
9  */
10 #include "ext4.h"
11 #include "ext4_jbd2.h"
12 #include "ext4_extents.h"
13 #include "mballoc.h"
14 
15 /*
16  * Ext4 Fast Commits
17  * -----------------
18  *
19  * Ext4 fast commits implement fine grained journalling for Ext4.
20  *
21  * Fast commits are organized as a log of tag-length-value (TLV) structs (see
22  * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23  * TLV during the recovery phase. For the scenarios for which we currently
24  * don't have replay code, fast commit falls back to full commits. Fast
25  * commits record deltas in one of the following three categories.
26  *
27  * (A) Directory entry updates:
28  *
29  * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
30  * - EXT4_FC_TAG_LINK		- records directory entry link
31  * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
32  *
33  * (B) File specific data range updates:
34  *
35  * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
36  * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
37  *
38  * (C) Inode metadata (mtime / ctime etc):
39  *
40  * - EXT4_FC_TAG_INODE		- records the inode that should be replayed
41  *				  during recovery. Note that the iblocks field is
42  *				  not replayed and instead derived during
43  *				  replay.
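 *
 * For illustration, recovery conceptually walks the fast commit area as a
 * TLV stream. A minimal sketch of such a walk (a hypothetical helper; the
 * real scan lives in the replay code further below):
 *
 *	struct ext4_fc_tl tl;
 *	u8 *cur = start, *end = start + blocksize;
 *
 *	while (cur + sizeof(tl) <= end) {
 *		memcpy(&tl, cur, sizeof(tl));
 *		switch (le16_to_cpu(tl.fc_tag)) {
 *		case EXT4_FC_TAG_ADD_RANGE:
 *			... apply the delta carried in the value bytes ...
 *			break;
 *		...
 *		}
 *		cur += sizeof(tl) + le16_to_cpu(tl.fc_len);
 *	}
 *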
44  * Commit Operation
45  * ----------------
46  * With fast commits, we maintain all the directory entry operations in the
47  * order in which they are issued in an in-memory queue. This queue is flushed
48  * to disk during the commit operation. We also maintain a list of inodes
49  * that need to be committed during a fast commit in another in-memory
50  * queue. During the commit operation, we commit in the following order:
51  *
52  * [1] Lock inodes for any further data updates by setting COMMITTING state
53  * [2] Submit data buffers of all the inodes
54  * [3] Wait for [2] to complete
55  * [4] Commit all the directory entry updates in the fast commit space
56  * [5] Commit all the changed inode structures
57  * [6] Write tail tag (this tag ensures the atomicity, please read the following
58  *     section for more details).
59  * [7] Wait for [4], [5] and [6] to complete.
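 *
 * In this file, steps [2] and [3] map to ext4_fc_submit_inode_data_all()
 * and ext4_fc_wait_inode_data_all(), step [4] to
 * ext4_fc_commit_dentry_updates(), steps [5] and [6] to
 * ext4_fc_perform_commit() and ext4_fc_write_tail(), and step [7] to
 * jbd2_fc_wait_bufs() called from ext4_fc_commit().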
60  *
61  * All the inode updates must call ext4_fc_start_update() before starting an
62  * update. If such an ongoing update is present, fast commit waits for it to
63  * complete. The completion of such an update is marked by
64  * ext4_fc_stop_update().
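 *
 * For example, an update path is expected to look roughly like this
 * (illustrative only; the handle type and credits are placeholders):
 *
 *	ext4_fc_start_update(inode);
 *	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
 *	... perform the inode update ...
 *	ext4_journal_stop(handle);
 *	ext4_fc_stop_update(inode);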
65  *
66  * Fast Commit Ineligibility
67  * -------------------------
68  * Not all operations are supported by fast commits today (e.g. extended
69  * attributes). Fast commit ineligibility is marked by calling one of the
70  * following two functions:
71  *
72  * - ext4_fc_mark_ineligible(): This makes the next fast commit operation
73  *   fall back to a full commit. This is useful in case of transient errors.
74  *
75  * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - These make
76  *   all the fast commits that happen between ext4_fc_start_ineligible() and
77  *   ext4_fc_stop_ineligible(), and one fast commit after the call to
78  *   ext4_fc_stop_ineligible(), fall back to full commits. It is important
79  *   that one more fast commit after the stop call falls back to a full
80  *   commit, so that the fast commit ineligible operation contained within
81  *   ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is guaranteed
82  *   to be followed by at least 1 full commit.
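 *
 * For example (illustrative; the reason code is one of the existing
 * EXT4_FC_REASON_* values):
 *
 *	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_XATTR);
 *	... perform the unsupported (e.g. xattr) update ...
 *	ext4_fc_stop_ineligible(sb);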
83  *
84  * Atomicity of commits
85  * --------------------
86  * In order to guarantee atomicity during the commit operation, fast commit
87  * uses the EXT4_FC_TAG_TAIL tag, which marks a fast commit as complete. The
88  * tail tag contains the CRC of the contents and the TID of the transaction
89  * after which this fast commit should be applied. Recovery code replays fast
90  * commit logs only if there's at least 1 valid tail present. For every fast
91  * commit operation, there is 1 tail. This means we may end up with multiple
92  * tails in the fast commit space. Here's an example:
93  *
94  * - Create a new file A and remove existing file B
95  * - fsync()
96  * - Append contents to file A
97  * - Truncate file A
98  * - fsync()
99  *
100  * The fast commit space at the end of above operations would look like this:
101  *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
102  *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
103  *
104  * Replay code should thus check for all the valid tails in the FC area.
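 *
 * A sketch of what makes a tail "valid" (illustrative; expected_tid and
 * computed_crc stand in for state that the scan phase maintains):
 *
 *	struct ext4_fc_tail tail;
 *
 *	memcpy(&tail, val, sizeof(tail));
 *	if (le32_to_cpu(tail.fc_tid) == expected_tid &&
 *	    le32_to_cpu(tail.fc_crc) == computed_crc)
 *		fc_tail_valid = true;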
105  *
106  * TODOs
107  * -----
108  * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
109  *    eligible update must be protected within ext4_fc_start_update() and
110  *    ext4_fc_stop_update(). These routines are called from much
111  *    higher-level routines. This can be made more fine grained by combining
112  *    with ext4_journal_start().
113  *
114  * 2) Same as above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
115  *
116  * 3) Handle more ineligible cases.
117  */
118 
119 #include <trace/events/ext4.h>
120 static struct kmem_cache *ext4_fc_dentry_cachep;
121 
122 static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
123 {
124 	BUFFER_TRACE(bh, "");
125 	if (uptodate) {
126 		ext4_debug("%s: Block %lld up-to-date",
127 			   __func__, bh->b_blocknr);
128 		set_buffer_uptodate(bh);
129 	} else {
130 		ext4_debug("%s: Block %lld not up-to-date",
131 			   __func__, bh->b_blocknr);
132 		clear_buffer_uptodate(bh);
133 	}
134 
135 	unlock_buffer(bh);
136 }
137 
138 static inline void ext4_fc_reset_inode(struct inode *inode)
139 {
140 	struct ext4_inode_info *ei = EXT4_I(inode);
141 
142 	ei->i_fc_lblk_start = 0;
143 	ei->i_fc_lblk_len = 0;
144 }
145 
146 void ext4_fc_init_inode(struct inode *inode)
147 {
148 	struct ext4_inode_info *ei = EXT4_I(inode);
149 
150 	ext4_fc_reset_inode(inode);
151 	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
152 	INIT_LIST_HEAD(&ei->i_fc_list);
153 	init_waitqueue_head(&ei->i_fc_wait);
154 	atomic_set(&ei->i_fc_updates, 0);
155 }
156 
157 /* This function must be called with sbi->s_fc_lock held. */
158 static void ext4_fc_wait_committing_inode(struct inode *inode)
159 __releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
160 {
161 	wait_queue_head_t *wq;
162 	struct ext4_inode_info *ei = EXT4_I(inode);
163 
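	/*
	 * On 32-bit systems, the dynamic inode state flags live in the
	 * separate i_state_flags word rather than in i_flags, so the word
	 * we sleep on differs between the two configurations below.
	 */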
164 #if (BITS_PER_LONG < 64)
165 	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
166 			EXT4_STATE_FC_COMMITTING);
167 	wq = bit_waitqueue(&ei->i_state_flags,
168 				EXT4_STATE_FC_COMMITTING);
169 #else
170 	DEFINE_WAIT_BIT(wait, &ei->i_flags,
171 			EXT4_STATE_FC_COMMITTING);
172 	wq = bit_waitqueue(&ei->i_flags,
173 				EXT4_STATE_FC_COMMITTING);
174 #endif
175 	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
176 	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
177 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
178 	schedule();
179 	finish_wait(wq, &wait.wq_entry);
180 }
181 
182 /*
183  * Inform Ext4's fast commit subsystem about the start of an inode update
184  *
185  * This function is called by the high level VFS callbacks before
186  * performing any inode update. This function blocks if there's an ongoing
187  * fast commit on the inode in question.
188  */
189 void ext4_fc_start_update(struct inode *inode)
190 {
191 	struct ext4_inode_info *ei = EXT4_I(inode);
192 
193 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
194 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
195 		return;
196 
197 restart:
198 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
199 	if (list_empty(&ei->i_fc_list))
200 		goto out;
201 
202 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
203 		ext4_fc_wait_committing_inode(inode);
204 		goto restart;
205 	}
206 out:
207 	atomic_inc(&ei->i_fc_updates);
208 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
209 }
210 
211 /*
212  * Stop inode update and wake up waiting fast commits if any.
213  */
214 void ext4_fc_stop_update(struct inode *inode)
215 {
216 	struct ext4_inode_info *ei = EXT4_I(inode);
217 
218 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
219 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
220 		return;
221 
222 	if (atomic_dec_and_test(&ei->i_fc_updates))
223 		wake_up_all(&ei->i_fc_wait);
224 }
225 
226 /*
227  * Remove inode from fast commit list. If the inode is being committed
228  * we wait until inode commit is done.
229  */
230 void ext4_fc_del(struct inode *inode)
231 {
232 	struct ext4_inode_info *ei = EXT4_I(inode);
233 
234 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
235 	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
236 		return;
237 
238 restart:
239 	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
240 	if (list_empty(&ei->i_fc_list)) {
241 		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
242 		return;
243 	}
244 
245 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
246 		ext4_fc_wait_committing_inode(inode);
247 		goto restart;
248 	}
249 	list_del_init(&ei->i_fc_list);
250 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
251 }
252 
253 /*
254  * Mark file system as fast commit ineligible. This means that the next
255  * commit operation will result in a full jbd2 commit.
256  */
257 void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
258 {
259 	struct ext4_sb_info *sbi = EXT4_SB(sb);
260 
261 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
262 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
263 		return;
264 
265 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
266 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
267 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
268 }
269 
270 /*
271  * Start a fast commit ineligible update. Any commits that happen while
272  * such an operation is in progress fall back to full commits.
273  */
274 void ext4_fc_start_ineligible(struct super_block *sb, int reason)
275 {
276 	struct ext4_sb_info *sbi = EXT4_SB(sb);
277 
278 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
279 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
280 		return;
281 
282 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
283 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
284 	atomic_inc(&sbi->s_fc_ineligible_updates);
285 }
286 
287 /*
288  * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
289  * to ensure that after stopping the ineligible update, at least one full
290  * commit takes place.
291  */
292 void ext4_fc_stop_ineligible(struct super_block *sb)
293 {
294 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
295 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
296 		return;
297 
298 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
299 	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
300 }
301 
302 static inline int ext4_fc_is_ineligible(struct super_block *sb)
303 {
304 	return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
305 		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
306 }
307 
308 /*
309  * Generic fast commit tracking function. If this is the first time we are
310  * called after a full commit, we initialize fast commit fields and then call
311  * __fc_track_fn() with update = 0. If we have already been called after a full
312  * commit, we pass update = 1. Based on that, the track function can determine
313  * if it needs to track a field for the first time or if it needs to just
314  * update the previously tracked value.
315  *
316  * If enqueue is set, this function enqueues the inode in fast commit list.
317  */
318 static int ext4_fc_track_template(
319 	handle_t *handle, struct inode *inode,
320 	int (*__fc_track_fn)(struct inode *, void *, bool),
321 	void *args, int enqueue)
322 {
323 	bool update = false;
324 	struct ext4_inode_info *ei = EXT4_I(inode);
325 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
326 	tid_t tid = 0;
327 	int ret;
328 
329 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
330 	    (sbi->s_mount_state & EXT4_FC_REPLAY))
331 		return -EOPNOTSUPP;
332 
333 	if (ext4_fc_is_ineligible(inode->i_sb))
334 		return -EINVAL;
335 
336 	tid = handle->h_transaction->t_tid;
337 	mutex_lock(&ei->i_fc_lock);
338 	if (tid == ei->i_sync_tid) {
339 		update = true;
340 	} else {
341 		ext4_fc_reset_inode(inode);
342 		ei->i_sync_tid = tid;
343 	}
344 	ret = __fc_track_fn(inode, args, update);
345 	mutex_unlock(&ei->i_fc_lock);
346 
347 	if (!enqueue)
348 		return ret;
349 
350 	spin_lock(&sbi->s_fc_lock);
351 	if (list_empty(&EXT4_I(inode)->i_fc_list))
352 		list_add_tail(&EXT4_I(inode)->i_fc_list,
353 				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
354 				&sbi->s_fc_q[FC_Q_STAGING] :
355 				&sbi->s_fc_q[FC_Q_MAIN]);
356 	spin_unlock(&sbi->s_fc_lock);
357 
358 	return ret;
359 }
360 
361 struct __track_dentry_update_args {
362 	struct dentry *dentry;
363 	int op;
364 };
365 
366 /* __track_fn for directory entry updates. Called with ei->i_fc_lock held. */
367 static int __track_dentry_update(struct inode *inode, void *arg, bool update)
368 {
369 	struct ext4_fc_dentry_update *node;
370 	struct ext4_inode_info *ei = EXT4_I(inode);
371 	struct __track_dentry_update_args *dentry_update =
372 		(struct __track_dentry_update_args *)arg;
373 	struct dentry *dentry = dentry_update->dentry;
374 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
375 
376 	mutex_unlock(&ei->i_fc_lock);
377 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
378 	if (!node) {
379 		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
380 		mutex_lock(&ei->i_fc_lock);
381 		return -ENOMEM;
382 	}
383 
384 	node->fcd_op = dentry_update->op;
385 	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
386 	node->fcd_ino = inode->i_ino;
387 	if (dentry->d_name.len > DNAME_INLINE_LEN) {
388 		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
389 		if (!node->fcd_name.name) {
390 			kmem_cache_free(ext4_fc_dentry_cachep, node);
391 			ext4_fc_mark_ineligible(inode->i_sb,
392 				EXT4_FC_REASON_NOMEM);
393 			mutex_lock(&ei->i_fc_lock);
394 			return -ENOMEM;
395 		}
396 		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
397 			dentry->d_name.len);
398 	} else {
399 		memcpy(node->fcd_iname, dentry->d_name.name,
400 			dentry->d_name.len);
401 		node->fcd_name.name = node->fcd_iname;
402 	}
403 	node->fcd_name.len = dentry->d_name.len;
404 
405 	spin_lock(&sbi->s_fc_lock);
406 	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
407 		list_add_tail(&node->fcd_list,
408 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
409 	else
410 		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
411 	spin_unlock(&sbi->s_fc_lock);
412 	mutex_lock(&ei->i_fc_lock);
413 
414 	return 0;
415 }
416 
417 void __ext4_fc_track_unlink(handle_t *handle,
418 		struct inode *inode, struct dentry *dentry)
419 {
420 	struct __track_dentry_update_args args;
421 	int ret;
422 
423 	args.dentry = dentry;
424 	args.op = EXT4_FC_TAG_UNLINK;
425 
426 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
427 					(void *)&args, 0);
428 	trace_ext4_fc_track_unlink(inode, dentry, ret);
429 }
430 
431 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
432 {
433 	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
434 }
435 
436 void __ext4_fc_track_link(handle_t *handle,
437 	struct inode *inode, struct dentry *dentry)
438 {
439 	struct __track_dentry_update_args args;
440 	int ret;
441 
442 	args.dentry = dentry;
443 	args.op = EXT4_FC_TAG_LINK;
444 
445 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
446 					(void *)&args, 0);
447 	trace_ext4_fc_track_link(inode, dentry, ret);
448 }
449 
450 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
451 {
452 	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
453 }
454 
455 void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
456 			  struct dentry *dentry)
457 {
458 	struct __track_dentry_update_args args;
459 	int ret;
460 
461 	args.dentry = dentry;
462 	args.op = EXT4_FC_TAG_CREAT;
463 
464 	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
465 					(void *)&args, 0);
466 	trace_ext4_fc_track_create(inode, dentry, ret);
467 }
468 
469 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
470 {
471 	__ext4_fc_track_create(handle, d_inode(dentry), dentry);
472 }
473 
474 /* __track_fn for inode tracking */
475 static int __track_inode(struct inode *inode, void *arg, bool update)
476 {
477 	if (update)
478 		return -EEXIST;
479 
480 	EXT4_I(inode)->i_fc_lblk_len = 0;
481 
482 	return 0;
483 }
484 
485 void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
486 {
487 	int ret;
488 
489 	if (S_ISDIR(inode->i_mode))
490 		return;
491 
492 	if (ext4_should_journal_data(inode)) {
493 		ext4_fc_mark_ineligible(inode->i_sb,
494 					EXT4_FC_REASON_INODE_JOURNAL_DATA);
495 		return;
496 	}
497 
498 	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
499 	trace_ext4_fc_track_inode(inode, ret);
500 }
501 
502 struct __track_range_args {
503 	ext4_lblk_t start, end;
504 };
505 
506 /* __track_fn for tracking data updates */
507 static int __track_range(struct inode *inode, void *arg, bool update)
508 {
509 	struct ext4_inode_info *ei = EXT4_I(inode);
510 	ext4_lblk_t oldstart;
511 	struct __track_range_args *__arg =
512 		(struct __track_range_args *)arg;
513 
514 	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
515 		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
516 		return -ECANCELED;
517 	}
518 
519 	oldstart = ei->i_fc_lblk_start;
520 
521 	if (update && ei->i_fc_lblk_len > 0) {
522 		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
523 		ei->i_fc_lblk_len =
524 			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
525 				ei->i_fc_lblk_start + 1;
526 	} else {
527 		ei->i_fc_lblk_start = __arg->start;
528 		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
529 	}
530 
531 	return 0;
532 }
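
/*
 * Example of the merge above (illustrative): if the tracked range is
 * lblk 10, len 11 (i.e. [10, 20]) and __track_range() is called for
 * [5, 12], the range widens to [5, 20]; a later call for [15, 30]
 * widens it further to [5, 30].
 */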
533 
534 void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
535 			 ext4_lblk_t end)
536 {
537 	struct __track_range_args args;
538 	int ret;
539 
540 	if (S_ISDIR(inode->i_mode))
541 		return;
542 
543 	args.start = start;
544 	args.end = end;
545 
546 	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
547 
548 	trace_ext4_fc_track_range(inode, start, end, ret);
549 }
550 
551 static void ext4_fc_submit_bh(struct super_block *sb)
552 {
553 	int write_flags = REQ_SYNC;
554 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
555 
556 	/* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
557 	if (test_opt(sb, BARRIER))
558 		write_flags |= REQ_FUA | REQ_PREFLUSH;
559 	lock_buffer(bh);
560 	set_buffer_dirty(bh);
561 	set_buffer_uptodate(bh);
562 	bh->b_end_io = ext4_end_buffer_io_sync;
563 	submit_bh(REQ_OP_WRITE, write_flags, bh);
564 	EXT4_SB(sb)->s_fc_bh = NULL;
565 }
566 
567 /* Ext4 commit path routines */
568 
569 /* memzero and update CRC */
570 static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
571 				u32 *crc)
572 {
573 	void *ret;
574 
575 	ret = memset(dst, 0, len);
576 	if (crc)
577 		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
578 	return ret;
579 }
580 
581 /*
582  * Allocate len bytes on a fast commit buffer.
583  *
584  * During the commit time this function is used to manage fast commit
585  * block space. We don't split a fast commit log onto different
586  * blocks. So this function makes sure that if there's not enough space
587  * on the current block, the remaining space in the current block is
588  * marked as unused by adding an EXT4_FC_TAG_PAD tag. In that case, a
589  * new block is requested from jbd2 and the CRC is updated to reflect
590  * the padding we added.
591  */
592 static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
593 {
594 	struct ext4_fc_tl *tl;
595 	struct ext4_sb_info *sbi = EXT4_SB(sb);
596 	struct buffer_head *bh;
597 	int bsize = sbi->s_journal->j_blocksize;
598 	int ret, off = sbi->s_fc_bytes % bsize;
599 	int pad_len;
600 
601 	/*
602 	 * After allocating len, we should have space at least for a 0 byte
603 	 * padding.
604 	 */
605 	if (len + sizeof(struct ext4_fc_tl) > bsize)
606 		return NULL;
607 
608 	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
609 		/*
610 		 * Only allocate from current buffer if we have enough space for
611 		 * this request AND we have space to add a zero byte padding.
612 		 */
613 		if (!sbi->s_fc_bh) {
614 			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
615 			if (ret)
616 				return NULL;
617 			sbi->s_fc_bh = bh;
618 		}
619 		sbi->s_fc_bytes += len;
620 		return sbi->s_fc_bh->b_data + off;
621 	}
622 	/* Need to add PAD tag */
623 	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
624 	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
625 	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
626 	tl->fc_len = cpu_to_le16(pad_len);
627 	if (crc)
628 		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
629 	if (pad_len > 0)
630 		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
631 	ext4_fc_submit_bh(sb);
632 
633 	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
634 	if (ret)
635 		return NULL;
636 	sbi->s_fc_bh = bh;
637 	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
638 	return sbi->s_fc_bh->b_data;
639 }
640 
641 /* memcpy to fc reserved space and update CRC */
642 static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
643 				int len, u32 *crc)
644 {
645 	if (crc)
646 		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
647 	return memcpy(dst, src, len);
648 }
649 
650 /*
651  * Complete a fast commit by writing tail tag.
652  *
653  * Writing tail tag marks the end of a fast commit. In order to guarantee
654  * atomicity, after writing tail tag, even if there's space remaining
655  * in the block, next commit shouldn't use it. That's why tail tag
656  * has the length as that of the remaining space on the block.
657  */
658 static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
659 {
660 	struct ext4_sb_info *sbi = EXT4_SB(sb);
661 	struct ext4_fc_tl tl;
662 	struct ext4_fc_tail tail;
663 	int off, bsize = sbi->s_journal->j_blocksize;
664 	u8 *dst;
665 
666 	/*
667 	 * ext4_fc_reserve_space takes care of allocating an extra block if
668 	 * there's not enough space in this block to accommodate this tail.
669 	 */
670 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
671 	if (!dst)
672 		return -ENOSPC;
673 
674 	off = sbi->s_fc_bytes % bsize;
675 
676 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
677 	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
678 	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
679 
680 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
681 	dst += sizeof(tl);
682 	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
683 	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
684 	dst += sizeof(tail.fc_tid);
685 	tail.fc_crc = cpu_to_le32(crc);
686 	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
687 
688 	ext4_fc_submit_bh(sb);
689 
690 	return 0;
691 }
692 
693 /*
694  * Adds tag, length, value and updates CRC. Returns true if tlv was added.
695  * Returns false if there's not enough space.
696  */
697 static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
698 			   u32 *crc)
699 {
700 	struct ext4_fc_tl tl;
701 	u8 *dst;
702 
703 	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
704 	if (!dst)
705 		return false;
706 
707 	tl.fc_tag = cpu_to_le16(tag);
708 	tl.fc_len = cpu_to_le16(len);
709 
710 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
711 	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
712 
713 	return true;
714 }
715 
716 /* Same as above, but adds dentry tlv. */
717 static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
718 					int parent_ino, int ino, int dlen,
719 					const unsigned char *dname,
720 					u32 *crc)
721 {
722 	struct ext4_fc_dentry_info fcd;
723 	struct ext4_fc_tl tl;
724 	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
725 					crc);
726 
727 	if (!dst)
728 		return false;
729 
730 	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
731 	fcd.fc_ino = cpu_to_le32(ino);
732 	tl.fc_tag = cpu_to_le16(tag);
733 	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
734 	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
735 	dst += sizeof(tl);
736 	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
737 	dst += sizeof(fcd);
738 	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
739 	dst += dlen;
740 
741 	return true;
742 }
743 
744 /*
745  * Writes the inode in the fast commit space under an EXT4_FC_TAG_INODE TLV.
746  * Returns 0 on success, error on failure.
747  */
748 static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
749 {
750 	struct ext4_inode_info *ei = EXT4_I(inode);
751 	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
752 	int ret;
753 	struct ext4_iloc iloc;
754 	struct ext4_fc_inode fc_inode;
755 	struct ext4_fc_tl tl;
756 	u8 *dst;
757 
758 	ret = ext4_get_inode_loc(inode, &iloc);
759 	if (ret)
760 		return ret;
761 
762 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
763 		inode_len += ei->i_extra_isize;
764 
765 	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
766 	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
767 	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
768 
769 	ret = -ECANCELED;
770 	dst = ext4_fc_reserve_space(inode->i_sb,
771 			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
772 	if (!dst)
773 		goto err;
774 
775 	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
776 		goto err;
777 	dst += sizeof(tl);
778 	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
779 		goto err;
780 	dst += sizeof(fc_inode);
781 	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
782 					inode_len, crc))
783 		goto err;
784 	ret = 0;
785 err:
786 	brelse(iloc.bh);
787 	return ret;
788 }
789 
790 /*
791  * Writes updated data ranges for the inode in question. Updates CRC.
792  * Returns 0 on success, error otherwise.
793  */
794 static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
795 {
796 	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
797 	struct ext4_inode_info *ei = EXT4_I(inode);
798 	struct ext4_map_blocks map;
799 	struct ext4_fc_add_range fc_ext;
800 	struct ext4_fc_del_range lrange;
801 	struct ext4_extent *ex;
802 	int ret;
803 
804 	mutex_lock(&ei->i_fc_lock);
805 	if (ei->i_fc_lblk_len == 0) {
806 		mutex_unlock(&ei->i_fc_lock);
807 		return 0;
808 	}
809 	old_blk_size = ei->i_fc_lblk_start;
810 	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
811 	ei->i_fc_lblk_len = 0;
812 	mutex_unlock(&ei->i_fc_lock);
813 
814 	cur_lblk_off = old_blk_size;
815 	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
816 		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
817 
818 	while (cur_lblk_off <= new_blk_size) {
819 		map.m_lblk = cur_lblk_off;
820 		map.m_len = new_blk_size - cur_lblk_off + 1;
821 		ret = ext4_map_blocks(NULL, inode, &map, 0);
822 		if (ret < 0)
823 			return -ECANCELED;
824 
825 		if (map.m_len == 0) {
826 			cur_lblk_off++;
827 			continue;
828 		}
829 
830 		if (ret == 0) {
831 			lrange.fc_ino = cpu_to_le32(inode->i_ino);
832 			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
833 			lrange.fc_len = cpu_to_le32(map.m_len);
834 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
835 					    sizeof(lrange), (u8 *)&lrange, crc))
836 				return -ENOSPC;
837 		} else {
838 			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
839 				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
840 
841 			/* Limit the number of blocks in one extent */
842 			map.m_len = min(max, map.m_len);
843 
844 			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
845 			ex = (struct ext4_extent *)&fc_ext.fc_ex;
846 			ex->ee_block = cpu_to_le32(map.m_lblk);
847 			ex->ee_len = cpu_to_le16(map.m_len);
848 			ext4_ext_store_pblock(ex, map.m_pblk);
849 			if (map.m_flags & EXT4_MAP_UNWRITTEN)
850 				ext4_ext_mark_unwritten(ex);
851 			else
852 				ext4_ext_mark_initialized(ex);
853 			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
854 					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
855 				return -ENOSPC;
856 		}
857 
858 		cur_lblk_off += map.m_len;
859 	}
860 
861 	return 0;
862 }
863 
864 
865 /* Submit data for all the fast commit inodes */
866 static int ext4_fc_submit_inode_data_all(journal_t *journal)
867 {
868 	struct super_block *sb = (struct super_block *)(journal->j_private);
869 	struct ext4_sb_info *sbi = EXT4_SB(sb);
870 	struct ext4_inode_info *ei;
871 	struct list_head *pos;
872 	int ret = 0;
873 
874 	spin_lock(&sbi->s_fc_lock);
875 	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
876 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
877 		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
878 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
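		/*
		 * Now that FC_COMMITTING is set, new calls to
		 * ext4_fc_start_update() on this inode will block; wait for
		 * the updates that are already in flight to finish.
		 */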
879 		while (atomic_read(&ei->i_fc_updates)) {
880 			DEFINE_WAIT(wait);
881 
882 			prepare_to_wait(&ei->i_fc_wait, &wait,
883 						TASK_UNINTERRUPTIBLE);
884 			if (atomic_read(&ei->i_fc_updates)) {
885 				spin_unlock(&sbi->s_fc_lock);
886 				schedule();
887 				spin_lock(&sbi->s_fc_lock);
888 			}
889 			finish_wait(&ei->i_fc_wait, &wait);
890 		}
891 		spin_unlock(&sbi->s_fc_lock);
892 		ret = jbd2_submit_inode_data(ei->jinode);
893 		if (ret)
894 			return ret;
895 		spin_lock(&sbi->s_fc_lock);
896 	}
897 	spin_unlock(&sbi->s_fc_lock);
898 
899 	return ret;
900 }
901 
902 /* Wait for completion of data for all the fast commit inodes */
903 static int ext4_fc_wait_inode_data_all(journal_t *journal)
904 {
905 	struct super_block *sb = (struct super_block *)(journal->j_private);
906 	struct ext4_sb_info *sbi = EXT4_SB(sb);
907 	struct ext4_inode_info *pos, *n;
908 	int ret = 0;
909 
910 	spin_lock(&sbi->s_fc_lock);
911 	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
912 		if (!ext4_test_inode_state(&pos->vfs_inode,
913 					   EXT4_STATE_FC_COMMITTING))
914 			continue;
915 		spin_unlock(&sbi->s_fc_lock);
916 
917 		ret = jbd2_wait_inode_data(journal, pos->jinode);
918 		if (ret)
919 			return ret;
920 		spin_lock(&sbi->s_fc_lock);
921 	}
922 	spin_unlock(&sbi->s_fc_lock);
923 
924 	return 0;
925 }
926 
927 /* Commit all the directory entry updates */
928 static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
929 __acquires(&sbi->s_fc_lock)
930 __releases(&sbi->s_fc_lock)
931 {
932 	struct super_block *sb = (struct super_block *)(journal->j_private);
933 	struct ext4_sb_info *sbi = EXT4_SB(sb);
934 	struct ext4_fc_dentry_update *fc_dentry;
935 	struct inode *inode;
936 	struct list_head *pos, *n, *fcd_pos, *fcd_n;
937 	struct ext4_inode_info *ei;
938 	int ret;
939 
940 	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
941 		return 0;
942 	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
943 		fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
944 					fcd_list);
945 		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
946 			spin_unlock(&sbi->s_fc_lock);
947 			if (!ext4_fc_add_dentry_tlv(
948 				sb, fc_dentry->fcd_op,
949 				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
950 				fc_dentry->fcd_name.len,
951 				fc_dentry->fcd_name.name, crc)) {
952 				ret = -ENOSPC;
953 				goto lock_and_exit;
954 			}
955 			spin_lock(&sbi->s_fc_lock);
956 			continue;
957 		}
958 
959 		inode = NULL;
960 		list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
961 			ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
962 			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
963 				inode = &ei->vfs_inode;
964 				break;
965 			}
966 		}
967 		/*
968 		 * If we don't find the inode in our list, then it was deleted,
969 		 * in which case we don't need to record its create tag.
970 		 */
971 		if (!inode)
972 			continue;
973 		spin_unlock(&sbi->s_fc_lock);
974 
975 		/*
976 		 * We first write the inode and then the create dirent. This
977 		 * allows the recovery code to create an unnamed inode first
978 		 * and then link it to a directory entry. This allows us
979 		 * to use namei.c routines almost as is and simplifies
980 		 * the recovery code.
981 		 */
982 		ret = ext4_fc_write_inode(inode, crc);
983 		if (ret)
984 			goto lock_and_exit;
985 
986 		ret = ext4_fc_write_inode_data(inode, crc);
987 		if (ret)
988 			goto lock_and_exit;
989 
990 		if (!ext4_fc_add_dentry_tlv(
991 			sb, fc_dentry->fcd_op,
992 			fc_dentry->fcd_parent, fc_dentry->fcd_ino,
993 			fc_dentry->fcd_name.len,
994 			fc_dentry->fcd_name.name, crc)) {
995 			ret = -ENOSPC;
996 			goto lock_and_exit;
997 		}
998 
999 		spin_lock(&sbi->s_fc_lock);
1000 	}
1001 	return 0;
1002 lock_and_exit:
1003 	spin_lock(&sbi->s_fc_lock);
1004 	return ret;
1005 }
1006 
1007 static int ext4_fc_perform_commit(journal_t *journal)
1008 {
1009 	struct super_block *sb = (struct super_block *)(journal->j_private);
1010 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1011 	struct ext4_inode_info *iter;
1012 	struct ext4_fc_head head;
1013 	struct list_head *pos;
1014 	struct inode *inode;
1015 	struct blk_plug plug;
1016 	int ret = 0;
1017 	u32 crc = 0;
1018 
1019 	ret = ext4_fc_submit_inode_data_all(journal);
1020 	if (ret)
1021 		return ret;
1022 
1023 	ret = ext4_fc_wait_inode_data_all(journal);
1024 	if (ret)
1025 		return ret;
1026 
1027 	/*
1028 	 * If file system device is different from journal device, issue a cache
1029 	 * flush before we start writing fast commit blocks.
1030 	 */
1031 	if (journal->j_fs_dev != journal->j_dev)
1032 		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
1033 
1034 	blk_start_plug(&plug);
1035 	if (sbi->s_fc_bytes == 0) {
1036 		/*
1037 		 * Add a head tag only if this is the first fast commit
1038 		 * in this TID.
1039 		 */
1040 		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
1041 		head.fc_tid = cpu_to_le32(
1042 			sbi->s_journal->j_running_transaction->t_tid);
1043 		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
1044 			(u8 *)&head, &crc)) {
1045 			ret = -ENOSPC;
1046 			goto out;
1047 		}
1048 	}
1049 
1050 	spin_lock(&sbi->s_fc_lock);
1051 	ret = ext4_fc_commit_dentry_updates(journal, &crc);
1052 	if (ret) {
1053 		spin_unlock(&sbi->s_fc_lock);
1054 		goto out;
1055 	}
1056 
1057 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
1058 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1059 		inode = &iter->vfs_inode;
1060 		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
1061 			continue;
1062 
1063 		spin_unlock(&sbi->s_fc_lock);
1064 		ret = ext4_fc_write_inode_data(inode, &crc);
1065 		if (ret)
1066 			goto out;
1067 		ret = ext4_fc_write_inode(inode, &crc);
1068 		if (ret)
1069 			goto out;
1070 		spin_lock(&sbi->s_fc_lock);
1071 	}
1072 	spin_unlock(&sbi->s_fc_lock);
1073 
1074 	ret = ext4_fc_write_tail(sb, crc);
1075 
1076 out:
1077 	blk_finish_plug(&plug);
1078 	return ret;
1079 }
1080 
1081 /*
1082  * The main commit entry point. Performs a fast commit for transaction
1083  * commit_tid if needed. If it's not possible to perform a fast commit
1084  * due to various reasons, we fall back to full commit. Returns 0
1085  * on success, error otherwise.
1086  */
1087 int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1088 {
1089 	struct super_block *sb = (struct super_block *)(journal->j_private);
1090 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1091 	int nblks = 0, ret, bsize = journal->j_blocksize;
1092 	int subtid = atomic_read(&sbi->s_fc_subtid);
1093 	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
1094 	ktime_t start_time, commit_time;
1095 
1096 	trace_ext4_fc_commit_start(sb);
1097 
1098 	start_time = ktime_get();
1099 
1100 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
1101 		(ext4_fc_is_ineligible(sb))) {
1102 		reason = EXT4_FC_REASON_INELIGIBLE;
1103 		goto out;
1104 	}
1105 
1106 restart_fc:
1107 	ret = jbd2_fc_begin_commit(journal, commit_tid);
1108 	if (ret == -EALREADY) {
1109 		/* There was an ongoing commit, check if we need to restart */
1110 		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1111 			commit_tid > journal->j_commit_sequence)
1112 			goto restart_fc;
1113 		reason = EXT4_FC_REASON_ALREADY_COMMITTED;
1114 		goto out;
1115 	} else if (ret) {
1116 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1117 		reason = EXT4_FC_REASON_FC_START_FAILED;
1118 		goto out;
1119 	}
1120 
1121 	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1122 	ret = ext4_fc_perform_commit(journal);
1123 	if (ret < 0) {
1124 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1125 		reason = EXT4_FC_REASON_FC_FAILED;
1126 		goto out;
1127 	}
1128 	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1129 	ret = jbd2_fc_wait_bufs(journal, nblks);
1130 	if (ret < 0) {
1131 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1132 		reason = EXT4_FC_REASON_FC_FAILED;
1133 		goto out;
1134 	}
1135 	atomic_inc(&sbi->s_fc_subtid);
1136 	jbd2_fc_end_commit(journal);
1137 out:
1138 	/* Has any ineligible update happened since we started? */
1139 	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
1140 		sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1141 		reason = EXT4_FC_REASON_INELIGIBLE;
1142 	}
1143 
1144 	spin_lock(&sbi->s_fc_lock);
1145 	if (reason != EXT4_FC_REASON_OK &&
1146 		reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
1147 		sbi->s_fc_stats.fc_ineligible_commits++;
1148 	} else {
1149 		sbi->s_fc_stats.fc_num_commits++;
1150 		sbi->s_fc_stats.fc_numblks += nblks;
1151 	}
1152 	spin_unlock(&sbi->s_fc_lock);
1153 	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
1154 	trace_ext4_fc_commit_stop(sb, nblks, reason);
1155 	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1156 	/*
1157 	 * weight the historical average higher than the current commit time
1158 	 * so we don't react too strongly to vast swings in the commit time
1159 	 */
1160 	if (likely(sbi->s_fc_avg_commit_time))
1161 		sbi->s_fc_avg_commit_time = (commit_time +
1162 				sbi->s_fc_avg_commit_time * 3) / 4;
1163 	else
1164 		sbi->s_fc_avg_commit_time = commit_time;
1165 	jbd_debug(1,
1166 		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
1167 		nblks, reason, subtid);
1168 	if (reason == EXT4_FC_REASON_FC_FAILED)
1169 		return jbd2_fc_end_commit_fallback(journal);
1170 	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
1171 		reason == EXT4_FC_REASON_INELIGIBLE)
1172 		return jbd2_complete_transaction(journal, commit_tid);
1173 	return 0;
1174 }
1175 
1176 /*
1177  * Fast commit cleanup routine. This is called after every fast commit and
1178  * full commit. full is true if we are called after a full commit.
1179  */
1180 static void ext4_fc_cleanup(journal_t *journal, int full)
1181 {
1182 	struct super_block *sb = journal->j_private;
1183 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1184 	struct ext4_inode_info *iter;
1185 	struct ext4_fc_dentry_update *fc_dentry;
1186 	struct list_head *pos, *n;
1187 
1188 	if (full && sbi->s_fc_bh)
1189 		sbi->s_fc_bh = NULL;
1190 
1191 	jbd2_fc_release_bufs(journal);
1192 
1193 	spin_lock(&sbi->s_fc_lock);
1194 	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1195 		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1196 		list_del_init(&iter->i_fc_list);
1197 		ext4_clear_inode_state(&iter->vfs_inode,
1198 				       EXT4_STATE_FC_COMMITTING);
1199 		ext4_fc_reset_inode(&iter->vfs_inode);
1200 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1201 		smp_mb();
1202 #if (BITS_PER_LONG < 64)
1203 		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1204 #else
1205 		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1206 #endif
1207 	}
1208 
1209 	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1210 		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1211 					     struct ext4_fc_dentry_update,
1212 					     fcd_list);
1213 		list_del_init(&fc_dentry->fcd_list);
1214 		spin_unlock(&sbi->s_fc_lock);
1215 
1216 		if (fc_dentry->fcd_name.name &&
1217 			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1218 			kfree(fc_dentry->fcd_name.name);
1219 		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1220 		spin_lock(&sbi->s_fc_lock);
1221 	}
1222 
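	/*
	 * Move the updates that were parked on the staging queues while the
	 * commit was in progress over to the main queues, so that the next
	 * commit picks them up.
	 */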
1223 	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1224 				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
1225 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1226 				&sbi->s_fc_q[FC_Q_MAIN]);
1227 
1228 	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
1229 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
1230 
1231 	if (full)
1232 		sbi->s_fc_bytes = 0;
1233 	spin_unlock(&sbi->s_fc_lock);
1234 	trace_ext4_fc_stats(sb);
1235 }
1236 
1237 /* Ext4 Replay Path Routines */
1238 
1239 /* Helper struct for dentry replay routines */
1240 struct dentry_info_args {
1241 	int parent_ino, dname_len, ino, inode_len;
1242 	char *dname;
1243 };
1244 
1245 static inline void tl_to_darg(struct dentry_info_args *darg,
1246 			      struct  ext4_fc_tl *tl, u8 *val)
1247 {
1248 	struct ext4_fc_dentry_info fcd;
1249 
1250 	memcpy(&fcd, val, sizeof(fcd));
1251 
1252 	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
1253 	darg->ino = le32_to_cpu(fcd.fc_ino);
1254 	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
1255 	darg->dname_len = le16_to_cpu(tl->fc_len) -
1256 		sizeof(struct ext4_fc_dentry_info);
1257 }
1258 
1259 /* Unlink replay function */
1260 static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
1261 				 u8 *val)
1262 {
1263 	struct inode *inode, *old_parent;
1264 	struct qstr entry;
1265 	struct dentry_info_args darg;
1266 	int ret = 0;
1267 
1268 	tl_to_darg(&darg, tl, val);
1269 
1270 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
1271 			darg.parent_ino, darg.dname_len);
1272 
1273 	entry.name = darg.dname;
1274 	entry.len = darg.dname_len;
1275 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1276 
1277 	if (IS_ERR(inode)) {
1278 		jbd_debug(1, "Inode %d not found", darg.ino);
1279 		return 0;
1280 	}
1281 
1282 	old_parent = ext4_iget(sb, darg.parent_ino,
1283 				EXT4_IGET_NORMAL);
1284 	if (IS_ERR(old_parent)) {
1285 		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
1286 		iput(inode);
1287 		return 0;
1288 	}
1289 
1290 	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
1291 	/* -ENOENT is ok because the entry might not exist anymore. */
1292 	if (ret == -ENOENT)
1293 		ret = 0;
1294 	iput(old_parent);
1295 	iput(inode);
1296 	return ret;
1297 }
1298 
1299 static int ext4_fc_replay_link_internal(struct super_block *sb,
1300 				struct dentry_info_args *darg,
1301 				struct inode *inode)
1302 {
1303 	struct inode *dir = NULL;
1304 	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
1305 	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
1306 	int ret = 0;
1307 
1308 	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
1309 	if (IS_ERR(dir)) {
1310 		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
1311 		dir = NULL;
1312 		goto out;
1313 	}
1314 
1315 	dentry_dir = d_obtain_alias(dir);
1316 	if (IS_ERR(dentry_dir)) {
1317 		jbd_debug(1, "Failed to obtain dentry");
1318 		dentry_dir = NULL;
1319 		goto out;
1320 	}
1321 
1322 	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
1323 	if (!dentry_inode) {
1324 		jbd_debug(1, "Inode dentry not created.");
1325 		ret = -ENOMEM;
1326 		goto out;
1327 	}
1328 
1329 	ret = __ext4_link(dir, inode, dentry_inode);
1330 	/*
1331 	 * It's possible that link already existed since data blocks
1332 	 * for the dir in question got persisted before we crashed OR
1333 	 * we replayed this tag and crashed before the entire replay
1334 	 * could complete.
1335 	 */
1336 	if (ret && ret != -EEXIST) {
1337 		jbd_debug(1, "Failed to link\n");
1338 		goto out;
1339 	}
1340 
1341 	ret = 0;
1342 out:
1343 	if (dentry_dir) {
1344 		d_drop(dentry_dir);
1345 		dput(dentry_dir);
1346 	} else if (dir) {
1347 		iput(dir);
1348 	}
1349 	if (dentry_inode) {
1350 		d_drop(dentry_inode);
1351 		dput(dentry_inode);
1352 	}
1353 
1354 	return ret;
1355 }
1356 
1357 /* Link replay function */
1358 static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
1359 			       u8 *val)
1360 {
1361 	struct inode *inode;
1362 	struct dentry_info_args darg;
1363 	int ret = 0;
1364 
1365 	tl_to_darg(&darg, tl, val);
1366 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
1367 			darg.parent_ino, darg.dname_len);
1368 
1369 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1370 	if (IS_ERR(inode)) {
1371 		jbd_debug(1, "Inode not found.");
1372 		return 0;
1373 	}
1374 
1375 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1376 	iput(inode);
1377 	return ret;
1378 }
1379 
1380 /*
1381  * Record all the modified inodes during replay. We use this later to set up
1382  * block bitmaps correctly.
1383  */
1384 static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
1385 {
1386 	struct ext4_fc_replay_state *state;
1387 	int i;
1388 
1389 	state = &EXT4_SB(sb)->s_fc_replay_state;
1390 	for (i = 0; i < state->fc_modified_inodes_used; i++)
1391 		if (state->fc_modified_inodes[i] == ino)
1392 			return 0;
1393 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
1394 		int *fc_modified_inodes;
1395 
1396 		fc_modified_inodes = krealloc(state->fc_modified_inodes,
1397 				sizeof(int) * (state->fc_modified_inodes_size +
1398 				EXT4_FC_REPLAY_REALLOC_INCREMENT),
1399 				GFP_KERNEL);
1400 		if (!fc_modified_inodes)
1401 			return -ENOMEM;
1402 		state->fc_modified_inodes = fc_modified_inodes;
1403 		state->fc_modified_inodes_size +=
1404 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
1405 	}
1406 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
1407 	return 0;
1408 }
1409 
1410 /*
1411  * Inode replay function
1412  */
1413 static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
1414 				u8 *val)
1415 {
1416 	struct ext4_fc_inode fc_inode;
1417 	struct ext4_inode *raw_inode;
1418 	struct ext4_inode *raw_fc_inode;
1419 	struct inode *inode = NULL;
1420 	struct ext4_iloc iloc;
1421 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
1422 	struct ext4_extent_header *eh;
1423 
1424 	memcpy(&fc_inode, val, sizeof(fc_inode));
1425 
1426 	ino = le32_to_cpu(fc_inode.fc_ino);
1427 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
1428 
1429 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1430 	if (!IS_ERR(inode)) {
1431 		ext4_ext_clear_bb(inode);
1432 		iput(inode);
1433 	}
1434 	inode = NULL;
1435 
1436 	ret = ext4_fc_record_modified_inode(sb, ino);
1437 	if (ret)
1438 		goto out;
1439 
1440 	raw_fc_inode = (struct ext4_inode *)
1441 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
1442 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
1443 	if (ret)
1444 		goto out;
1445 
1446 	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
1447 	raw_inode = ext4_raw_inode(&iloc);
1448 
1449 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
1450 	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
1451 		inode_len - offsetof(struct ext4_inode, i_generation));
1452 	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
1453 		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
1454 		if (eh->eh_magic != EXT4_EXT_MAGIC) {
1455 			memset(eh, 0, sizeof(*eh));
1456 			eh->eh_magic = EXT4_EXT_MAGIC;
1457 			eh->eh_max = cpu_to_le16(
1458 				(sizeof(raw_inode->i_block) -
1459 				 sizeof(struct ext4_extent_header))
1460 				 / sizeof(struct ext4_extent));
1461 		}
1462 	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
1463 		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
1464 			sizeof(raw_inode->i_block));
1465 	}
1466 
1467 	/* Immediately update the inode on disk. */
1468 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1469 	if (ret)
1470 		goto out;
1471 	ret = sync_dirty_buffer(iloc.bh);
1472 	if (ret)
1473 		goto out;
1474 	ret = ext4_mark_inode_used(sb, ino);
1475 	if (ret)
1476 		goto out;
1477 
1478 	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
1479 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1480 	if (IS_ERR(inode)) {
1481 		jbd_debug(1, "Inode not found.");
1482 		return -EFSCORRUPTED;
1483 	}
1484 
1485 	/*
1486 	 * Our allocator could have made different decisions than before
1487 	 * crashing. This should be fixed but until then, we recalculate
1488 	 * the number of blocks the inode occupies.
1489 	 */
1490 	ext4_ext_replay_set_iblocks(inode);
1491 
1492 	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
1493 	ext4_reset_inode_seed(inode);
1494 
1495 	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
1496 	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1497 	sync_dirty_buffer(iloc.bh);
1498 	brelse(iloc.bh);
1499 out:
1500 	iput(inode);
1501 	if (!ret)
1502 		blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
1503 
1504 	return 0;
1505 }
1506 
1507 /*
1508  * Dentry create replay function.
1509  *
1510  * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL, which means the
1511  * inode for which we are trying to create a dentry here should already have
1512  * been replayed before we start here.
1513  */
1514 static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
1515 				 u8 *val)
1516 {
1517 	int ret = 0;
1518 	struct inode *inode = NULL;
1519 	struct inode *dir = NULL;
1520 	struct dentry_info_args darg;
1521 
1522 	tl_to_darg(&darg, tl, val);
1523 
1524 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
1525 			darg.parent_ino, darg.dname_len);
1526 
1527 	/* This takes care of updating the group descriptor and other metadata */
1528 	ret = ext4_mark_inode_used(sb, darg.ino);
1529 	if (ret)
1530 		goto out;
1531 
1532 	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1533 	if (IS_ERR(inode)) {
1534 		jbd_debug(1, "inode %d not found.", darg.ino);
1535 		inode = NULL;
1536 		ret = -EINVAL;
1537 		goto out;
1538 	}
1539 
1540 	if (S_ISDIR(inode->i_mode)) {
1541 		/*
1542 		 * If we are creating a directory, we need to make sure that the
1543 		 * dot and dot dot dirents are set up properly.
1544 		 */
1545 		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
1546 		if (IS_ERR(dir)) {
1547 			jbd_debug(1, "Dir %d not found.", darg.parent_ino);
1548 			goto out;
1549 		}
1550 		ret = ext4_init_new_dir(NULL, dir, inode);
1551 		iput(dir);
1552 		if (ret) {
1553 			ret = 0;
1554 			goto out;
1555 		}
1556 	}
1557 	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1558 	if (ret)
1559 		goto out;
1560 	set_nlink(inode, 1);
1561 	ext4_mark_inode_dirty(NULL, inode);
1562 out:
1563 	if (inode)
1564 		iput(inode);
1565 	return ret;
1566 }
1567 
1568 /*
1569  * Record the physical disk regions which, per the fast commit area, are in
1570  * use by inodes during the replay phase. Our simple replay phase allocator
1571  * excludes these regions from allocation.
1572  */
1573 int ext4_fc_record_regions(struct super_block *sb, int ino,
1574 		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
1575 {
1576 	struct ext4_fc_replay_state *state;
1577 	struct ext4_fc_alloc_region *region;
1578 
1579 	state = &EXT4_SB(sb)->s_fc_replay_state;
1580 	/*
1581 	 * During the replay phase, fc_regions_valid may not be the same as
1582 	 * fc_regions_used; bring it up to date when making new additions.
1583 	 */
1584 	if (replay && state->fc_regions_used != state->fc_regions_valid)
1585 		state->fc_regions_used = state->fc_regions_valid;
1586 	if (state->fc_regions_used == state->fc_regions_size) {
1587 		struct ext4_fc_alloc_region *fc_regions;
1588 
1589 		fc_regions = krealloc(state->fc_regions,
1590 				      sizeof(struct ext4_fc_alloc_region) *
1591 				      (state->fc_regions_size +
1592 				       EXT4_FC_REPLAY_REALLOC_INCREMENT),
1593 				      GFP_KERNEL);
1594 		if (!fc_regions)
1595 			return -ENOMEM;
1596 		state->fc_regions_size +=
1597 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
1598 		state->fc_regions = fc_regions;
1599 	}
1600 	region = &state->fc_regions[state->fc_regions_used++];
1601 	region->ino = ino;
1602 	region->lblk = lblk;
1603 	region->pblk = pblk;
1604 	region->len = len;
1605 
1606 	if (replay)
1607 		state->fc_regions_valid++;
1608 
1609 	return 0;
1610 }
1611 
1612 /* Replay add range tag */
1613 static int ext4_fc_replay_add_range(struct super_block *sb,
1614 				    struct ext4_fc_tl *tl, u8 *val)
1615 {
1616 	struct ext4_fc_add_range fc_add_ex;
1617 	struct ext4_extent newex, *ex;
1618 	struct inode *inode;
1619 	ext4_lblk_t start, cur;
1620 	int remaining, len;
1621 	ext4_fsblk_t start_pblk;
1622 	struct ext4_map_blocks map;
1623 	struct ext4_ext_path *path = NULL;
1624 	int ret;
1625 
1626 	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
1627 	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
1628 
1629 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
1630 		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
1631 		ext4_ext_get_actual_len(ex));
1632 
1633 	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
1634 	if (IS_ERR(inode)) {
1635 		jbd_debug(1, "Inode not found.");
1636 		return 0;
1637 	}
1638 
1639 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1640 	if (ret)
1641 		goto out;
1642 
1643 	start = le32_to_cpu(ex->ee_block);
1644 	start_pblk = ext4_ext_pblock(ex);
1645 	len = ext4_ext_get_actual_len(ex);
1646 
1647 	cur = start;
1648 	remaining = len;
1649 	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
1650 		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
1651 		  inode->i_ino);
1652 
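	/*
	 * Walk the recorded range in chunks as reported by ext4_map_blocks().
	 * Each chunk is either not mapped at all (insert a fresh extent),
	 * mapped to different physical blocks (repoint it and free the old
	 * blocks), or mapped in place but with a stale unwritten state
	 * (toggle the state).
	 */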
1653 	while (remaining > 0) {
1654 		map.m_lblk = cur;
1655 		map.m_len = remaining;
1656 		map.m_pblk = 0;
1657 		ret = ext4_map_blocks(NULL, inode, &map, 0);
1658 
1659 		if (ret < 0)
1660 			goto out;
1661 
1662 		if (ret == 0) {
1663 			/* Range is not mapped */
1664 			path = ext4_find_extent(inode, cur, NULL, 0);
1665 			if (IS_ERR(path))
1666 				goto out;
1667 			memset(&newex, 0, sizeof(newex));
1668 			newex.ee_block = cpu_to_le32(cur);
1669 			ext4_ext_store_pblock(
1670 				&newex, start_pblk + cur - start);
1671 			newex.ee_len = cpu_to_le16(map.m_len);
1672 			if (ext4_ext_is_unwritten(ex))
1673 				ext4_ext_mark_unwritten(&newex);
1674 			down_write(&EXT4_I(inode)->i_data_sem);
1675 			ret = ext4_ext_insert_extent(
1676 				NULL, inode, &path, &newex, 0);
1677 			up_write(&EXT4_I(inode)->i_data_sem);
1678 			ext4_ext_drop_refs(path);
1679 			kfree(path);
1680 			if (ret)
1681 				goto out;
1682 			goto next;
1683 		}
1684 
1685 		if (start_pblk + cur - start != map.m_pblk) {
1686 			/*
1687 			 * Logical to physical mapping changed. This can happen
1688 			 * if this range was removed and then reallocated to
1689 			 * map to new physical blocks during a fast commit.
1690 			 */
1691 			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
1692 					ext4_ext_is_unwritten(ex),
1693 					start_pblk + cur - start);
1694 			if (ret)
1695 				goto out;
1696 			/*
1697 			 * Mark the old blocks as free since they aren't used
1698 			 * anymore. We maintain an array of all the modified
1699 			 * inodes. In case these blocks are still used at either
1700 			 * a different logical range in the same inode or in
1701 			 * some different inode, we will mark them as allocated
1702 			 * at the end of the FC replay using our array of
1703 			 * modified inodes.
1704 			 */
1705 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
1706 			goto next;
1707 		}
1708 
1709 		/* Range is mapped and needs a state change */
1710 		jbd_debug(1, "Converting from %d to %d %lld",
1711 			  map.m_flags & EXT4_MAP_UNWRITTEN,
1712 			  ext4_ext_is_unwritten(ex), map.m_pblk);
1713 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
1714 					ext4_ext_is_unwritten(ex), map.m_pblk);
1715 		if (ret)
1716 			goto out;
1717 		/*
1718 		 * We may have split the extent tree while toggling the state.
1719 		 * Try to shrink the extent tree now.
1720 		 */
1721 		ext4_ext_replay_shrink_inode(inode, start + len);
1722 next:
1723 		cur += map.m_len;
1724 		remaining -= map.m_len;
1725 	}
1726 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
1727 					sb->s_blocksize_bits);
1728 out:
1729 	iput(inode);
1730 	return 0;
1731 }
1732 
1733 /* Replay DEL_RANGE tag */
1734 static int
1735 ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
1736 			 u8 *val)
1737 {
1738 	struct inode *inode;
1739 	struct ext4_fc_del_range lrange;
1740 	struct ext4_map_blocks map;
1741 	ext4_lblk_t cur, remaining;
1742 	int ret;
1743 
1744 	memcpy(&lrange, val, sizeof(lrange));
1745 	cur = le32_to_cpu(lrange.fc_lblk);
1746 	remaining = le32_to_cpu(lrange.fc_len);
1747 
1748 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
1749 		le32_to_cpu(lrange.fc_ino), cur, remaining);
1750 
1751 	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
1752 	if (IS_ERR(inode)) {
1753 		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
1754 		return 0;
1755 	}
1756 
1757 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
1758 	if (ret)
1759 		goto out;
1760 
1761 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
1762 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
1763 			le32_to_cpu(lrange.fc_len));
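	/*
	 * First free every physical block currently mapped in the deleted
	 * range; the extents themselves are removed afterwards.
	 */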
1764 	while (remaining > 0) {
1765 		map.m_lblk = cur;
1766 		map.m_len = remaining;
1767 
1768 		ret = ext4_map_blocks(NULL, inode, &map, 0);
1769 		if (ret < 0)
1770 			goto out;
1771 		if (ret > 0) {
1772 			remaining -= ret;
1773 			cur += ret;
1774 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
1775 		} else {
1776 			remaining -= map.m_len;
1777 			cur += map.m_len;
1778 		}
1779 	}
1780 
1781 	down_write(&EXT4_I(inode)->i_data_sem);
1782 	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
1783 				le32_to_cpu(lrange.fc_lblk) +
1784 				le32_to_cpu(lrange.fc_len) - 1);
1785 	up_write(&EXT4_I(inode)->i_data_sem);
1786 	if (ret)
1787 		goto out;
1788 	ext4_ext_replay_shrink_inode(inode,
1789 		i_size_read(inode) >> sb->s_blocksize_bits);
1790 	ext4_mark_inode_dirty(NULL, inode);
1791 out:
1792 	iput(inode);
1793 	return 0;
1794 }
1795 
1796 static inline const char *tag2str(u16 tag)
1797 {
1798 	switch (tag) {
1799 	case EXT4_FC_TAG_LINK:
1800 		return "TAG_ADD_ENTRY";
1801 	case EXT4_FC_TAG_UNLINK:
1802 		return "TAG_DEL_ENTRY";
1803 	case EXT4_FC_TAG_ADD_RANGE:
1804 		return "TAG_ADD_RANGE";
1805 	case EXT4_FC_TAG_CREAT:
1806 		return "TAG_CREAT_DENTRY";
1807 	case EXT4_FC_TAG_DEL_RANGE:
1808 		return "TAG_DEL_RANGE";
1809 	case EXT4_FC_TAG_INODE:
1810 		return "TAG_INODE";
1811 	case EXT4_FC_TAG_PAD:
1812 		return "TAG_PAD";
1813 	case EXT4_FC_TAG_TAIL:
1814 		return "TAG_TAIL";
1815 	case EXT4_FC_TAG_HEAD:
1816 		return "TAG_HEAD";
1817 	default:
1818 		return "TAG_ERROR";
1819 	}
1820 }
1821 
1822 static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
1823 {
1824 	struct ext4_fc_replay_state *state;
1825 	struct inode *inode;
1826 	struct ext4_ext_path *path = NULL;
1827 	struct ext4_map_blocks map;
1828 	int i, ret, j;
1829 	ext4_lblk_t cur, end;
1830 
1831 	state = &EXT4_SB(sb)->s_fc_replay_state;
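	/*
	 * For every inode modified during replay, mark both its data blocks
	 * and its extent tree blocks as in use in the block bitmaps, so the
	 * on-disk allocation state matches the replayed extent trees.
	 */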
1832 	for (i = 0; i < state->fc_modified_inodes_used; i++) {
1833 		inode = ext4_iget(sb, state->fc_modified_inodes[i],
1834 			EXT4_IGET_NORMAL);
1835 		if (IS_ERR(inode)) {
1836 			jbd_debug(1, "Inode %d not found.",
1837 				state->fc_modified_inodes[i]);
1838 			continue;
1839 		}
1840 		cur = 0;
1841 		end = EXT_MAX_BLOCKS;
1842 		while (cur < end) {
1843 			map.m_lblk = cur;
1844 			map.m_len = end - cur;
1845 
1846 			ret = ext4_map_blocks(NULL, inode, &map, 0);
1847 			if (ret < 0)
1848 				break;
1849 
1850 			if (ret > 0) {
1851 				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
1852 				if (!IS_ERR(path)) {
1853 					for (j = 0; j < path->p_depth; j++)
1854 						ext4_mb_mark_bb(inode->i_sb,
1855 							path[j].p_block, 1, 1);
1856 					ext4_ext_drop_refs(path);
1857 					kfree(path);
1858 				}
1859 				cur += ret;
1860 				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
1861 							map.m_len, 1);
1862 			} else {
1863 				cur = cur + (map.m_len ? map.m_len : 1);
1864 			}
1865 		}
1866 		iput(inode);
1867 	}
1868 }
1869 
1870 /*
1871  * Check if a block is in the regions excluded from block allocation. The
1872  * simple allocator that runs during the replay phase calls this function
1873  * to see if it is okay to use a block.
1874  */
1875 bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
1876 {
1877 	int i;
1878 	struct ext4_fc_replay_state *state;
1879 
1880 	state = &EXT4_SB(sb)->s_fc_replay_state;
1881 	for (i = 0; i < state->fc_regions_valid; i++) {
1882 		if (state->fc_regions[i].ino == 0 ||
1883 			state->fc_regions[i].len == 0)
1884 			continue;
1885 		if (blk >= state->fc_regions[i].pblk &&
1886 		    blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
1887 			return true;
1888 	}
1889 	return false;
1890 }
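
/*
 * Hypothetical usage, sketched for illustration only: a replay-phase block
 * allocator would skip any candidate block for which this returns true, e.g.
 *
 *	while (ext4_fc_replay_check_excluded(sb, blk))
 *		blk++;
 */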
1891 
1892 /* Cleanup function called after replay */
1893 void ext4_fc_replay_cleanup(struct super_block *sb)
1894 {
1895 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1896 
1897 	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
1898 	kfree(sbi->s_fc_replay_state.fc_regions);
1899 	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
1900 }
1901 
1902 /*
1903  * Recovery Scan phase handler
1904  *
1905  * This function is called during the scan phase and is responsible
1906  * for doing the following things:
1907  * - Make sure the fast commit area has valid tags for replay
1908  * - Count number of tags that need to be replayed by the replay handler
1909  * - Verify CRC
1910  * - Create a list of excluded blocks for allocation during replay phase
1911  *
1912  * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
1913  * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
1914  * to indicate that scan has finished and JBD2 can now start replay phase.
1915  * On failure, it returns a negative error code. At the end of a successful
1916  * scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set to indicate
1917  * the number of tags that need to be replayed during the replay phase.
1918  */
1919 static int ext4_fc_replay_scan(journal_t *journal,
1920 				struct buffer_head *bh, int off,
1921 				tid_t expected_tid)
1922 {
1923 	struct super_block *sb = journal->j_private;
1924 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1925 	struct ext4_fc_replay_state *state;
1926 	int ret = JBD2_FC_REPLAY_CONTINUE;
1927 	struct ext4_fc_add_range ext;
1928 	struct ext4_fc_tl tl;
1929 	struct ext4_fc_tail tail;
1930 	__u8 *start, *end, *cur, *val;
1931 	struct ext4_fc_head head;
1932 	struct ext4_extent *ex;
1933 
1934 	state = &sbi->s_fc_replay_state;
1935 
1936 	start = (u8 *)bh->b_data;
1937 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
1938 
1939 	if (state->fc_replay_expected_off == 0) {
1940 		state->fc_cur_tag = 0;
1941 		state->fc_replay_num_tags = 0;
1942 		state->fc_crc = 0;
1943 		state->fc_regions = NULL;
1944 		state->fc_regions_valid = state->fc_regions_used =
1945 			state->fc_regions_size = 0;
1946 		/* Check if we can stop early */
1947 		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
1948 			!= EXT4_FC_TAG_HEAD)
1949 			return 0;
1950 	}
1951 
1952 	if (off != state->fc_replay_expected_off) {
1953 		ret = -EFSCORRUPTED;
1954 		goto out_err;
1955 	}
1956 
1957 	state->fc_replay_expected_off++;
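	/*
	 * Walk the block as a sequence of TLVs: copy out each tag/length
	 * header, then advance by sizeof(tl) plus the recorded value length.
	 */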
1958 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
1959 		memcpy(&tl, cur, sizeof(tl));
1960 		val = cur + sizeof(tl);
1961 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
1962 			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
1963 		switch (le16_to_cpu(tl.fc_tag)) {
1964 		case EXT4_FC_TAG_ADD_RANGE:
1965 			memcpy(&ext, val, sizeof(ext));
1966 			ex = (struct ext4_extent *)&ext.fc_ex;
1967 			ret = ext4_fc_record_regions(sb,
1968 				le32_to_cpu(ext.fc_ino),
1969 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
1970 				ext4_ext_get_actual_len(ex), 0);
1971 			if (ret < 0)
1972 				break;
1973 			ret = JBD2_FC_REPLAY_CONTINUE;
1974 			fallthrough;
1975 		case EXT4_FC_TAG_DEL_RANGE:
1976 		case EXT4_FC_TAG_LINK:
1977 		case EXT4_FC_TAG_UNLINK:
1978 		case EXT4_FC_TAG_CREAT:
1979 		case EXT4_FC_TAG_INODE:
1980 		case EXT4_FC_TAG_PAD:
1981 			state->fc_cur_tag++;
1982 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1983 					sizeof(tl) + le16_to_cpu(tl.fc_len));
1984 			break;
1985 		case EXT4_FC_TAG_TAIL:
1986 			state->fc_cur_tag++;
1987 			memcpy(&tail, val, sizeof(tail));
1988 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
1989 						sizeof(tl) +
1990 						offsetof(struct ext4_fc_tail,
1991 						fc_crc));
1992 			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
1993 				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
1994 				state->fc_replay_num_tags = state->fc_cur_tag;
1995 				state->fc_regions_valid =
1996 					state->fc_regions_used;
1997 			} else {
1998 				ret = state->fc_replay_num_tags ?
1999 					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
2000 			}
2001 			state->fc_crc = 0;
2002 			break;
2003 		case EXT4_FC_TAG_HEAD:
2004 			memcpy(&head, val, sizeof(head));
2005 			if (le32_to_cpu(head.fc_features) &
2006 				~EXT4_FC_SUPPORTED_FEATURES) {
2007 				ret = -EOPNOTSUPP;
2008 				break;
2009 			}
2010 			if (le32_to_cpu(head.fc_tid) != expected_tid) {
2011 				ret = JBD2_FC_REPLAY_STOP;
2012 				break;
2013 			}
2014 			state->fc_cur_tag++;
2015 			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
2016 					    sizeof(tl) + le16_to_cpu(tl.fc_len));
2017 			break;
2018 		default:
2019 			ret = state->fc_replay_num_tags ?
2020 				JBD2_FC_REPLAY_STOP : -ECANCELED;
2021 		}
2022 		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
2023 			break;
2024 	}
2025 
2026 out_err:
2027 	trace_ext4_fc_replay_scan(sb, ret, off);
2028 	return ret;
2029 }
2030 
2031 /*
2032  * Main recovery path entry point.
2033  * The return codes have the same meaning as for ext4_fc_replay_scan() above.
2034  */
2035 static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
2036 				enum passtype pass, int off, tid_t expected_tid)
2037 {
2038 	struct super_block *sb = journal->j_private;
2039 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2040 	struct ext4_fc_tl tl;
2041 	__u8 *start, *end, *cur, *val;
2042 	int ret = JBD2_FC_REPLAY_CONTINUE;
2043 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2044 	struct ext4_fc_tail tail;
2045 
2046 	if (pass == PASS_SCAN) {
2047 		state->fc_current_pass = PASS_SCAN;
2048 		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
2049 	}
2050 
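	/* First buffer of a new pass: record it and flag FC replay as active. */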
2051 	if (state->fc_current_pass != pass) {
2052 		state->fc_current_pass = pass;
2053 		sbi->s_mount_state |= EXT4_FC_REPLAY;
2054 	}
2055 	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
2056 		jbd_debug(1, "Replay stops\n");
2057 		ext4_fc_set_bitmaps_and_counters(sb);
2058 		return 0;
2059 	}
2060 
2061 #ifdef CONFIG_EXT4_DEBUG
2062 	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
2063 		pr_warn("Dropping fc block %d because max_replay set\n", off);
2064 		return JBD2_FC_REPLAY_STOP;
2065 	}
2066 #endif
2067 
2068 	start = (u8 *)bh->b_data;
2069 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2070 
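	/*
	 * Same TLV walk as in the scan phase, but here each tag is dispatched
	 * to its replay handler.
	 */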
2071 	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
2072 		memcpy(&tl, cur, sizeof(tl));
2073 		val = cur + sizeof(tl);
2074 
2075 		if (state->fc_replay_num_tags == 0) {
2076 			ret = JBD2_FC_REPLAY_STOP;
2077 			ext4_fc_set_bitmaps_and_counters(sb);
2078 			break;
2079 		}
2080 		jbd_debug(3, "Replay phase, tag:%s\n",
2081 				tag2str(le16_to_cpu(tl.fc_tag)));
2082 		state->fc_replay_num_tags--;
2083 		switch (le16_to_cpu(tl.fc_tag)) {
2084 		case EXT4_FC_TAG_LINK:
2085 			ret = ext4_fc_replay_link(sb, &tl, val);
2086 			break;
2087 		case EXT4_FC_TAG_UNLINK:
2088 			ret = ext4_fc_replay_unlink(sb, &tl, val);
2089 			break;
2090 		case EXT4_FC_TAG_ADD_RANGE:
2091 			ret = ext4_fc_replay_add_range(sb, &tl, val);
2092 			break;
2093 		case EXT4_FC_TAG_CREAT:
2094 			ret = ext4_fc_replay_create(sb, &tl, val);
2095 			break;
2096 		case EXT4_FC_TAG_DEL_RANGE:
2097 			ret = ext4_fc_replay_del_range(sb, &tl, val);
2098 			break;
2099 		case EXT4_FC_TAG_INODE:
2100 			ret = ext4_fc_replay_inode(sb, &tl, val);
2101 			break;
2102 		case EXT4_FC_TAG_PAD:
2103 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2104 					     le16_to_cpu(tl.fc_len), 0);
2105 			break;
2106 		case EXT4_FC_TAG_TAIL:
2107 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
2108 					     le16_to_cpu(tl.fc_len), 0);
2109 			memcpy(&tail, val, sizeof(tail));
2110 			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
2111 			break;
2112 		case EXT4_FC_TAG_HEAD:
2113 			break;
2114 		default:
2115 			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
2116 					     le16_to_cpu(tl.fc_len), 0);
2117 			ret = -ECANCELED;
2118 			break;
2119 		}
2120 		if (ret < 0)
2121 			break;
2122 		ret = JBD2_FC_REPLAY_CONTINUE;
2123 	}
2124 	return ret;
2125 }
2126 
2127 void ext4_fc_init(struct super_block *sb, journal_t *journal)
2128 {
2129 	/*
2130 	 * We set the replay callback even if fast commit is disabled, because
2131 	 * we could still have fast commit blocks that need to be replayed even if
2132 	 * fast commit has now been turned off.
2133 	 */
2134 	journal->j_fc_replay_callback = ext4_fc_replay;
2135 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
2136 		return;
2137 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
2138 }
2139 
2140 static const char *fc_ineligible_reasons[] = {
2141 	"Extended attributes changed",
2142 	"Cross rename",
2143 	"Journal flag changed",
2144 	"Insufficient memory",
2145 	"Swap boot",
2146 	"Resize",
2147 	"Dir renamed",
2148 	"Falloc range op",
2149 	"Data journalling",
2150 	"FC Commit Failed"
2151 };
2152 
2153 int ext4_fc_info_show(struct seq_file *seq, void *v)
2154 {
2155 	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2156 	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2157 	int i;
2158 
2159 	if (v != SEQ_START_TOKEN)
2160 		return 0;
2161 
2162 	seq_printf(seq,
2163 		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2164 		   stats->fc_num_commits, stats->fc_ineligible_commits,
2165 		   stats->fc_numblks,
2166 		   div_u64(sbi->s_fc_avg_commit_time, 1000));
2167 	seq_puts(seq, "Ineligible reasons:\n");
2168 	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2169 		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2170 			stats->fc_ineligible_reason_count[i]);
2171 
2172 	return 0;
2173 }
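
/*
 * Example output (illustrative numbers only), following the format strings
 * above, as read from the fc_info file under /proc/fs/ext4/<dev>/:
 *
 *	fc stats:
 *	128 commits
 *	4 ineligible
 *	512 numblks
 *	870us avg_commit_time
 *	Ineligible reasons:
 *	"Extended attributes changed":	2
 *	...
 */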
2174 
2175 int __init ext4_fc_init_dentry_cache(void)
2176 {
2177 	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2178 					   SLAB_RECLAIM_ACCOUNT);
2179 
2180 	if (ext4_fc_dentry_cachep == NULL)
2181 		return -ENOMEM;
2182 
2183 	return 0;
2184 }
2185 
2186 void ext4_fc_destroy_dentry_cache(void)
2187 {
2188 	kmem_cache_destroy(ext4_fc_dentry_cachep);
2189 }
2190