xref: /OK3568_Linux_fs/kernel/fs/ext4/extents.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/fiemap.h>
#include <linux/backing-dev.h>
#include <linux/iomap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"
#include "xattr.h"

#include <trace/events/ext4.h>

/*
 * used by extent splitting.
 */
#define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
					due to ENOSPC */
#define EXT4_EXT_MARK_UNWRIT1	0x2  /* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT2	0x4  /* mark second half unwritten */

#define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
#define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */

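/*
 * Checksum helpers for extent tree blocks: when metadata checksums are
 * enabled, a checksum of the block header and entries is kept in an
 * ext4_extent_tail at the end of each non-root node.
 */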
static __le32 ext4_extent_block_csum(struct inode *inode,
				     struct ext4_extent_header *eh)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 csum;

	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
			   EXT4_EXTENT_TAIL_OFFSET(eh));
	return cpu_to_le32(csum);
}

static int ext4_extent_block_csum_verify(struct inode *inode,
					 struct ext4_extent_header *eh)
{
	struct ext4_extent_tail *et;

	if (!ext4_has_metadata_csum(inode->i_sb))
		return 1;

	et = find_ext4_extent_tail(eh);
	if (et->et_checksum != ext4_extent_block_csum(inode, eh))
		return 0;
	return 1;
}

static void ext4_extent_block_csum_set(struct inode *inode,
				       struct ext4_extent_header *eh)
{
	struct ext4_extent_tail *et;

	if (!ext4_has_metadata_csum(inode->i_sb))
		return;

	et = find_ext4_extent_tail(eh);
	et->et_checksum = ext4_extent_block_csum(inode, eh);
}

static int ext4_split_extent_at(handle_t *handle,
			     struct inode *inode,
			     struct ext4_ext_path **ppath,
			     ext4_lblk_t split,
			     int split_flag,
			     int flags);

static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
{
	/*
	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
	 * moment, get_block can be called only for blocks inside i_size since
	 * page cache has been already dropped and writes are blocked by
	 * i_mutex. So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode, 0);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;
	return 0;
}

/*
 * Make sure 'handle' has at least 'check_cred' credits. If not, restart
 * transaction with 'restart_cred' credits. The function drops i_data_sem
 * when restarting transaction and gets it after transaction is restarted.
 *
 * The function returns 0 on success, 1 if transaction had to be restarted,
 * and < 0 in case of fatal error.
 */
int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
				int check_cred, int restart_cred,
				int revoke_cred)
{
	int ret;
	int dropped = 0;

	ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
		revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
	if (dropped)
		down_write(&EXT4_I(inode)->i_data_sem);
	return ret;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	int err = 0;

	if (path->p_bh) {
		/* path points to block */
		BUFFER_TRACE(path->p_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, path->p_bh);
		/*
		 * The extent buffer's verified bit will be set again in
		 * __ext4_ext_dirty(). We could leave an inconsistent
		 * buffer if the extent updating procedure breaks off due
		 * to some error, so force it to be checked again.
		 */
		if (!err)
			clear_buffer_verified(path->p_bh);
	}
	/* path points to leaf/index in inode body */
	/* we use in-core data, no need to protect them */
	return err;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
static int __ext4_ext_dirty(const char *where, unsigned int line,
			    handle_t *handle, struct inode *inode,
			    struct ext4_ext_path *path)
{
	int err;

	WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
	if (path->p_bh) {
		ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
		/* path points to block */
		err = __ext4_handle_dirty_metadata(where, line, handle,
						   inode, path->p_bh);
		/* Extents updating done, re-set verified flag */
		if (!err)
			set_buffer_verified(path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
	}
	return err;
}

#define ext4_ext_dirty(handle, inode, path) \
		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))

static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
			      struct ext4_ext_path *path,
			      ext4_lblk_t block)
{
	if (path) {
		int depth = path->p_depth;
		struct ext4_extent *ex;

		/*
		 * Try to predict block placement assuming that we are
		 * filling in a file which will eventually be
		 * non-sparse --- i.e., in the case of libbfd writing
		 * an ELF object sections out-of-order but in a way
		 * that eventually results in a contiguous object or
		 * executable file, or some database extending a table
		 * space file.  However, this is actually somewhat
		 * non-ideal if we are writing a sparse file such as
		 * qemu or KVM writing a raw image file that is going
		 * to stay fairly sparse, since it will end up
		 * fragmenting the file system's free space.  Maybe we
		 * should have some heuristics or some way to allow
		 * userspace to pass a hint to the file system,
		 * especially if the latter case turns out to be
		 * common.
		 */
		ex = path[depth].p_ext;
		if (ex) {
			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

			if (block > ext_block)
				return ext_pblk + (block - ext_block);
			else
				return ext_pblk - (ext_block - block);
		}

		/* it looks like index is empty;
		 * try to find starting block from index itself */
		if (path[depth].p_bh)
			return path[depth].p_bh->b_blocknr;
	}

	/* OK. use inode's group */
	return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path,
			struct ext4_extent *ex, int *err, unsigned int flags)
{
	ext4_fsblk_t goal, newblock;

	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
					NULL, err);
	return newblock;
}

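/*
 * The four ext4_ext_space_*() helpers below return how many extent or
 * index entries fit in a node: either an on-disk tree block or the root
 * held in the inode's i_data.  With AGGRESSIVE_TEST defined, the usable
 * capacity is artificially capped to force deeper trees during testing.
 */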
static inline int ext4_ext_space_block(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 6)
		size = 6;
#endif
	return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 5)
		size = 5;
#endif
	return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 3)
		size = 3;
#endif
	return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 4)
		size = 4;
#endif
	return size;
}

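/*
 * Split the extent covering @lblk at that logical block, keeping both
 * halves unwritten if the original extent was unwritten.  With @nofail
 * set, the split is not allowed to fail on block/memory reservation.
 */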
static inline int
ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
			   struct ext4_ext_path **ppath, ext4_lblk_t lblk,
			   int nofail)
{
	struct ext4_ext_path *path = *ppath;
	int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
	int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;

	if (nofail)
		flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;

	return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
			EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
			flags);
}

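/*
 * Return the maximum number of entries allowed in a node at @depth,
 * distinguishing the in-inode root from on-disk tree blocks.
 */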
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int max;

	if (depth == ext_depth(inode)) {
		if (depth == 0)
			max = ext4_ext_space_root(inode, 1);
		else
			max = ext4_ext_space_root_idx(inode, 1);
	} else {
		if (depth == 0)
			max = ext4_ext_space_block(inode, 1);
		else
			max = ext4_ext_space_block_idx(inode, 1);
	}

	return max;
}

static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
	ext4_fsblk_t block = ext4_ext_pblock(ext);
	int len = ext4_ext_get_actual_len(ext);
	ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);

	/*
	 * We allow neither:
	 *  - zero length
	 *  - overflow/wrap-around
	 */
	if (lblock + len <= lblock)
		return 0;
	return ext4_inode_block_valid(inode, block, len);
}

static int ext4_valid_extent_idx(struct inode *inode,
				struct ext4_extent_idx *ext_idx)
{
	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);

	return ext4_inode_block_valid(inode, block, 1);
}

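/*
 * Validate every entry in a tree node: each extent or index must point
 * at an allowed physical range, entries must be sorted by logical block,
 * and extents must not overlap.  On failure *pblk is set to the offending
 * physical block where applicable.
 */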
static int ext4_valid_extent_entries(struct inode *inode,
				     struct ext4_extent_header *eh,
				     ext4_lblk_t lblk, ext4_fsblk_t *pblk,
				     int depth)
{
	unsigned short entries;
	ext4_lblk_t lblock = 0;
	ext4_lblk_t cur = 0;

	if (eh->eh_entries == 0)
		return 1;

	entries = le16_to_cpu(eh->eh_entries);

	if (depth == 0) {
		/* leaf entries */
		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);

		/*
		 * The logical block in the first entry should equal the
		 * number in the index block.
		 */
		if (depth != ext_depth(inode) &&
		    lblk != le32_to_cpu(ext->ee_block))
			return 0;
		while (entries) {
			if (!ext4_valid_extent(inode, ext))
				return 0;

			/* Check for overlapping extents */
			lblock = le32_to_cpu(ext->ee_block);
			if (lblock < cur) {
				*pblk = ext4_ext_pblock(ext);
				return 0;
			}
			cur = lblock + ext4_ext_get_actual_len(ext);
			ext++;
			entries--;
		}
	} else {
		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);

		/*
		 * The logical block in the first entry should equal the
		 * number in the parent index block.
		 */
		if (depth != ext_depth(inode) &&
		    lblk != le32_to_cpu(ext_idx->ei_block))
			return 0;
		while (entries) {
			if (!ext4_valid_extent_idx(inode, ext_idx))
				return 0;

			/* Check for overlapping index extents */
			lblock = le32_to_cpu(ext_idx->ei_block);
			if (lblock < cur) {
				*pblk = ext4_idx_pblock(ext_idx);
				return 0;
			}
			ext_idx++;
			entries--;
			cur = lblock + 1;
		}
	}
	return 1;
}

static int __ext4_ext_check(const char *function, unsigned int line,
			    struct inode *inode, struct ext4_extent_header *eh,
			    int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
{
	const char *error_msg;
	int max = 0, err = -EFSCORRUPTED;

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
		error_msg = "eh_entries is 0 but eh_depth is > 0";
		goto corrupted;
	}
	if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
		error_msg = "invalid extent entries";
		goto corrupted;
	}
	if (unlikely(depth > 32)) {
		error_msg = "too large eh_depth";
		goto corrupted;
	}
	/* Verify checksum on non-root extent tree nodes */
	if (ext_depth(inode) != depth &&
	    !ext4_extent_block_csum_verify(inode, eh)) {
		error_msg = "extent tree corrupted";
		err = -EFSBADCRC;
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error_inode_err(inode, function, line, 0, -err,
			     "pblk %llu bad header/extent: %s - magic %x, "
			     "entries %u, max %u(%u), depth %u(%u)",
			     (unsigned long long) pblk, error_msg,
			     le16_to_cpu(eh->eh_magic),
			     le16_to_cpu(eh->eh_entries),
			     le16_to_cpu(eh->eh_max),
			     max, le16_to_cpu(eh->eh_depth), depth);
	return err;
}

#define ext4_ext_check(inode, eh, depth, pblk)			\
	__ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)

int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}

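/*
 * Pre-populate the extent status tree from a leaf node: cache each extent
 * as written or unwritten, and cache the gaps between adjacent extents
 * as holes.
 */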
static void ext4_cache_extents(struct inode *inode,
			       struct ext4_extent_header *eh)
{
	struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
	ext4_lblk_t prev = 0;
	int i;

	for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
		unsigned int status = EXTENT_STATUS_WRITTEN;
		ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
		int len = ext4_ext_get_actual_len(ex);

		if (prev && (prev != lblk))
			ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
					     EXTENT_STATUS_HOLE);

		if (ext4_ext_is_unwritten(ex))
			status = EXTENT_STATUS_UNWRITTEN;
		ext4_es_cache_extent(inode, lblk, len,
				     ext4_ext_pblock(ex), status);
		prev = lblk + len;
	}
}

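/*
 * Read a tree node from disk (or the buffer cache), verify it with
 * __ext4_ext_check() on first use, and cache leaf entries in the extent
 * status tree unless the caller passed EXT4_EX_NOCACHE.
 */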
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
			 struct inode *inode, struct ext4_extent_idx *idx,
			 int depth, int flags)
{
	struct buffer_head		*bh;
	int				err;
	gfp_t				gfp_flags = __GFP_MOVABLE | GFP_NOFS;
	ext4_fsblk_t			pblk;

	if (flags & EXT4_EX_NOFAIL)
		gfp_flags |= __GFP_NOFAIL;

	pblk = ext4_idx_pblock(idx);
	bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
	if (unlikely(!bh))
		return ERR_PTR(-ENOMEM);

	if (!bh_uptodate_or_lock(bh)) {
		trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
		err = ext4_read_bh(bh, 0, NULL);
		if (err < 0)
			goto errout;
	}
	if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
		return bh;
	err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
			       depth, pblk, le32_to_cpu(idx->ei_block));
	if (err)
		goto errout;
	set_buffer_verified(bh);
	/*
	 * If this is a leaf block, cache all of its entries
	 */
	if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
		struct ext4_extent_header *eh = ext_block_hdr(bh);
		ext4_cache_extents(inode, eh);
	}
	return bh;
errout:
	put_bh(bh);
	return ERR_PTR(err);
}

#define read_extent_tree_block(inode, idx, depth, flags)		\
	__read_extent_tree_block(__func__, __LINE__, (inode), (idx),	\
				 (depth), (flags))

/*
 * This function is called to cache a file's extent information in the
 * extent status tree
 */
int ext4_ext_precache(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_ext_path *path = NULL;
	struct buffer_head *bh;
	int i = 0, depth, ret = 0;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return 0;	/* not an extent-mapped inode */

	down_read(&ei->i_data_sem);
	depth = ext_depth(inode);

	/* Don't cache anything if there are no external extent blocks */
	if (!depth) {
		up_read(&ei->i_data_sem);
		return ret;
	}

	path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
		       GFP_NOFS);
	if (path == NULL) {
		up_read(&ei->i_data_sem);
		return -ENOMEM;
	}

	path[0].p_hdr = ext_inode_hdr(inode);
	ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
	if (ret)
		goto out;
	path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
	while (i >= 0) {
		/*
		 * If this is a leaf block or we've reached the end of
		 * the index block, go up
		 */
		if ((i == depth) ||
		    path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}
		bh = read_extent_tree_block(inode, path[i].p_idx++,
					    depth - i - 1,
					    EXT4_EX_FORCE_CACHE);
		if (IS_ERR(bh)) {
			ret = PTR_ERR(bh);
			break;
		}
		i++;
		path[i].p_bh = bh;
		path[i].p_hdr = ext_block_hdr(bh);
		path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
	}
	ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
out:
	up_read(&ei->i_data_sem);
	ext4_ext_drop_refs(path);
	kfree(path);
	return ret;
}

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug(inode, "path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
			ext_debug(inode, "  %d->%llu",
				  le32_to_cpu(path->p_idx->ei_block),
				  ext4_idx_pblock(path->p_idx));
		} else if (path->p_ext) {
			ext_debug(inode, "  %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_unwritten(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
		} else
			ext_debug(inode, "  []");
	}
	ext_debug(inode, "\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug(inode, "Displaying leaf extents\n");

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_unwritten(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug(inode, "\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		struct ext4_extent_idx *idx;
		idx = path[level].p_idx;
		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
				  level, le32_to_cpu(idx->ei_block),
				  ext4_idx_pblock(idx), newblock);
			idx++;
		}

		return;
	}

	ex = path[depth].p_ext;
	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(ex->ee_block),
				ext4_ext_pblock(ex),
				ext4_ext_is_unwritten(ex),
				ext4_ext_get_actual_len(ex),
				newblock);
		ex++;
	}
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif

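/* Release the buffer heads referenced by each level of the path. */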
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
	int depth, i;

	if (!path)
		return;
	depth = path->p_depth;
	for (i = 0; i <= depth; i++, path++) {
		brelse(path->p_bh);
		path->p_bh = NULL;
	}
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;

	ext_debug(inode, "binsearch for %u(idx):  ", block);

	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_LAST_INDEX(eh);
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
			  le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
			  r, le32_to_cpu(r->ei_block));
	}

	path->p_idx = l - 1;
	ext_debug(inode, "  -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
			if (k != 0 && le32_to_cpu(ix->ei_block) <=
			    le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
				printk(KERN_DEBUG "%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
					   <= le32_to_cpu(ix[-1].ei_block));
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif

}

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
		 * this leaf is empty:
		 * we get such a leaf in split/add case
		 */
		return;
	}

	ext_debug(inode, "binsearch for %u:  ", block);

	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_LAST_EXTENT(eh);

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
			  le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
			  r, le32_to_cpu(r->ee_block));
	}

	path->p_ext = l - 1;
	ext_debug(inode, "  -> %d:%llu:[%d]%d ",
			le32_to_cpu(path->p_ext->ee_block),
			ext4_ext_pblock(path->p_ext),
			ext4_ext_is_unwritten(path->p_ext),
			ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
					  <= le32_to_cpu(ex[-1].ee_block));
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif

}

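/* Initialize an empty extent tree rooted in the inode's i_data. */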
void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = EXT4_EXT_MAGIC;
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
	eh->eh_generation = 0;
	ext4_mark_inode_dirty(handle, inode);
}

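/*
 * Walk the tree from the root down to the leaf covering @block, doing a
 * binary search at each level.  Returns the array of ext4_ext_path levels
 * (reusing *orig_path when it is large enough) or an ERR_PTR on failure.
 */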
struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
		 struct ext4_ext_path **orig_path, int flags)
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
	short int depth, i, ppos = 0;
	int ret;
	gfp_t gfp_flags = GFP_NOFS;

	if (flags & EXT4_EX_NOFAIL)
		gfp_flags |= __GFP_NOFAIL;

	eh = ext_inode_hdr(inode);
	depth = ext_depth(inode);
	if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
		EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
				 depth);
		ret = -EFSCORRUPTED;
		goto err;
	}

	if (path) {
		ext4_ext_drop_refs(path);
		if (depth > path[0].p_maxdepth) {
			kfree(path);
			*orig_path = path = NULL;
		}
	}
	if (!path) {
		/* account possible depth increase */
		path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
				gfp_flags);
		if (unlikely(!path))
			return ERR_PTR(-ENOMEM);
		path[0].p_maxdepth = depth + 1;
	}
	path[0].p_hdr = eh;
	path[0].p_bh = NULL;

	i = depth;
	if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
		ext4_cache_extents(inode, eh);
	/* walk through the tree */
	while (i) {
		ext_debug(inode, "depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

		ext4_ext_binsearch_idx(inode, path + ppos, block);
		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
		if (IS_ERR(bh)) {
			ret = PTR_ERR(bh);
			goto err;
		}

		eh = ext_block_hdr(bh);
		ppos++;
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
	}

	path[ppos].p_depth = i;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);
	/* if not an empty leaf */
	if (path[ppos].p_ext)
		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);
	kfree(path);
	if (orig_path)
		*orig_path = NULL;
	return ERR_PTR(ret);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				 struct ext4_ext_path *curp,
				 int logical, ext4_fsblk_t ptr)
{
	struct ext4_extent_idx *ix;
	int len, err;

	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		return err;

	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d == ei_block %d!",
				 logical, le32_to_cpu(curp->p_idx->ei_block));
		return -EFSCORRUPTED;
	}

	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
			     >= le16_to_cpu(curp->p_hdr->eh_max))) {
		EXT4_ERROR_INODE(inode,
				 "eh_entries %d >= eh_max %d!",
				 le16_to_cpu(curp->p_hdr->eh_entries),
				 le16_to_cpu(curp->p_hdr->eh_max));
		return -EFSCORRUPTED;
	}

	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		ext_debug(inode, "insert new index %d after: %llu\n",
			  logical, ptr);
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		ext_debug(inode, "insert new index %d before: %llu\n",
			  logical, ptr);
		ix = curp->p_idx;
	}

	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
	BUG_ON(len < 0);
	if (len > 0) {
		ext_debug(inode, "insert new index %d: "
				"move %d indices from 0x%p to 0x%p\n",
				logical, len, ix, ix + 1);
		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
	}

	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
		return -EFSCORRUPTED;
	}

	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);

	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
		return -EFSCORRUPTED;
	}

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}

1035*4882a593Smuzhiyun /*
1036*4882a593Smuzhiyun  * ext4_ext_split:
1037*4882a593Smuzhiyun  * inserts new subtree into the path, using free index entry
1038*4882a593Smuzhiyun  * at depth @at:
1039*4882a593Smuzhiyun  * - allocates all needed blocks (new leaf and all intermediate index blocks)
1040*4882a593Smuzhiyun  * - makes decision where to split
1041*4882a593Smuzhiyun  * - moves remaining extents and index entries (right to the split point)
1042*4882a593Smuzhiyun  *   into the newly allocated blocks
1043*4882a593Smuzhiyun  * - initializes subtree
1044*4882a593Smuzhiyun  */
ext4_ext_split(handle_t * handle,struct inode * inode,unsigned int flags,struct ext4_ext_path * path,struct ext4_extent * newext,int at)1045*4882a593Smuzhiyun static int ext4_ext_split(handle_t *handle, struct inode *inode,
1046*4882a593Smuzhiyun 			  unsigned int flags,
1047*4882a593Smuzhiyun 			  struct ext4_ext_path *path,
1048*4882a593Smuzhiyun 			  struct ext4_extent *newext, int at)
1049*4882a593Smuzhiyun {
1050*4882a593Smuzhiyun 	struct buffer_head *bh = NULL;
1051*4882a593Smuzhiyun 	int depth = ext_depth(inode);
1052*4882a593Smuzhiyun 	struct ext4_extent_header *neh;
1053*4882a593Smuzhiyun 	struct ext4_extent_idx *fidx;
1054*4882a593Smuzhiyun 	int i = at, k, m, a;
1055*4882a593Smuzhiyun 	ext4_fsblk_t newblock, oldblock;
1056*4882a593Smuzhiyun 	__le32 border;
1057*4882a593Smuzhiyun 	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
1058*4882a593Smuzhiyun 	gfp_t gfp_flags = GFP_NOFS;
1059*4882a593Smuzhiyun 	int err = 0;
1060*4882a593Smuzhiyun 	size_t ext_size = 0;
1061*4882a593Smuzhiyun 
1062*4882a593Smuzhiyun 	if (flags & EXT4_EX_NOFAIL)
1063*4882a593Smuzhiyun 		gfp_flags |= __GFP_NOFAIL;
1064*4882a593Smuzhiyun 
1065*4882a593Smuzhiyun 	/* make decision: where to split? */
1066*4882a593Smuzhiyun 	/* FIXME: now decision is simplest: at current extent */
1067*4882a593Smuzhiyun 
1068*4882a593Smuzhiyun 	/* if current leaf will be split, then we should use
1069*4882a593Smuzhiyun 	 * border from split point */
1070*4882a593Smuzhiyun 	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
1071*4882a593Smuzhiyun 		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
1072*4882a593Smuzhiyun 		return -EFSCORRUPTED;
1073*4882a593Smuzhiyun 	}
1074*4882a593Smuzhiyun 	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
1075*4882a593Smuzhiyun 		border = path[depth].p_ext[1].ee_block;
1076*4882a593Smuzhiyun 		ext_debug(inode, "leaf will be split."
1077*4882a593Smuzhiyun 				" next leaf starts at %d\n",
1078*4882a593Smuzhiyun 				  le32_to_cpu(border));
1079*4882a593Smuzhiyun 	} else {
1080*4882a593Smuzhiyun 		border = newext->ee_block;
1081*4882a593Smuzhiyun 		ext_debug(inode, "leaf will be added."
1082*4882a593Smuzhiyun 				" next leaf starts at %d\n",
1083*4882a593Smuzhiyun 				le32_to_cpu(border));
1084*4882a593Smuzhiyun 	}
1085*4882a593Smuzhiyun 
1086*4882a593Smuzhiyun 	/*
1087*4882a593Smuzhiyun 	 * If error occurs, then we break processing
1088*4882a593Smuzhiyun 	 * and mark filesystem read-only. index won't
1089*4882a593Smuzhiyun 	 * be inserted and tree will be in consistent
1090*4882a593Smuzhiyun 	 * state. Next mount will repair buffers too.
1091*4882a593Smuzhiyun 	 */
1092*4882a593Smuzhiyun 
1093*4882a593Smuzhiyun 	/*
1094*4882a593Smuzhiyun 	 * Get array to track all allocated blocks.
1095*4882a593Smuzhiyun 	 * We need this to handle errors and free blocks
1096*4882a593Smuzhiyun 	 * upon them.
1097*4882a593Smuzhiyun 	 */
	ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err, flags);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	if (unlikely(newblock == 0)) {
		EXT4_ERROR_INODE(inode, "newblock == 0!");
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
	if (unlikely(!bh)) {
		err = -ENOMEM;
		goto cleanup;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err)
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;
	neh->eh_generation = 0;

	/* move remainder of path[depth] to the new leaf */
	if (unlikely(path[depth].p_hdr->eh_entries !=
		     path[depth].p_hdr->eh_max)) {
		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
				 path[depth].p_hdr->eh_entries,
				 path[depth].p_hdr->eh_max);
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	/* start copy from next extent */
	m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
	ext4_ext_show_move(inode, path, newblock, depth);
	if (m) {
		struct ext4_extent *ex;
		ex = EXT_FIRST_EXTENT(neh);
		memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
		le16_add_cpu(&neh->eh_entries, m);
	}

	/* zero out unused area in the extent block */
	ext_size = sizeof(struct ext4_extent_header) +
		sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
	memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
	ext4_extent_block_csum_set(inode, neh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	if (m) {
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto cleanup;
		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto cleanup;
	}

	/* create intermediate indexes */
	k = depth - at - 1;
	if (unlikely(k < 0)) {
		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
		err = -EFSCORRUPTED;
		goto cleanup;
	}
	if (k)
		ext_debug(inode, "create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
		bh = sb_getblk(inode->i_sb, newblock);
		if (unlikely(!bh)) {
			err = -ENOMEM;
			goto cleanup;
		}
		lock_buffer(bh);

		err = ext4_journal_get_create_access(handle, bh);
		if (err)
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
		neh->eh_depth = cpu_to_le16(depth - i);
		neh->eh_generation = 0;
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
		ext4_idx_store_pblock(fidx, oldblock);

		ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);

		/* move remainder of path[i] to the new index block */
		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
					EXT_LAST_INDEX(path[i].p_hdr))) {
			EXT4_ERROR_INODE(inode,
					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
					 le32_to_cpu(path[i].p_ext->ee_block));
			err = -EFSCORRUPTED;
			goto cleanup;
		}
		/* start copy indexes */
		m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
		ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		ext4_ext_show_move(inode, path, newblock, i);
		if (m) {
			memmove(++fidx, path[i].p_idx,
				sizeof(struct ext4_extent_idx) * m);
			le16_add_cpu(&neh->eh_entries, m);
		}
		/* zero out unused area in the extent block */
		ext_size = sizeof(struct ext4_extent_header) +
		   (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
		memset(bh->b_data + ext_size, 0,
			inode->i_sb->s_blocksize - ext_size);
		ext4_extent_block_csum_set(inode, neh);
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
		}
	}
	kfree(ablocks);

	return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
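/*
 * Worked example (illustrative sketch, not from the original source):
 * growing a full depth-0 tree whose root leaf lives in i_data:
 *
 *   before:  i_data -> [leaf: ex0 ex1 ... exN]                (depth 0)
 *   after:   i_data -> [index: ei0] --> [block: ex0 ... exN]  (depth 1)
 *
 * The old root contents are copied verbatim into the newly allocated
 * block, the root is rewritten as a single index entry pointing at that
 * block, and eh_depth is bumped by one.
 */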
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
				 unsigned int flags)
{
	struct ext4_extent_header *neh;
	struct buffer_head *bh;
	ext4_fsblk_t newblock, goal = 0;
	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
	int err = 0;
	size_t ext_size = 0;

	/* Try to prepend new index to old one */
	if (ext_depth(inode))
		goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
	if (goal > le32_to_cpu(es->s_first_data_block)) {
		flags |= EXT4_MB_HINT_TRY_GOAL;
		goal--;
	} else
		goal = ext4_inode_to_goal_block(inode);
	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
					NULL, &err);
	if (newblock == 0)
		return err;

	bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
	if (unlikely(!bh))
		return -ENOMEM;
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
		unlock_buffer(bh);
		goto out;
	}

	ext_size = sizeof(EXT4_I(inode)->i_data);
	/* move top-level index/leaf into new block */
	memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
	/* zero out unused area in the extent block */
	memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* the old root could have indexes or leaves,
	 * so calculate eh_max the right way */
	if (ext_depth(inode))
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
	else
		neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	ext4_extent_block_csum_set(inode, neh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

	/* Update top-level index: num,max,pointer */
	neh = ext_inode_hdr(inode);
	neh->eh_entries = cpu_to_le16(1);
	ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
	if (neh->eh_depth == 0) {
		/* Root extent block becomes index block */
		neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
		EXT_FIRST_INDEX(neh)->ei_block =
			EXT_FIRST_EXTENT(neh)->ee_block;
	}
	ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

	le16_add_cpu(&neh->eh_depth, 1);
	err = ext4_mark_inode_dirty(handle, inode);
out:
	brelse(bh);

	return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds an index with a free entry and adds a new leaf.
 * If no free index entry is found, the tree is grown in depth instead.
 */
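/*
 * Control-flow sketch (illustrative): walk from the leaf toward the
 * root until an index block with a free slot is found.
 *
 *   level 3 (leaf, full) -> level 2 (index, full) -> level 1 (has room)
 *     => ext4_ext_split() at level 1, then re-lookup the path.
 *
 * If every level up to the root is full, the tree is grown one level
 * via ext4_ext_grow_indepth() and the walk is retried ("repeat"); only
 * the first grow is guaranteed to produce a free index slot.
 */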
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
				    unsigned int mb_flags,
				    unsigned int gb_flags,
				    struct ext4_ext_path **ppath,
				    struct ext4_extent *newext)
{
	struct ext4_ext_path *path = *ppath;
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up the tree and look for a free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

	/* we use an already allocated block for the index block,
	 * so subsequent data blocks should be contiguous */
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found an index with a free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
		if (err)
			goto out;

		/* refill path */
		path = ext4_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    ppath, gb_flags);
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, mb_flags);
		if (err)
			goto out;

		/* refill path */
		path = ext4_find_extent(inode,
				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    ppath, gb_flags);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}

		/*
		 * only the first grow (depth 0 -> 1) produces free space;
		 * in all other cases we have to split the grown tree
		 */
		depth = ext_depth(inode);
		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
			/* now we need to split */
			goto repeat;
		}
	}

out:
	return err;
}

/*
 * Search the closest allocated block to the left of *logical,
 * returning it in @logical and its physical address in @phys.
 * If *logical is the smallest allocated block, the function
 * sets @phys to 0.
 * The return value contains 0 (success) or an error code.
 */
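/*
 * Usage sketch (illustrative, with made-up numbers): if the leaf in
 * @path holds the extent {ee_block = 100, ee_len = 8} and *logical is
 * 108, the function rewrites *logical to 107 and *phys to the physical
 * block backing it, i.e. the last allocated block of the extent on the
 * left.
 */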
static int ext4_ext_search_left(struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth, ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EFSCORRUPTED;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually the extent in the path covers blocks smaller
	 * than *logical, but it can be that the extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
					 *logical, le32_to_cpu(ex->ee_block));
			return -EFSCORRUPTED;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
				  ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
		le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
				  depth);
				return -EFSCORRUPTED;
			}
		}
		return 0;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EFSCORRUPTED;
	}

	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext4_ext_pblock(ex) + ee_len - 1;
	return 0;
}

/*
 * Search the closest allocated block to the right of *logical,
 * returning it in @logical and its physical address in @phys.
 * If no such block exists, return 0 and set @phys to 0. Otherwise
 * return 1, which means we found an allocated block and ret_ex is
 * valid. Or return a (< 0) error code.
 */
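/*
 * Illustrative outcomes (numbers made up):
 *
 *   leaf extent {ee_block = 100, ee_len = 8}, *logical = 90
 *     -> returns 1, *logical = 100, *phys = ext4_ext_pblock(ex)
 *   *logical beyond the last extent of the last leaf
 *     -> returns 0, *phys = 0 (nothing allocated to the right)
 */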
static int ext4_ext_search_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
				 struct ext4_extent *ret_ex)
{
	struct buffer_head *bh = NULL;
	struct ext4_extent_header *eh;
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth;	/* Note, NOT eh_depth; depth from top of tree */
	int ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EFSCORRUPTED;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually the extent in the path covers blocks smaller
	 * than *logical, but it can be that the extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "first_extent(path[%d].p_hdr) != ex",
					 depth);
			return -EFSCORRUPTED;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
						 "ix != EXT_FIRST_INDEX *logical %d!",
						 *logical);
				return -EFSCORRUPTED;
			}
		}
		goto found_extent;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EFSCORRUPTED;
	}

	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
		ex++;
		goto found_extent;
	}

	/* go up and search for index to the right */
	while (--depth >= 0) {
		ix = path[depth].p_idx;
		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
			goto got_index;
	}

	/* we've gone up to the root and found no index to the right */
	return 0;

got_index:
	/* we've found index to the right, let's
	 * follow it and find the closest allocated
	 * block to the right */
	ix++;
	while (++depth < path->p_depth) {
		/* subtract from p_depth to get proper eh_depth */
		bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
		eh = ext_block_hdr(bh);
		ix = EXT_FIRST_INDEX(eh);
		put_bh(bh);
	}

	bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	eh = ext_block_hdr(bh);
	ex = EXT_FIRST_EXTENT(eh);
found_extent:
	*logical = le32_to_cpu(ex->ee_block);
	*phys = ext4_ext_pblock(ex);
	if (ret_ex)
		*ret_ex = *ex;
	if (bh)
		put_bh(bh);
	return 1;
}

/*
 * ext4_ext_next_allocated_block:
 * returns the allocated block found in the subsequent extent, or
 * EXT_MAX_BLOCKS.
 * NOTE: it treats the block number from an index entry as an
 * allocated block. Thus, index entries have to be consistent
 * with the leaves.
 */
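/*
 * Example (illustrative): for a leaf holding extents {0..7} and
 * {100..107} with path->p_ext pointing at the first one, this returns
 * 100; if p_ext already points at the last extent of the last leaf,
 * it returns EXT_MAX_BLOCKS.
 */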
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	if (depth == 0 && path->p_ext == NULL)
		return EXT_MAX_BLOCKS;

	while (depth >= 0) {
		struct ext4_ext_path *p = &path[depth];

		if (depth == path->p_depth) {
			/* leaf */
			if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
				return le32_to_cpu(p->p_ext[1].ee_block);
		} else {
			/* index */
			if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
				return le32_to_cpu(p->p_idx[1].ei_block);
		}
		depth--;
	}

	return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_next_leaf_block:
 * returns the first allocated block of the next leaf, or EXT_MAX_BLOCKS
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	/* a zero-depth tree has no leaf blocks at all */
	if (depth == 0)
		return EXT_MAX_BLOCKS;

	/* go to index block */
	depth--;

	while (depth >= 0) {
		if (path[depth].p_idx !=
				EXT_LAST_INDEX(path[depth].p_hdr))
			return (ext4_lblk_t)
				le32_to_cpu(path[depth].p_idx[1].ei_block);
		depth--;
	}

	return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_correct_indexes:
 * if a leaf gets modified and the modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct the tree in all cases?
 */
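/*
 * Sketch (illustrative): if the first extent of a leaf changes its
 * starting block from 100 to 96, every index on the path that leads to
 * this leaf through a leftmost entry must be updated as well:
 *
 *   root idx [100 -> 96]  ->  mid idx [100 -> 96]  ->  leaf [96...]
 *
 * The loop below walks upward and stops at the first level where the
 * path no longer goes through the leftmost index entry.
 */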
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	__le32 border;
	int k, err = 0;

	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;

	if (unlikely(ex == NULL || eh == NULL)) {
		EXT4_ERROR_INODE(inode,
				 "ex %p == NULL or eh %p == NULL", ex, eh);
		return -EFSCORRUPTED;
	}

	if (depth == 0) {
		/* there is no tree at all */
		return 0;
	}

	if (ex != EXT_FIRST_EXTENT(eh)) {
		/* we correct the tree only if the first extent in the
		 * leaf got modified */
		return 0;
	}

	/*
	 * TODO: we need correction if border is smaller than current one
	 */
	k = depth - 1;
	border = path[depth].p_ext->ee_block;
	err = ext4_ext_get_access(handle, inode, path + k);
	if (err)
		return err;
	path[k].p_idx->ei_block = border;
	err = ext4_ext_dirty(handle, inode, path + k);
	if (err)
		return err;

	while (k--) {
		/* change all left-side indexes */
		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
			break;
		err = ext4_ext_get_access(handle, inode, path + k);
		if (err)
			break;
		path[k].p_idx->ei_block = border;
		err = ext4_ext_dirty(handle, inode, path + k);
		if (err)
			break;
	}

	return err;
}

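/*
 * Mergeability example (illustrative, numbers invented): ex1
 * {ee_block = 0, ee_len = 100, pblk = 500} and ex2 {ee_block = 100,
 * ee_len = 50, pblk = 600} are logically contiguous but physically
 * disjoint (500 + 100 != 600), so the checks below reject them; with
 * ex2's pblk at 600 == 500 + 100 they would merge, provided both are
 * written (or both unwritten) and the combined length stays within
 * EXT_INIT_MAX_LEN (EXT_UNWRITTEN_MAX_LEN for unwritten extents).
 */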
static int ext4_can_extents_be_merged(struct inode *inode,
				      struct ext4_extent *ex1,
				      struct ext4_extent *ex2)
{
	unsigned short ext1_ee_len, ext2_ee_len;

	if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
		return 0;

	ext1_ee_len = ext4_ext_get_actual_len(ex1);
	ext2_ee_len = ext4_ext_get_actual_len(ex2);

	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
			le32_to_cpu(ex2->ee_block))
		return 0;

	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
		return 0;

	if (ext4_ext_is_unwritten(ex1) &&
	    ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
		return 0;
#ifdef AGGRESSIVE_TEST
	if (ext1_ee_len >= 4)
		return 0;
#endif

	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
		return 1;
	return 0;
}

/*
 * This function tries to merge the "ex" extent to the next extent in the
 * tree. It always tries to merge towards the right. If you want to merge
 * towards the left, pass "ex - 1" as the argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
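/*
 * Example (illustrative): a leaf holding {0..9}, {10..19}, {20..29},
 * all written and physically contiguous, collapses into the single
 * extent {0..29} after repeated merges; eh_entries drops from 3 to 1
 * and the tail extents are shifted down with memmove().
 */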
static int ext4_ext_try_to_merge_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth, len;
	int merge_done = 0, unwritten;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	while (ex < EXT_LAST_EXTENT(eh)) {
		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
			break;
		/* merge with next extent! */
		unwritten = ext4_ext_is_unwritten(ex);
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
				+ ext4_ext_get_actual_len(ex + 1));
		if (unwritten)
			ext4_ext_mark_unwritten(ex);

		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
			len = (EXT_LAST_EXTENT(eh) - ex - 1)
				* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
		le16_add_cpu(&eh->eh_entries, -1);
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
	}

	return merge_done;
}

/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 */
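/*
 * Collapse sketch (illustrative): a depth-1 tree whose root holds a
 * single index entry, and whose only leaf fits within the root
 * (eh_entries <= ext4_ext_space_root()), is flattened:
 *
 *   i_data -> [index: ei0] -> [leaf: ex0 ex1]
 *   becomes
 *   i_data -> [leaf: ex0 ex1]            (the leaf block is freed)
 */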
static void ext4_ext_try_to_merge_up(handle_t *handle,
				     struct inode *inode,
				     struct ext4_ext_path *path)
{
	size_t s;
	unsigned max_root = ext4_ext_space_root(inode, 0);
	ext4_fsblk_t blk;

	if ((path[0].p_depth != 1) ||
	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
		return;

	/*
	 * We need to modify the block allocation bitmap and the block
	 * group descriptor to release the extent tree block.  If we
	 * can't get the journal credits, give up.
	 */
	if (ext4_journal_extend(handle, 2,
			ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
		return;

	/*
	 * Copy the extent data up to the inode
	 */
	blk = ext4_idx_pblock(path[0].p_idx);
	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
		sizeof(struct ext4_extent_idx);
	s += sizeof(struct ext4_extent_header);

	path[1].p_maxdepth = path[0].p_maxdepth;
	memcpy(path[0].p_hdr, path[1].p_hdr, s);
	path[0].p_depth = 0;
	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
	path[0].p_hdr->eh_max = cpu_to_le16(max_root);

	brelse(path[1].p_bh);
	ext4_free_blocks(handle, inode, NULL, blk, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
 * This function tries to merge the @ex extent to neighbours in the tree, then
 * tries to collapse the extent tree into the inode.
 */
static void ext4_ext_try_to_merge(handle_t *handle,
				  struct inode *inode,
				  struct ext4_ext_path *path,
				  struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth;
	int merge_done = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	if (ex > EXT_FIRST_EXTENT(eh))
		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

	if (!merge_done)
		(void) ext4_ext_try_to_merge_right(inode, path, ex);

	ext4_ext_try_to_merge_up(handle, inode, path);
}

/*
 * check if a portion of the "newext" extent overlaps with an
 * existing extent.
 *
 * If an overlap is discovered, it updates the length of newext
 * such that there will be no overlap, and then returns 1.
 * If there is no overlap found, it returns 0.
 */
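/*
 * Example (illustrative, invented numbers): newext covers logical
 * blocks [100, 120) while the next allocated block sits at 110; the
 * function trims newext->ee_len so newext covers [100, 110) and
 * returns 1. If the next allocated block is at or beyond 120, it
 * returns 0 and leaves newext untouched.
 */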
static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
					   struct inode *inode,
					   struct ext4_extent *newext,
					   struct ext4_ext_path *path)
{
	ext4_lblk_t b1, b2;
	unsigned int depth, len1;
	unsigned int ret = 0;

	b1 = le32_to_cpu(newext->ee_block);
	len1 = ext4_ext_get_actual_len(newext);
	depth = ext_depth(inode);
	if (!path[depth].p_ext)
		goto out;
	b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));

	/*
	 * get the next allocated block if the extent in the path
	 * is before the requested block(s)
	 */
	if (b2 < b1) {
		b2 = ext4_ext_next_allocated_block(path);
		if (b2 == EXT_MAX_BLOCKS)
			goto out;
		b2 = EXT4_LBLK_CMASK(sbi, b2);
	}

	/* check for wrap through zero on extent logical start block */
	if (b1 + len1 < b1) {
		len1 = EXT_MAX_BLOCKS - b1;
		newext->ee_len = cpu_to_le16(len1);
		ret = 1;
	}

	/* check for overlap */
	if (b1 + len1 > b2) {
		newext->ee_len = cpu_to_le16(b2 - b1);
		ret = 1;
	}
out:
	return ret;
}

/*
 * ext4_ext_insert_extent:
 * tries to merge the requested extent into an existing extent or
 * inserts the requested extent as a new one into the tree,
 * creating a new leaf in the no-space case.
 */
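/*
 * Decision sketch (illustrative): in rough order, the function
 *   1. tries to append/prepend newext to a neighbouring extent
 *      (cheap, no new entry needed),
 *   2. otherwise inserts it into the current leaf if there is room,
 *   3. otherwise tries the next leaf,
 *   4. otherwise calls ext4_ext_create_new_leaf(), which may split
 *      the tree or grow it in depth.
 * After the insert it merges neighbours and fixes up parent indexes.
 */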
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
				struct ext4_ext_path **ppath,
				struct ext4_extent *newext, int gb_flags)
{
	struct ext4_ext_path *path = *ppath;
	struct ext4_extent_header *eh;
	struct ext4_extent *ex, *fex;
	struct ext4_extent *nearex; /* nearest extent */
	struct ext4_ext_path *npath = NULL;
	int depth, len, err;
	ext4_lblk_t next;
	int mb_flags = 0, unwritten;

	if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
		mb_flags |= EXT4_MB_DELALLOC_RESERVED;
	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
		return -EFSCORRUPTED;
	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	eh = path[depth].p_hdr;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EFSCORRUPTED;
	}

	/* try to insert block into found extent and return */
	if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {

		/*
		 * Try to see whether we should rather test the extent to
		 * the right of ex, or to the left of ex. This is because
		 * ext4_find_extent() can return either extent on the
		 * left, or on the right from the searched position. This
		 * will make merging more effective.
		 */
		if (ex < EXT_LAST_EXTENT(eh) &&
		    (le32_to_cpu(ex->ee_block) +
		    ext4_ext_get_actual_len(ex) <
		    le32_to_cpu(newext->ee_block))) {
			ex += 1;
			goto prepend;
		} else if ((ex > EXT_FIRST_EXTENT(eh)) &&
			   (le32_to_cpu(newext->ee_block) +
			   ext4_ext_get_actual_len(newext) <
			   le32_to_cpu(ex->ee_block)))
			ex -= 1;

		/* Try to append newex to the ex */
		if (ext4_can_extents_be_merged(inode, ex, newext)) {
			ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  le32_to_cpu(ex->ee_block),
				  ext4_ext_is_unwritten(ex),
				  ext4_ext_get_actual_len(ex),
				  ext4_ext_pblock(ex));
			err = ext4_ext_get_access(handle, inode,
						  path + depth);
			if (err)
				return err;
			unwritten = ext4_ext_is_unwritten(ex);
			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
			if (unwritten)
				ext4_ext_mark_unwritten(ex);
			eh = path[depth].p_hdr;
			nearex = ex;
			goto merge;
		}

prepend:
		/* Try to prepend newex to the ex */
		if (ext4_can_extents_be_merged(inode, newext, ex)) {
			ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
				  le32_to_cpu(ex->ee_block),
				  ext4_ext_is_unwritten(ex),
				  ext4_ext_get_actual_len(ex),
				  ext4_ext_pblock(ex));
			err = ext4_ext_get_access(handle, inode,
						  path + depth);
			if (err)
				return err;

			unwritten = ext4_ext_is_unwritten(ex);
			ex->ee_block = newext->ee_block;
			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
					+ ext4_ext_get_actual_len(newext));
			if (unwritten)
				ext4_ext_mark_unwritten(ex);
			eh = path[depth].p_hdr;
			nearex = ex;
			goto merge;
		}
	}

	depth = ext_depth(inode);
	eh = path[depth].p_hdr;
	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
		goto has_space;

	/* probably next leaf has space for us? */
	fex = EXT_LAST_EXTENT(eh);
	next = EXT_MAX_BLOCKS;
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
		next = ext4_ext_next_leaf_block(path);
	if (next != EXT_MAX_BLOCKS) {
		ext_debug(inode, "next leaf block - %u\n", next);
		BUG_ON(npath != NULL);
		npath = ext4_find_extent(inode, next, NULL, gb_flags);
		if (IS_ERR(npath))
			return PTR_ERR(npath);
		BUG_ON(npath->p_depth != path->p_depth);
		eh = npath[depth].p_hdr;
		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
			ext_debug(inode, "next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			path = npath;
			goto has_space;
		}
		ext_debug(inode, "next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're going to add a new leaf to the tree.
	 */
	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
		mb_flags |= EXT4_MB_USE_RESERVED;
	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
				       ppath, newext);
	if (err)
		goto cleanup;
	depth = ext_depth(inode);
	eh = path[depth].p_hdr;

has_space:
	nearex = path[depth].p_ext;

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto cleanup;

	if (!nearex) {
		/* there is no extent in this leaf, create first one */
		ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_unwritten(newext),
				ext4_ext_get_actual_len(newext));
		nearex = EXT_FIRST_EXTENT(eh);
	} else {
		if (le32_to_cpu(newext->ee_block)
			   > le32_to_cpu(nearex->ee_block)) {
			/* Insert after */
			ext_debug(inode, "insert %u:%llu:[%d]%d before: "
					"nearest %p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
					ext4_ext_is_unwritten(newext),
					ext4_ext_get_actual_len(newext),
					nearex);
			nearex++;
		} else {
			/* Insert before */
			BUG_ON(newext->ee_block == nearex->ee_block);
			ext_debug(inode, "insert %u:%llu:[%d]%d after: "
					"nearest %p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
					ext4_ext_is_unwritten(newext),
					ext4_ext_get_actual_len(newext),
					nearex);
		}
		len = EXT_LAST_EXTENT(eh) - nearex + 1;
		if (len > 0) {
			ext_debug(inode, "insert %u:%llu:[%d]%d: "
					"move %d extents from 0x%p to 0x%p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
					ext4_ext_is_unwritten(newext),
					ext4_ext_get_actual_len(newext),
					len, nearex, nearex + 1);
			memmove(nearex + 1, nearex,
				len * sizeof(struct ext4_extent));
		}
	}

	le16_add_cpu(&eh->eh_entries, 1);
	path[depth].p_ext = nearex;
	nearex->ee_block = newext->ee_block;
	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
	nearex->ee_len = newext->ee_len;

merge:
	/* try to merge extents */
	if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(handle, inode, path, nearex);

	/* time to correct all indexes above */
	err = ext4_ext_correct_indexes(handle, inode, path);
	if (err)
		goto cleanup;

	err = ext4_ext_dirty(handle, inode, path + path->p_depth);

cleanup:
	ext4_ext_drop_refs(npath);
	kfree(npath);
	return err;
}

static int ext4_fill_es_cache_info(struct inode *inode,
				   ext4_lblk_t block, ext4_lblk_t num,
				   struct fiemap_extent_info *fieinfo)
{
	ext4_lblk_t next, end = block + num - 1;
	struct extent_status es;
	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
	unsigned int flags;
	int err;

	while (block <= end) {
		next = 0;
		flags = 0;
		if (!ext4_es_lookup_extent(inode, block, &next, &es))
			break;
		if (ext4_es_is_unwritten(&es))
			flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (ext4_es_is_delayed(&es))
			flags |= (FIEMAP_EXTENT_DELALLOC |
				  FIEMAP_EXTENT_UNKNOWN);
		if (ext4_es_is_hole(&es))
			flags |= EXT4_FIEMAP_EXTENT_HOLE;
		if (next == 0)
			flags |= FIEMAP_EXTENT_LAST;
		if (flags & (FIEMAP_EXTENT_DELALLOC|
			     EXT4_FIEMAP_EXTENT_HOLE))
			es.es_pblk = 0;
		else
			es.es_pblk = ext4_es_pblock(&es);
		err = fiemap_fill_next_extent(fieinfo,
				(__u64)es.es_lblk << blksize_bits,
				(__u64)es.es_pblk << blksize_bits,
				(__u64)es.es_len << blksize_bits,
				flags);
		if (next == 0)
			break;
		block = next;
		if (err < 0)
			return err;
		if (err == 1)
			return 0;
	}
	return 0;
}

/*
 * ext4_ext_determine_hole - determine hole around given block
 * @inode:	inode we lookup in
 * @path:	path in extent tree to @lblk
 * @lblk:	pointer to logical block around which we want to determine hole
 *
 * Determine the hole length (and start, if easily possible) around the
 * given logical block. We don't try too hard to find the beginning of
 * the hole, but if @path happens to point to the extent before @lblk,
 * we provide it.
 *
 * The function returns the length of a hole starting at @lblk. We update
 * @lblk to the beginning of the hole if we managed to find it.
 */
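/*
 * Example (illustrative): with extents {0..9} and {20..29} and
 * *lblk = 12, @path points at {0..9}; *lblk is rewritten to 10 and the
 * returned length is 20 - 10 = 10, i.e. the hole [10, 20).
 */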
static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
					   struct ext4_ext_path *path,
					   ext4_lblk_t *lblk)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	ext4_lblk_t len;

	ex = path[depth].p_ext;
	if (ex == NULL) {
		/* there is no extent yet, so gap is [0;-] */
		*lblk = 0;
		len = EXT_MAX_BLOCKS;
	} else if (*lblk < le32_to_cpu(ex->ee_block)) {
		len = le32_to_cpu(ex->ee_block) - *lblk;
	} else if (*lblk >= le32_to_cpu(ex->ee_block)
			+ ext4_ext_get_actual_len(ex)) {
		ext4_lblk_t next;

		*lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
		next = ext4_ext_next_allocated_block(path);
		BUG_ON(next == *lblk);
		len = next - *lblk;
	} else {
		BUG();
	}
	return len;
}

/*
 * ext4_ext_put_gap_in_cache:
 * calculate boundaries of the gap that the requested block fits into
 * and cache this gap
 */
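/*
 * For instance, with hole_start = 100 and hole_len = 50, a delayed
 * extent starting at block 120 trims the cached hole to 100:20, while a
 * delayed extent starting at or before block 100 suppresses caching
 * entirely, since the range is not actually a hole.
 */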
static void
ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
			  ext4_lblk_t hole_len)
{
	struct extent_status es;

	ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
				  hole_start + hole_len - 1, &es);
	if (es.es_len) {
		/* Is there a delayed extent containing lblock? */
		if (es.es_lblk <= hole_start)
			return;
		hole_len = min(es.es_lblk - hole_start, hole_len);
	}
	ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
	ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
			      EXTENT_STATUS_HOLE);
}

/*
 * ext4_ext_rm_idx:
 * removes index from the index block.
 */
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path, int depth)
{
	int err;
	ext4_fsblk_t leaf;

	/* free index block */
	depth--;
	path = path + depth;
	leaf = ext4_idx_pblock(path->p_idx);
	if (unlikely(path->p_hdr->eh_entries == 0)) {
		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
		return -EFSCORRUPTED;
	}
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;

	if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
		int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
		len *= sizeof(struct ext4_extent_idx);
		memmove(path->p_idx, path->p_idx + 1, len);
	}

	le16_add_cpu(&path->p_hdr->eh_entries, -1);
	err = ext4_ext_dirty(handle, inode, path);
	if (err)
		return err;
	ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
	trace_ext4_ext_rm_idx(inode, leaf);

	ext4_free_blocks(handle, inode, NULL, leaf, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);

	while (--depth >= 0) {
		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
			break;
		path--;
		err = ext4_ext_get_access(handle, inode, path);
		if (err)
			break;
		path->p_idx->ei_block = (path+1)->p_idx->ei_block;
		err = ext4_ext_dirty(handle, inode, path);
		if (err)
			break;
	}
	return err;
}

/*
 * ext4_ext_calc_credits_for_single_extent:
 * This routine returns the maximum number of credits needed to insert
 * an extent into the extent tree.
 * When passing the actual path, the caller should calculate credits
 * under i_data_sem.
 */
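/*
 * For example, on a filesystem where EXT4_META_TRANS_BLOCKS() evaluates
 * to, say, 8, a leaf with free slots costs 2 + 8 = 10 credits; without a
 * path (or with a full leaf) the estimate falls back to
 * ext4_chunk_trans_blocks(), which also accounts for a possible split of
 * every level of the tree.
 */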
int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
						struct ext4_ext_path *path)
{
	if (path) {
		int depth = ext_depth(inode);
		int ret = 0;

		/* probably there is space in the leaf? */
		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
				< le16_to_cpu(path[depth].p_hdr->eh_max)) {

			/*
			 *  There is some space in the leaf, so there is no
			 *  need to account for the leaf block credit.
			 *
			 *  bitmaps and block group descriptor blocks
			 *  and other metadata blocks still need to be
			 *  accounted.
			 */
			/* 1 bitmap, 1 block group descriptor */
			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
			return ret;
		}
	}

	return ext4_chunk_trans_blocks(inode, nrblocks);
}

/*
 * How many index/leaf blocks need to change/allocate to add @extents extents?
 *
 * If we add a single extent, then in the worst case each tree level's
 * index/leaf may need to be changed in case of a tree split.
 *
 * If more extents are inserted, they could cause the whole tree to split more
 * than once, but this is really rare.
 */
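/*
 * For example, a tree of depth 2 needs at most 2 * 2 = 4 modified
 * index/leaf blocks for a single extent, and 2 * 3 = 6 when several
 * extents are inserted in one transaction.
 */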
int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
{
	int index;
	int depth;

	/* If we are converting the inline data, only one is needed here. */
	if (ext4_has_inline_data(inode))
		return 1;

	depth = ext_depth(inode);

	if (extents <= 1)
		index = depth * 2;
	else
		index = depth * 3;

	return index;
}

static inline int get_default_free_blocks_flags(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
		return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
	else if (ext4_should_journal_data(inode))
		return EXT4_FREE_BLOCKS_FORGET;
	return 0;
}

/*
 * ext4_rereserve_cluster - increment the reserved cluster count when
 *                          freeing a cluster with a pending reservation
 *
 * @inode - file containing the cluster
 * @lblk - logical block in cluster to be reserved
 *
 * Increments the reserved cluster count and adjusts quota in a bigalloc
 * file system when freeing a partial cluster containing at least one
 * delayed and unwritten block.  A partial cluster meeting that
 * requirement will have a pending reservation.  If so, the
 * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
 * defer reserved and allocated space accounting to a subsequent call
 * to this function.
 */
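/*
 * In effect this converts a freed allocated cluster back into a
 * delayed-reserved one: quota is reclaimed rather than released, the
 * per-inode reserved count and the dirty-cluster counter each go up by
 * one, and the pending reservation for @lblk is dropped once consumed.
 */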
static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);

	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));

	spin_lock(&ei->i_block_reservation_lock);
	ei->i_reserved_data_blocks++;
	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
	spin_unlock(&ei->i_block_reservation_lock);

	percpu_counter_add(&sbi->s_freeclusters_counter, 1);
	ext4_remove_pending(inode, lblk);
}

static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			      struct ext4_extent *ex,
			      struct partial_cluster *partial,
			      ext4_lblk_t from, ext4_lblk_t to)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned short ee_len = ext4_ext_get_actual_len(ex);
	ext4_fsblk_t last_pblk, pblk;
	ext4_lblk_t num;
	int flags;

	/* only extent tail removal is allowed */
	if (from < le32_to_cpu(ex->ee_block) ||
	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
		ext4_error(sbi->s_sb,
			   "strange request: removal(2) %u-%u from %u:%u",
			   from, to, le32_to_cpu(ex->ee_block), ee_len);
		return 0;
	}

#ifdef EXTENTS_STATS
	spin_lock(&sbi->s_ext_stats_lock);
	sbi->s_ext_blocks += ee_len;
	sbi->s_ext_extents++;
	if (ee_len < sbi->s_ext_min)
		sbi->s_ext_min = ee_len;
	if (ee_len > sbi->s_ext_max)
		sbi->s_ext_max = ee_len;
	if (ext_depth(inode) > sbi->s_depth_max)
		sbi->s_depth_max = ext_depth(inode);
	spin_unlock(&sbi->s_ext_stats_lock);
#endif

	trace_ext4_remove_blocks(inode, ex, from, to, partial);

	/*
	 * if we have a partial cluster, and it's different from the
	 * cluster of the last block in the extent, we free it
	 */
	last_pblk = ext4_ext_pblock(ex) + ee_len - 1;

	if (partial->state != initial &&
	    partial->pclu != EXT4_B2C(sbi, last_pblk)) {
		if (partial->state == tofree) {
			flags = get_default_free_blocks_flags(inode);
			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		partial->state = initial;
	}

	num = le32_to_cpu(ex->ee_block) + ee_len - from;
	pblk = ext4_ext_pblock(ex) + ee_len - num;

	/*
	 * We free the partial cluster at the end of the extent (if any),
	 * unless the cluster is used by another extent (partial_cluster
	 * state is nofree).  If a partial cluster exists here, it must be
	 * shared with the last block in the extent.
	 */
	flags = get_default_free_blocks_flags(inode);

	/* partial, left end cluster aligned, right end unaligned */
	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
	    (EXT4_LBLK_CMASK(sbi, to) >= from) &&
	    (partial->state != nofree)) {
		if (ext4_is_pending(inode, to))
			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_PBLK_CMASK(sbi, last_pblk),
				 sbi->s_cluster_ratio, flags);
		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
			ext4_rereserve_cluster(inode, to);
		partial->state = initial;
		flags = get_default_free_blocks_flags(inode);
	}

	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

	/*
	 * For bigalloc file systems, we never free a partial cluster
	 * at the beginning of the extent.  Instead, we check to see if we
	 * need to free it on a subsequent call to ext4_remove_blocks,
	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
	 */
	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
	ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

	/* reset the partial cluster if we've freed past it */
	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
		partial->state = initial;

	/*
	 * If we've freed the entire extent but the beginning is not left
	 * cluster aligned and is not marked as ineligible for freeing we
	 * record the partial cluster at the beginning of the extent.  It
	 * wasn't freed by the preceding ext4_free_blocks() call, and we
	 * need to look farther to the left to determine if it's to be freed
	 * (not shared with another extent). Else, reset the partial
	 * cluster - we're either done freeing or the beginning of the
	 * extent is left cluster aligned.
	 */
	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
		if (partial->state == initial) {
			partial->pclu = EXT4_B2C(sbi, pblk);
			partial->lblk = from;
			partial->state = tofree;
		}
	} else {
		partial->state = initial;
	}

	return 0;
}

/*
 * ext4_ext_rm_leaf() removes the extents associated with the
 * blocks appearing between "start" and "end".  Both "start"
 * and "end" must appear in the same extent or EIO is returned.
 *
 * @handle: The journal handle
 * @inode:  The file's inode
 * @path:   The path to the leaf
 * @partial_cluster: The cluster which we'll have to free if all extents
 *                   have been released from it.  However, if this value is
 *                   negative, it's a cluster just to the right of the
 *                   punched region and it must not be freed.
 * @start:  The first block to remove
 * @end:    The last block to remove
 */
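/*
 * For example, removing blocks 10-19 from an extent covering 5-19 trims
 * the extent tail: a = 10, b = 19, so num = a - ex_ee_block = 5 blocks
 * remain.  If the extent were 12-15 instead, it would fall entirely
 * inside the range and be removed whole (num = 0).
 */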
static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		 struct ext4_ext_path *path,
		 struct partial_cluster *partial,
		 ext4_lblk_t start, ext4_lblk_t end)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int err = 0, correct_index = 0;
	int depth = ext_depth(inode), credits, revoke_credits;
	struct ext4_extent_header *eh;
	ext4_lblk_t a, b;
	unsigned num;
	ext4_lblk_t ex_ee_block;
	unsigned short ex_ee_len;
	unsigned unwritten = 0;
	struct ext4_extent *ex;
	ext4_fsblk_t pblk;

	/* the header must be checked already in ext4_ext_remove_space() */
	ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
	if (!path[depth].p_hdr)
		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
	eh = path[depth].p_hdr;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
		return -EFSCORRUPTED;
	}
	/* find where to start removing */
	ex = path[depth].p_ext;
	if (!ex)
		ex = EXT_LAST_EXTENT(eh);

	ex_ee_block = le32_to_cpu(ex->ee_block);
	ex_ee_len = ext4_ext_get_actual_len(ex);

	trace_ext4_ext_rm_leaf(inode, start, ex, partial);

	while (ex >= EXT_FIRST_EXTENT(eh) &&
			ex_ee_block + ex_ee_len > start) {

		if (ext4_ext_is_unwritten(ex))
			unwritten = 1;
		else
			unwritten = 0;

		ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
			  unwritten, ex_ee_len);
		path[depth].p_ext = ex;

		a = ex_ee_block > start ? ex_ee_block : start;
		b = ex_ee_block+ex_ee_len - 1 < end ?
			ex_ee_block+ex_ee_len - 1 : end;

		ext_debug(inode, "  border %u:%u\n", a, b);

		/* If this extent is beyond the end of the hole, skip it */
		if (end < ex_ee_block) {
			/*
			 * We're going to skip this extent and move to another,
			 * so note that its first cluster is in use to avoid
			 * freeing it when removing blocks.  Eventually, the
			 * right edge of the truncated/punched region will
			 * be just to the left.
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex);
				partial->pclu = EXT4_B2C(sbi, pblk);
				partial->state = nofree;
			}
			ex--;
			ex_ee_block = le32_to_cpu(ex->ee_block);
			ex_ee_len = ext4_ext_get_actual_len(ex);
			continue;
		} else if (b != ex_ee_block + ex_ee_len - 1) {
			EXT4_ERROR_INODE(inode,
					 "can not handle truncate %u:%u "
					 "on extent %u:%u",
					 start, end, ex_ee_block,
					 ex_ee_block + ex_ee_len - 1);
			err = -EFSCORRUPTED;
			goto out;
		} else if (a != ex_ee_block) {
			/* remove tail of the extent */
			num = a - ex_ee_block;
		} else {
			/* remove whole extent: excellent! */
			num = 0;
		}
		/*
		 * 3 for leaf, sb, and inode plus 2 (bmap and group
		 * descriptor) for each block group; assume two block
		 * groups plus ex_ee_len/blocks_per_block_group for
		 * the worst case
		 */
		credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
		if (ex == EXT_FIRST_EXTENT(eh)) {
			correct_index = 1;
			credits += (ext_depth(inode)) + 1;
		}
		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
		/*
		 * We may end up freeing some index blocks and data from the
		 * punched range. Note that partial clusters are accounted for
		 * by ext4_free_data_revoke_credits().
		 */
		revoke_credits =
			ext4_free_metadata_revoke_credits(inode->i_sb,
							  ext_depth(inode)) +
			ext4_free_data_revoke_credits(inode, b - a + 1);

		err = ext4_datasem_ensure_credits(handle, inode, credits,
						  credits, revoke_credits);
		if (err) {
			if (err > 0)
				err = -EAGAIN;
			goto out;
		}

		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto out;

		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
		if (err)
			goto out;

		if (num == 0)
			/* this extent is removed; mark slot entirely unused */
			ext4_ext_store_pblock(ex, 0);

		ex->ee_len = cpu_to_le16(num);
		/*
		 * Do not mark unwritten if all the blocks in the
		 * extent have been removed.
		 */
		if (unwritten && num)
			ext4_ext_mark_unwritten(ex);
		/*
		 * If the extent was completely released,
		 * we need to remove it from the leaf
		 */
		if (num == 0) {
			if (end != EXT_MAX_BLOCKS - 1) {
				/*
				 * For hole punching, we need to scoot all the
				 * extents up when an extent is removed so that
				 * we don't have blank extents in the middle
				 */
				memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
					sizeof(struct ext4_extent));

				/* Now get rid of the one at the end */
				memset(EXT_LAST_EXTENT(eh), 0,
					sizeof(struct ext4_extent));
			}
			le16_add_cpu(&eh->eh_entries, -1);
		}

		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto out;

		ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
				ext4_ext_pblock(ex));
		ex--;
		ex_ee_block = le32_to_cpu(ex->ee_block);
		ex_ee_len = ext4_ext_get_actual_len(ex);
	}

	if (correct_index && eh->eh_entries)
		err = ext4_ext_correct_indexes(handle, inode, path);

	/*
	 * If there's a partial cluster and at least one extent remains in
	 * the leaf, free the partial cluster if it isn't shared with the
	 * current extent.  If it is shared with the current extent
	 * we reset the partial cluster because we've reached the start of the
	 * truncated/punched region and we're done removing blocks.
	 */
	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
		if (partial->pclu != EXT4_B2C(sbi, pblk)) {
			int flags = get_default_free_blocks_flags(inode);

			if (ext4_is_pending(inode, partial->lblk))
				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
					 EXT4_C2B(sbi, partial->pclu),
					 sbi->s_cluster_ratio, flags);
			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
				ext4_rereserve_cluster(inode, partial->lblk);
		}
		partial->state = initial;
	}

	/* if this leaf is free, then we should
	 * remove it from the index block above */
	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
		err = ext4_ext_rm_idx(handle, inode, path, depth);

out:
	return err;
}

/*
 * ext4_ext_more_to_rm:
 * returns 1 if current index has to be freed (even partial)
 */
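/*
 * Roughly: a return of 0 means either this level has been walked past
 * its first index, or the entry count still equals the snapshot stored
 * in p_block, i.e. the truncate below was complete rather than partial.
 */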
static int
ext4_ext_more_to_rm(struct ext4_ext_path *path)
{
	BUG_ON(path->p_idx == NULL);

	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
		return 0;

	/*
	 * if truncate on a deeper level happened, it wasn't partial,
	 * so we have to consider the current index for truncation
	 */
	if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
		return 0;
	return 1;
}

int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
			  ext4_lblk_t end)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int depth = ext_depth(inode);
	struct ext4_ext_path *path = NULL;
	struct partial_cluster partial;
	handle_t *handle;
	int i = 0, err = 0;

	partial.pclu = 0;
	partial.lblk = 0;
	partial.state = initial;

	ext_debug(inode, "truncate since %u to %u\n", start, end);

	/* probably first extent we're gonna free will be last in block */
	handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
			depth + 1,
			ext4_free_metadata_revoke_credits(inode->i_sb, depth));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

again:
	trace_ext4_ext_remove_space(inode, start, end, depth);

	/*
	 * Check if we are removing extents inside the extent tree. If that
	 * is the case, we are going to punch a hole inside the extent tree
	 * so we have to check whether we need to split the extent covering
	 * the last block to remove so we can easily remove the part of it
	 * in ext4_ext_rm_leaf().
	 */
	if (end < EXT_MAX_BLOCKS - 1) {
		struct ext4_extent *ex;
		ext4_lblk_t ee_block, ex_end, lblk;
		ext4_fsblk_t pblk;

		/* find extent for or closest extent to this block */
		path = ext4_find_extent(inode, end, NULL,
					EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
		if (IS_ERR(path)) {
			ext4_journal_stop(handle);
			return PTR_ERR(path);
		}
		depth = ext_depth(inode);
		/* a leaf may be missing only if the inode has no blocks at all */
		ex = path[depth].p_ext;
		if (!ex) {
			if (depth) {
				EXT4_ERROR_INODE(inode,
						 "path[%d].p_hdr == NULL",
						 depth);
				err = -EFSCORRUPTED;
			}
			goto out;
		}

		ee_block = le32_to_cpu(ex->ee_block);
		ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;

		/*
		 * See if the last block is inside the extent; if so, split
		 * the extent at the 'end' block so we can easily remove the
		 * tail of the first part of the split extent in
		 * ext4_ext_rm_leaf().
		 */
		if (end >= ee_block && end < ex_end) {

			/*
			 * If we're going to split the extent, note that
			 * the cluster containing the block after 'end' is
			 * in use to avoid freeing it when removing blocks.
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}

			/*
			 * Split the extent in two so that 'end' is the last
			 * block in the first new extent. Also we should not
			 * fail removing space due to ENOSPC so try to use
			 * a reserved block if that happens.
			 */
			err = ext4_force_split_extent_at(handle, inode, &path,
							 end + 1, 1);
			if (err < 0)
				goto out;

		} else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
			   partial.state == initial) {
			/*
			 * If we're punching, there's an extent to the right.
			 * If the partial cluster hasn't been set, set it to
			 * that extent's first cluster and its state to nofree
			 * so it won't be freed should it contain blocks to be
			 * removed. If it's already set (tofree/nofree), we're
			 * retrying and keep the original partial cluster info
			 * so a cluster marked tofree as a result of earlier
			 * extent removal is not lost.
			 */
			lblk = ex_end + 1;
			err = ext4_ext_search_right(inode, path, &lblk, &pblk,
						    NULL);
			if (err < 0)
				goto out;
			if (pblk) {
				partial.pclu = EXT4_B2C(sbi, pblk);
				partial.state = nofree;
			}
		}
	}
	/*
	 * We start scanning from the right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 */
	depth = ext_depth(inode);
	if (path) {
		int k = i = depth;
		while (--k > 0)
			path[k].p_block =
				le16_to_cpu(path[k].p_hdr->eh_entries)+1;
	} else {
		path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
			       GFP_NOFS | __GFP_NOFAIL);
		if (path == NULL) {
			ext4_journal_stop(handle);
			return -ENOMEM;
		}
		path[0].p_maxdepth = path[0].p_depth = depth;
		path[0].p_hdr = ext_inode_hdr(inode);
		i = 0;

		if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
			err = -EFSCORRUPTED;
			goto out;
		}
	}
	err = 0;

	while (i >= 0 && err == 0) {
		if (i == depth) {
			/* this is a leaf block */
			err = ext4_ext_rm_leaf(handle, inode, path,
					       &partial, start, end);
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}

		/* this is an index block */
		if (!path[i].p_hdr) {
			ext_debug(inode, "initialize header\n");
			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
		}

		if (!path[i].p_idx) {
			/* this level hasn't been touched yet */
			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
			ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
				  path[i].p_hdr,
				  le16_to_cpu(path[i].p_hdr->eh_entries));
		} else {
			/* we were already here, see at next index */
			path[i].p_idx--;
		}

		ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
				i, EXT_FIRST_INDEX(path[i].p_hdr),
				path[i].p_idx);
		if (ext4_ext_more_to_rm(path + i)) {
			struct buffer_head *bh;
			/* go to the next level */
			ext_debug(inode, "move to level %d (block %llu)\n",
				  i + 1, ext4_idx_pblock(path[i].p_idx));
			memset(path + i + 1, 0, sizeof(*path));
			bh = read_extent_tree_block(inode, path[i].p_idx,
						    depth - i - 1,
						    EXT4_EX_NOCACHE);
			if (IS_ERR(bh)) {
				/* should we reset i_size? */
				err = PTR_ERR(bh);
				break;
			}
			/* Yield here to deal with large extent trees.
			 * Should be a no-op if we did IO above. */
			cond_resched();
			if (WARN_ON(i + 1 > depth)) {
				err = -EFSCORRUPTED;
				break;
			}
			path[i + 1].p_bh = bh;

			/* save the actual number of indexes since this
			 * number is changed at the next iteration */
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
			i++;
		} else {
			/* we finished processing this index, go up */
			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
				/* index is empty, remove it;
				 * handle must be already prepared by the
				 * truncatei_leaf() */
				err = ext4_ext_rm_idx(handle, inode, path, i);
			}
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			ext_debug(inode, "return to level %d\n", i);
		}
	}

	trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
					 path->p_hdr->eh_entries);

	/*
	 * if there's a partial cluster and we have removed the first extent
	 * in the file, then we also free the partial cluster, if any
	 */
	if (partial.state == tofree && err == 0) {
		int flags = get_default_free_blocks_flags(inode);

		if (ext4_is_pending(inode, partial.lblk))
			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_C2B(sbi, partial.pclu),
				 sbi->s_cluster_ratio, flags);
		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
			ext4_rereserve_cluster(inode, partial.lblk);
		partial.state = initial;
	}

	/* TODO: flexible tree reduction should be here */
	if (path->p_hdr->eh_entries == 0) {
		/*
		 * truncate to zero freed all the tree,
		 * so we need to correct eh_depth
		 */
		err = ext4_ext_get_access(handle, inode, path);
		if (err == 0) {
			ext_inode_hdr(inode)->eh_depth = 0;
			ext_inode_hdr(inode)->eh_max =
				cpu_to_le16(ext4_ext_space_root(inode, 0));
			err = ext4_ext_dirty(handle, inode, path);
		}
	}
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	path = NULL;
	if (err == -EAGAIN)
		goto again;
	ext4_journal_stop(handle);

	return err;
}

/*
 * called at mount time
 */
void ext4_ext_init(struct super_block *sb)
{
	/*
	 * possible initialization would be here
	 */

	if (ext4_has_feature_extents(sb)) {
#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
		printk(KERN_INFO "EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
		       ", aggressive tests"
#endif
#ifdef CHECK_BINSEARCH
		       ", check binsearch"
#endif
#ifdef EXTENTS_STATS
		       ", stats"
#endif
		       "\n");
#endif
#ifdef EXTENTS_STATS
		spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
		EXT4_SB(sb)->s_ext_min = 1 << 30;
		EXT4_SB(sb)->s_ext_max = 0;
#endif
	}
}

/*
 * called at umount time
 */
void ext4_ext_release(struct super_block *sb)
{
	if (!ext4_has_feature_extents(sb))
		return;

#ifdef EXTENTS_STATS
	if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
			sbi->s_ext_blocks, sbi->s_ext_extents,
			sbi->s_ext_blocks / sbi->s_ext_extents);
		printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
			sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
	}
#endif
}

static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
{
	ext4_lblk_t  ee_block;
	ext4_fsblk_t ee_pblock;
	unsigned int ee_len;

	ee_block  = le32_to_cpu(ex->ee_block);
	ee_len    = ext4_ext_get_actual_len(ex);
	ee_pblock = ext4_ext_pblock(ex);

	if (ee_len == 0)
		return 0;

	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
				     EXTENT_STATUS_WRITTEN);
}

/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
	ext4_fsblk_t ee_pblock;
	unsigned int ee_len;

	ee_len    = ext4_ext_get_actual_len(ex);
	ee_pblock = ext4_ext_pblock(ex);
	return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
				  ee_len);
}

/*
 * ext4_split_extent_at() splits an extent at a given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates if the extent could be zeroed out if the split
 *		fails, and the states (initialized or unwritten) of the new
 *		extents.
 * @flags: flags used to insert the new extent into the extent tree.
 *
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], the
 * states of which are determined by @split_flag.
 *
 * There are two cases:
 *  a> the extent is split into two extents.
 *  b> no split is needed, and the extent is just marked.
 *
 * return 0 on success.
 */
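/*
 * For instance, splitting an unwritten extent 100:50 (logical block 100,
 * 50 blocks) at block 120 with EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2
 * yields two unwritten extents, 100:20 and 120:30; if @split were 100
 * itself, case b applies and only the extent's state is changed.
 */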
static int ext4_split_extent_at(handle_t *handle,
			     struct inode *inode,
			     struct ext4_ext_path **ppath,
			     ext4_lblk_t split,
			     int split_flag,
			     int flags)
{
	struct ext4_ext_path *path = *ppath;
	ext4_fsblk_t newblock;
	ext4_lblk_t ee_block;
	struct ext4_extent *ex, newex, orig_ex, zero_ex;
	struct ext4_extent *ex2 = NULL;
	unsigned int ee_len, depth;
	int err = 0;

	BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
	       (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));

	ext_debug(inode, "logical block %llu\n", (unsigned long long)split);

	ext4_ext_show_leaf(inode, path);

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	newblock = split - ee_block + ext4_ext_pblock(ex);

	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
	BUG_ON(!ext4_ext_is_unwritten(ex) &&
	       split_flag & (EXT4_EXT_MAY_ZEROOUT |
			     EXT4_EXT_MARK_UNWRIT1 |
			     EXT4_EXT_MARK_UNWRIT2));

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto out;

	if (split == ee_block) {
		/*
		 * case b: block @split is the block that the extent begins
		 * with; then we just change the state of the extent, and
		 * splitting is not needed.
		 */
		if (split_flag & EXT4_EXT_MARK_UNWRIT2)
			ext4_ext_mark_unwritten(ex);
		else
			ext4_ext_mark_initialized(ex);

		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
			ext4_ext_try_to_merge(handle, inode, path, ex);

		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
		goto out;
	}

	/* case a */
	memcpy(&orig_ex, ex, sizeof(orig_ex));
	ex->ee_len = cpu_to_le16(split - ee_block);
	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
		ext4_ext_mark_unwritten(ex);

	/*
	 * path may lead to a new leaf, not to the original leaf any more
	 * after ext4_ext_insert_extent() returns.
	 */
	err = ext4_ext_dirty(handle, inode, path + depth);
	if (err)
		goto fix_extent_len;

	ex2 = &newex;
	ex2->ee_block = cpu_to_le32(split);
	ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
	ext4_ext_store_pblock(ex2, newblock);
	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
		ext4_ext_mark_unwritten(ex2);

	err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
	if (err != -ENOSPC && err != -EDQUOT)
		goto out;

	if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
			if (split_flag & EXT4_EXT_DATA_VALID1) {
				err = ext4_ext_zeroout(inode, ex2);
				zero_ex.ee_block = ex2->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex2));
				ext4_ext_store_pblock(&zero_ex,
						      ext4_ext_pblock(ex2));
			} else {
				err = ext4_ext_zeroout(inode, ex);
				zero_ex.ee_block = ex->ee_block;
				zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(ex));
				ext4_ext_store_pblock(&zero_ex,
						      ext4_ext_pblock(ex));
			}
		} else {
			err = ext4_ext_zeroout(inode, &orig_ex);
			zero_ex.ee_block = orig_ex.ee_block;
			zero_ex.ee_len = cpu_to_le16(
						ext4_ext_get_actual_len(&orig_ex));
			ext4_ext_store_pblock(&zero_ex,
					      ext4_ext_pblock(&orig_ex));
		}

		if (!err) {
			/* update the extent length and mark as initialized */
			ex->ee_len = cpu_to_le16(ee_len);
			ext4_ext_try_to_merge(handle, inode, path, ex);
			err = ext4_ext_dirty(handle, inode, path + path->p_depth);
			if (!err)
				/* update extent status tree */
				err = ext4_zeroout_es(inode, &zero_ex);
			/* If we failed at this point, we don't know in which
			 * state the extent tree exactly is, so don't try to
			 * fix the length of the original extent as it may do
			 * even more damage.
			 */
			goto out;
		}
	}

fix_extent_len:
	ex->ee_len = orig_ex.ee_len;
	/*
	 * Ignore the ext4_ext_dirty return value since we are already in the
	 * error path and err is a non-zero error code.
	 */
	ext4_ext_dirty(handle, inode, path + path->p_depth);
	return err;
out:
	ext4_ext_show_leaf(inode, path);
	return err;
}

3308*4882a593Smuzhiyun /*
3309*4882a593Smuzhiyun  * ext4_split_extent() splits an extent and marks the extent which is covered
3310*4882a593Smuzhiyun  * by @map as @split_flag indicates.
3311*4882a593Smuzhiyun  *
3312*4882a593Smuzhiyun  * It may result in splitting the extent into multiple extents (up to three)
3313*4882a593Smuzhiyun  * There are three possibilities:
3314*4882a593Smuzhiyun  *   a> There is no split required
3315*4882a593Smuzhiyun  *   b> Splits in two extents: Split is happening at either end of the extent
3316*4882a593Smuzhiyun  *   c> Splits in three extents: Someone is splitting in the middle of the extent
3317*4882a593Smuzhiyun  *
3318*4882a593Smuzhiyun  */
ext4_split_extent(handle_t * handle,struct inode * inode,struct ext4_ext_path ** ppath,struct ext4_map_blocks * map,int split_flag,int flags)3319*4882a593Smuzhiyun static int ext4_split_extent(handle_t *handle,
3320*4882a593Smuzhiyun 			      struct inode *inode,
3321*4882a593Smuzhiyun 			      struct ext4_ext_path **ppath,
3322*4882a593Smuzhiyun 			      struct ext4_map_blocks *map,
3323*4882a593Smuzhiyun 			      int split_flag,
3324*4882a593Smuzhiyun 			      int flags)
3325*4882a593Smuzhiyun {
3326*4882a593Smuzhiyun 	struct ext4_ext_path *path = *ppath;
3327*4882a593Smuzhiyun 	ext4_lblk_t ee_block;
3328*4882a593Smuzhiyun 	struct ext4_extent *ex;
3329*4882a593Smuzhiyun 	unsigned int ee_len, depth;
3330*4882a593Smuzhiyun 	int err = 0;
3331*4882a593Smuzhiyun 	int unwritten;
3332*4882a593Smuzhiyun 	int split_flag1, flags1;
3333*4882a593Smuzhiyun 	int allocated = map->m_len;
3334*4882a593Smuzhiyun 
3335*4882a593Smuzhiyun 	depth = ext_depth(inode);
3336*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3337*4882a593Smuzhiyun 	ee_block = le32_to_cpu(ex->ee_block);
3338*4882a593Smuzhiyun 	ee_len = ext4_ext_get_actual_len(ex);
3339*4882a593Smuzhiyun 	unwritten = ext4_ext_is_unwritten(ex);
3340*4882a593Smuzhiyun 
3341*4882a593Smuzhiyun 	if (map->m_lblk + map->m_len < ee_block + ee_len) {
3342*4882a593Smuzhiyun 		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3343*4882a593Smuzhiyun 		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3344*4882a593Smuzhiyun 		if (unwritten)
3345*4882a593Smuzhiyun 			split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3346*4882a593Smuzhiyun 				       EXT4_EXT_MARK_UNWRIT2;
3347*4882a593Smuzhiyun 		if (split_flag & EXT4_EXT_DATA_VALID2)
3348*4882a593Smuzhiyun 			split_flag1 |= EXT4_EXT_DATA_VALID1;
3349*4882a593Smuzhiyun 		err = ext4_split_extent_at(handle, inode, ppath,
3350*4882a593Smuzhiyun 				map->m_lblk + map->m_len, split_flag1, flags1);
3351*4882a593Smuzhiyun 		if (err)
3352*4882a593Smuzhiyun 			goto out;
3353*4882a593Smuzhiyun 	} else {
3354*4882a593Smuzhiyun 		allocated = ee_len - (map->m_lblk - ee_block);
3355*4882a593Smuzhiyun 	}
3356*4882a593Smuzhiyun 	/*
3357*4882a593Smuzhiyun 	 * Updating the path is required because the previous ext4_split_extent_at()
3358*4882a593Smuzhiyun 	 * may result in a split of the original leaf or an extent zeroout.
3359*4882a593Smuzhiyun 	 */
3360*4882a593Smuzhiyun 	path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
3361*4882a593Smuzhiyun 	if (IS_ERR(path))
3362*4882a593Smuzhiyun 		return PTR_ERR(path);
3363*4882a593Smuzhiyun 	depth = ext_depth(inode);
3364*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3365*4882a593Smuzhiyun 	if (!ex) {
3366*4882a593Smuzhiyun 		EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3367*4882a593Smuzhiyun 				 (unsigned long) map->m_lblk);
3368*4882a593Smuzhiyun 		return -EFSCORRUPTED;
3369*4882a593Smuzhiyun 	}
3370*4882a593Smuzhiyun 	unwritten = ext4_ext_is_unwritten(ex);
3371*4882a593Smuzhiyun 	split_flag1 = 0;
3372*4882a593Smuzhiyun 
3373*4882a593Smuzhiyun 	if (map->m_lblk >= ee_block) {
3374*4882a593Smuzhiyun 		split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3375*4882a593Smuzhiyun 		if (unwritten) {
3376*4882a593Smuzhiyun 			split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
3377*4882a593Smuzhiyun 			split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3378*4882a593Smuzhiyun 						     EXT4_EXT_MARK_UNWRIT2);
3379*4882a593Smuzhiyun 		}
3380*4882a593Smuzhiyun 		err = ext4_split_extent_at(handle, inode, ppath,
3381*4882a593Smuzhiyun 				map->m_lblk, split_flag1, flags);
3382*4882a593Smuzhiyun 		if (err)
3383*4882a593Smuzhiyun 			goto out;
3384*4882a593Smuzhiyun 	}
3385*4882a593Smuzhiyun 
3386*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
3387*4882a593Smuzhiyun out:
3388*4882a593Smuzhiyun 	return err ? err : allocated;
3389*4882a593Smuzhiyun }
3390*4882a593Smuzhiyun 
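/*
 * Editor's illustrative sketch (not part of the kernel source, kept out
 * of the build): ext4_split_extent() above calls ext4_split_extent_at()
 * up to twice, first at the end of the mapped range and then at its
 * start, so a write into the middle of an extent yields up to three
 * pieces.  The helper below only mirrors that ordering; sketch_* names
 * are hypothetical.
 */
#if 0
static int sketch_split_points(unsigned int ee_block, unsigned int ee_len,
			       unsigned int m_lblk, unsigned int m_len,
			       unsigned int *first_split,
			       unsigned int *second_split)
{
	int nr_splits = 0;

	/* pass 1: split off the tail beyond the mapped range */
	if (m_lblk + m_len < ee_block + ee_len) {
		*first_split = m_lblk + m_len;
		nr_splits++;
	}
	/* pass 2: split off the head before the mapped range */
	if (m_lblk > ee_block) {
		*second_split = m_lblk;
		nr_splits++;
	}

	return nr_splits;	/* 0, 1 or 2 split points => 1 to 3 extents */
}
#endif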
3391*4882a593Smuzhiyun /*
3392*4882a593Smuzhiyun  * This function is called by ext4_ext_map_blocks() if someone tries to write
3393*4882a593Smuzhiyun  * to an unwritten extent. It may result in splitting the unwritten
3394*4882a593Smuzhiyun  * extent into multiple extents (up to three - one initialized and two
3395*4882a593Smuzhiyun  * unwritten).
3396*4882a593Smuzhiyun  * There are three possibilities:
3397*4882a593Smuzhiyun  *   a> There is no split required: Entire extent should be initialized
3398*4882a593Smuzhiyun  *   b> Splits in two extents: Write is happening at either end of the extent
3399*4882a593Smuzhiyun  *   c> Splits in three extents: Someone is writing in the middle of the extent
3400*4882a593Smuzhiyun  *
3401*4882a593Smuzhiyun  * Pre-conditions:
3402*4882a593Smuzhiyun  *  - The extent pointed to by 'path' is unwritten.
3403*4882a593Smuzhiyun  *  - The extent pointed to by 'path' contains a superset
3404*4882a593Smuzhiyun  *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3405*4882a593Smuzhiyun  *
3406*4882a593Smuzhiyun  * Post-conditions on success:
3407*4882a593Smuzhiyun  *  - the returned value is the number of blocks beyond map->m_lblk
3408*4882a593Smuzhiyun  *    that are allocated and initialized.
3409*4882a593Smuzhiyun  *    It is guaranteed to be >= map->m_len.
3410*4882a593Smuzhiyun  */
ext4_ext_convert_to_initialized(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,struct ext4_ext_path ** ppath,int flags)3411*4882a593Smuzhiyun static int ext4_ext_convert_to_initialized(handle_t *handle,
3412*4882a593Smuzhiyun 					   struct inode *inode,
3413*4882a593Smuzhiyun 					   struct ext4_map_blocks *map,
3414*4882a593Smuzhiyun 					   struct ext4_ext_path **ppath,
3415*4882a593Smuzhiyun 					   int flags)
3416*4882a593Smuzhiyun {
3417*4882a593Smuzhiyun 	struct ext4_ext_path *path = *ppath;
3418*4882a593Smuzhiyun 	struct ext4_sb_info *sbi;
3419*4882a593Smuzhiyun 	struct ext4_extent_header *eh;
3420*4882a593Smuzhiyun 	struct ext4_map_blocks split_map;
3421*4882a593Smuzhiyun 	struct ext4_extent zero_ex1, zero_ex2;
3422*4882a593Smuzhiyun 	struct ext4_extent *ex, *abut_ex;
3423*4882a593Smuzhiyun 	ext4_lblk_t ee_block, eof_block;
3424*4882a593Smuzhiyun 	unsigned int ee_len, depth, map_len = map->m_len;
3425*4882a593Smuzhiyun 	int allocated = 0, max_zeroout = 0;
3426*4882a593Smuzhiyun 	int err = 0;
3427*4882a593Smuzhiyun 	int split_flag = EXT4_EXT_DATA_VALID2;
3428*4882a593Smuzhiyun 
3429*4882a593Smuzhiyun 	ext_debug(inode, "logical block %llu, max_blocks %u\n",
3430*4882a593Smuzhiyun 		  (unsigned long long)map->m_lblk, map_len);
3431*4882a593Smuzhiyun 
3432*4882a593Smuzhiyun 	sbi = EXT4_SB(inode->i_sb);
3433*4882a593Smuzhiyun 	eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3434*4882a593Smuzhiyun 			>> inode->i_sb->s_blocksize_bits;
3435*4882a593Smuzhiyun 	if (eof_block < map->m_lblk + map_len)
3436*4882a593Smuzhiyun 		eof_block = map->m_lblk + map_len;
3437*4882a593Smuzhiyun 
3438*4882a593Smuzhiyun 	depth = ext_depth(inode);
3439*4882a593Smuzhiyun 	eh = path[depth].p_hdr;
3440*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3441*4882a593Smuzhiyun 	ee_block = le32_to_cpu(ex->ee_block);
3442*4882a593Smuzhiyun 	ee_len = ext4_ext_get_actual_len(ex);
3443*4882a593Smuzhiyun 	zero_ex1.ee_len = 0;
3444*4882a593Smuzhiyun 	zero_ex2.ee_len = 0;
3445*4882a593Smuzhiyun 
3446*4882a593Smuzhiyun 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3447*4882a593Smuzhiyun 
3448*4882a593Smuzhiyun 	/* Pre-conditions */
3449*4882a593Smuzhiyun 	BUG_ON(!ext4_ext_is_unwritten(ex));
3450*4882a593Smuzhiyun 	BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
3451*4882a593Smuzhiyun 
3452*4882a593Smuzhiyun 	/*
3453*4882a593Smuzhiyun 	 * Attempt to transfer newly initialized blocks from the currently
3454*4882a593Smuzhiyun 	 * unwritten extent to its neighbor. This is much cheaper
3455*4882a593Smuzhiyun 	 * than an insertion followed by a merge as those involve costly
3456*4882a593Smuzhiyun 	 * memmove() calls. Transferring to the left is the common case in
3457*4882a593Smuzhiyun 	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3458*4882a593Smuzhiyun 	 * followed by append writes.
3459*4882a593Smuzhiyun 	 *
3460*4882a593Smuzhiyun 	 * Limitations of the current logic:
3461*4882a593Smuzhiyun 	 *  - L1: we do not deal with writes covering the whole extent.
3462*4882a593Smuzhiyun 	 *    This would require removing the extent if the transfer
3463*4882a593Smuzhiyun 	 *    is possible.
3464*4882a593Smuzhiyun 	 *  - L2: we only attempt to merge with an extent stored in the
3465*4882a593Smuzhiyun 	 *    same extent tree node.
3466*4882a593Smuzhiyun 	 */
3467*4882a593Smuzhiyun 	if ((map->m_lblk == ee_block) &&
3468*4882a593Smuzhiyun 		/* See if we can merge left */
3469*4882a593Smuzhiyun 		(map_len < ee_len) &&		/*L1*/
3470*4882a593Smuzhiyun 		(ex > EXT_FIRST_EXTENT(eh))) {	/*L2*/
3471*4882a593Smuzhiyun 		ext4_lblk_t prev_lblk;
3472*4882a593Smuzhiyun 		ext4_fsblk_t prev_pblk, ee_pblk;
3473*4882a593Smuzhiyun 		unsigned int prev_len;
3474*4882a593Smuzhiyun 
3475*4882a593Smuzhiyun 		abut_ex = ex - 1;
3476*4882a593Smuzhiyun 		prev_lblk = le32_to_cpu(abut_ex->ee_block);
3477*4882a593Smuzhiyun 		prev_len = ext4_ext_get_actual_len(abut_ex);
3478*4882a593Smuzhiyun 		prev_pblk = ext4_ext_pblock(abut_ex);
3479*4882a593Smuzhiyun 		ee_pblk = ext4_ext_pblock(ex);
3480*4882a593Smuzhiyun 
3481*4882a593Smuzhiyun 		/*
3482*4882a593Smuzhiyun 		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3483*4882a593Smuzhiyun 		 * upon those conditions:
3484*4882a593Smuzhiyun 		 * - C1: abut_ex is initialized,
3485*4882a593Smuzhiyun 		 * - C2: abut_ex is logically abutting ex,
3486*4882a593Smuzhiyun 		 * - C3: abut_ex is physically abutting ex,
3487*4882a593Smuzhiyun 		 * - C4: abut_ex can receive the additional blocks without
3488*4882a593Smuzhiyun 		 *   overflowing the (initialized) length limit.
3489*4882a593Smuzhiyun 		 */
3490*4882a593Smuzhiyun 		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
3491*4882a593Smuzhiyun 			((prev_lblk + prev_len) == ee_block) &&		/*C2*/
3492*4882a593Smuzhiyun 			((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
3493*4882a593Smuzhiyun 			(prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
3494*4882a593Smuzhiyun 			err = ext4_ext_get_access(handle, inode, path + depth);
3495*4882a593Smuzhiyun 			if (err)
3496*4882a593Smuzhiyun 				goto out;
3497*4882a593Smuzhiyun 
3498*4882a593Smuzhiyun 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
3499*4882a593Smuzhiyun 				map, ex, abut_ex);
3500*4882a593Smuzhiyun 
3501*4882a593Smuzhiyun 			/* Shift the start of ex by 'map_len' blocks */
3502*4882a593Smuzhiyun 			ex->ee_block = cpu_to_le32(ee_block + map_len);
3503*4882a593Smuzhiyun 			ext4_ext_store_pblock(ex, ee_pblk + map_len);
3504*4882a593Smuzhiyun 			ex->ee_len = cpu_to_le16(ee_len - map_len);
3505*4882a593Smuzhiyun 			ext4_ext_mark_unwritten(ex); /* Restore the flag */
3506*4882a593Smuzhiyun 
3507*4882a593Smuzhiyun 			/* Extend abut_ex by 'map_len' blocks */
3508*4882a593Smuzhiyun 			abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
3509*4882a593Smuzhiyun 
3510*4882a593Smuzhiyun 			/* Result: number of initialized blocks past m_lblk */
3511*4882a593Smuzhiyun 			allocated = map_len;
3512*4882a593Smuzhiyun 		}
3513*4882a593Smuzhiyun 	} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3514*4882a593Smuzhiyun 		   (map_len < ee_len) &&	/*L1*/
3515*4882a593Smuzhiyun 		   ex < EXT_LAST_EXTENT(eh)) {	/*L2*/
3516*4882a593Smuzhiyun 		/* See if we can merge right */
3517*4882a593Smuzhiyun 		ext4_lblk_t next_lblk;
3518*4882a593Smuzhiyun 		ext4_fsblk_t next_pblk, ee_pblk;
3519*4882a593Smuzhiyun 		unsigned int next_len;
3520*4882a593Smuzhiyun 
3521*4882a593Smuzhiyun 		abut_ex = ex + 1;
3522*4882a593Smuzhiyun 		next_lblk = le32_to_cpu(abut_ex->ee_block);
3523*4882a593Smuzhiyun 		next_len = ext4_ext_get_actual_len(abut_ex);
3524*4882a593Smuzhiyun 		next_pblk = ext4_ext_pblock(abut_ex);
3525*4882a593Smuzhiyun 		ee_pblk = ext4_ext_pblock(ex);
3526*4882a593Smuzhiyun 
3527*4882a593Smuzhiyun 		/*
3528*4882a593Smuzhiyun 		 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3529*4882a593Smuzhiyun 		 * upon those conditions:
3530*4882a593Smuzhiyun 		 * - C1: abut_ex is initialized,
3531*4882a593Smuzhiyun 		 * - C2: abut_ex is logically abutting ex,
3532*4882a593Smuzhiyun 		 * - C3: abut_ex is physically abutting ex,
3533*4882a593Smuzhiyun 		 * - C4: abut_ex can receive the additional blocks without
3534*4882a593Smuzhiyun 		 *   overflowing the (initialized) length limit.
3535*4882a593Smuzhiyun 		 */
3536*4882a593Smuzhiyun 		if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
3537*4882a593Smuzhiyun 		    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
3538*4882a593Smuzhiyun 		    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
3539*4882a593Smuzhiyun 		    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
3540*4882a593Smuzhiyun 			err = ext4_ext_get_access(handle, inode, path + depth);
3541*4882a593Smuzhiyun 			if (err)
3542*4882a593Smuzhiyun 				goto out;
3543*4882a593Smuzhiyun 
3544*4882a593Smuzhiyun 			trace_ext4_ext_convert_to_initialized_fastpath(inode,
3545*4882a593Smuzhiyun 				map, ex, abut_ex);
3546*4882a593Smuzhiyun 
3547*4882a593Smuzhiyun 			/* Shift the start of abut_ex by 'map_len' blocks */
3548*4882a593Smuzhiyun 			abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3549*4882a593Smuzhiyun 			ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3550*4882a593Smuzhiyun 			ex->ee_len = cpu_to_le16(ee_len - map_len);
3551*4882a593Smuzhiyun 			ext4_ext_mark_unwritten(ex); /* Restore the flag */
3552*4882a593Smuzhiyun 
3553*4882a593Smuzhiyun 			/* Extend abut_ex by 'map_len' blocks */
3554*4882a593Smuzhiyun 			abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3555*4882a593Smuzhiyun 
3556*4882a593Smuzhiyun 			/* Result: number of initialized blocks past m_lblk */
3557*4882a593Smuzhiyun 			allocated = map_len;
3558*4882a593Smuzhiyun 		}
3559*4882a593Smuzhiyun 	}
3560*4882a593Smuzhiyun 	if (allocated) {
3561*4882a593Smuzhiyun 		/* Mark the block containing both extents as dirty */
3562*4882a593Smuzhiyun 		err = ext4_ext_dirty(handle, inode, path + depth);
3563*4882a593Smuzhiyun 
3564*4882a593Smuzhiyun 		/* Update path to point to the right extent */
3565*4882a593Smuzhiyun 		path[depth].p_ext = abut_ex;
3566*4882a593Smuzhiyun 		goto out;
3567*4882a593Smuzhiyun 	} else
3568*4882a593Smuzhiyun 		allocated = ee_len - (map->m_lblk - ee_block);
3569*4882a593Smuzhiyun 
3570*4882a593Smuzhiyun 	WARN_ON(map->m_lblk < ee_block);
3571*4882a593Smuzhiyun 	/*
3572*4882a593Smuzhiyun 	 * It is safe to convert extent to initialized via explicit
3573*4882a593Smuzhiyun 	 * zeroout only if extent is fully inside i_size or new_size.
3574*4882a593Smuzhiyun 	 */
3575*4882a593Smuzhiyun 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3576*4882a593Smuzhiyun 
3577*4882a593Smuzhiyun 	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3578*4882a593Smuzhiyun 		max_zeroout = sbi->s_extent_max_zeroout_kb >>
3579*4882a593Smuzhiyun 			(inode->i_sb->s_blocksize_bits - 10);
3580*4882a593Smuzhiyun 
3581*4882a593Smuzhiyun 	/*
3582*4882a593Smuzhiyun 	 * five cases:
3583*4882a593Smuzhiyun 	 * 1. split the extent into three extents.
3584*4882a593Smuzhiyun 	 * 2. split the extent into two extents, zeroout the head of the first
3585*4882a593Smuzhiyun 	 *    extent.
3586*4882a593Smuzhiyun 	 * 3. split the extent into two extents, zeroout the tail of the second
3587*4882a593Smuzhiyun 	 *    extent.
3588*4882a593Smuzhiyun 	 * 4. split the extent into two extents without zeroout.
3589*4882a593Smuzhiyun 	 * 5. no splitting needed, just possibly zeroout the head and / or the
3590*4882a593Smuzhiyun 	 *    tail of the extent.
3591*4882a593Smuzhiyun 	 */
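	/*
	 * Editor's worked example (hypothetical numbers): for an
	 * unwritten extent [100, 116), a write covering [104, 108) and a
	 * sufficiently large max_zeroout, the tail [108, 116) is zeroed
	 * first (zero_ex1), then the head [100, 104) (zero_ex2); split_map
	 * then grows to cover the whole extent and no split is needed,
	 * i.e. case 5 above.
	 */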
3592*4882a593Smuzhiyun 	split_map.m_lblk = map->m_lblk;
3593*4882a593Smuzhiyun 	split_map.m_len = map->m_len;
3594*4882a593Smuzhiyun 
3595*4882a593Smuzhiyun 	if (max_zeroout && (allocated > split_map.m_len)) {
3596*4882a593Smuzhiyun 		if (allocated <= max_zeroout) {
3597*4882a593Smuzhiyun 			/* case 3 or 5 */
3598*4882a593Smuzhiyun 			zero_ex1.ee_block =
3599*4882a593Smuzhiyun 				 cpu_to_le32(split_map.m_lblk +
3600*4882a593Smuzhiyun 					     split_map.m_len);
3601*4882a593Smuzhiyun 			zero_ex1.ee_len =
3602*4882a593Smuzhiyun 				cpu_to_le16(allocated - split_map.m_len);
3603*4882a593Smuzhiyun 			ext4_ext_store_pblock(&zero_ex1,
3604*4882a593Smuzhiyun 				ext4_ext_pblock(ex) + split_map.m_lblk +
3605*4882a593Smuzhiyun 				split_map.m_len - ee_block);
3606*4882a593Smuzhiyun 			err = ext4_ext_zeroout(inode, &zero_ex1);
3607*4882a593Smuzhiyun 			if (err)
3608*4882a593Smuzhiyun 				goto out;
3609*4882a593Smuzhiyun 			split_map.m_len = allocated;
3610*4882a593Smuzhiyun 		}
3611*4882a593Smuzhiyun 		if (split_map.m_lblk - ee_block + split_map.m_len <
3612*4882a593Smuzhiyun 								max_zeroout) {
3613*4882a593Smuzhiyun 			/* case 2 or 5 */
3614*4882a593Smuzhiyun 			if (split_map.m_lblk != ee_block) {
3615*4882a593Smuzhiyun 				zero_ex2.ee_block = ex->ee_block;
3616*4882a593Smuzhiyun 				zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
3617*4882a593Smuzhiyun 							ee_block);
3618*4882a593Smuzhiyun 				ext4_ext_store_pblock(&zero_ex2,
3619*4882a593Smuzhiyun 						      ext4_ext_pblock(ex));
3620*4882a593Smuzhiyun 				err = ext4_ext_zeroout(inode, &zero_ex2);
3621*4882a593Smuzhiyun 				if (err)
3622*4882a593Smuzhiyun 					goto out;
3623*4882a593Smuzhiyun 			}
3624*4882a593Smuzhiyun 
3625*4882a593Smuzhiyun 			split_map.m_len += split_map.m_lblk - ee_block;
3626*4882a593Smuzhiyun 			split_map.m_lblk = ee_block;
3627*4882a593Smuzhiyun 			allocated = map->m_len;
3628*4882a593Smuzhiyun 		}
3629*4882a593Smuzhiyun 	}
3630*4882a593Smuzhiyun 
3631*4882a593Smuzhiyun 	err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3632*4882a593Smuzhiyun 				flags);
3633*4882a593Smuzhiyun 	if (err > 0)
3634*4882a593Smuzhiyun 		err = 0;
3635*4882a593Smuzhiyun out:
3636*4882a593Smuzhiyun 	/* If we have gotten a failure, don't zero out status tree */
3637*4882a593Smuzhiyun 	if (!err) {
3638*4882a593Smuzhiyun 		err = ext4_zeroout_es(inode, &zero_ex1);
3639*4882a593Smuzhiyun 		if (!err)
3640*4882a593Smuzhiyun 			err = ext4_zeroout_es(inode, &zero_ex2);
3641*4882a593Smuzhiyun 	}
3642*4882a593Smuzhiyun 	return err ? err : allocated;
3643*4882a593Smuzhiyun }
3644*4882a593Smuzhiyun 
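/*
 * Editor's illustrative sketch (not part of the kernel source, kept out
 * of the build): the max_zeroout computation above converts the
 * s_extent_max_zeroout_kb tunable from kibibytes to filesystem blocks.
 * One block is 2^(blocksize_bits - 10) KiB, so a 32 KiB setting with
 * 4 KiB blocks (blocksize_bits == 12) yields 32 >> 2 == 8 blocks.
 * sketch_* names are hypothetical.
 */
#if 0
static unsigned int sketch_max_zeroout_blocks(unsigned int max_zeroout_kb,
					      unsigned int blocksize_bits)
{
	return max_zeroout_kb >> (blocksize_bits - 10);
}
#endif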
3645*4882a593Smuzhiyun /*
3646*4882a593Smuzhiyun  * This function is called by ext4_ext_map_blocks() from
3647*4882a593Smuzhiyun  * ext4_get_blocks_dio_write() when DIO to write
3648*4882a593Smuzhiyun  * to an unwritten extent.
3649*4882a593Smuzhiyun  *
3650*4882a593Smuzhiyun  * Writing to an unwritten extent may result in splitting the unwritten
3651*4882a593Smuzhiyun  * extent into multiple initialized/unwritten extents (up to three)
3652*4882a593Smuzhiyun  * There are three possibilities:
3653*4882a593Smuzhiyun  *   a> There is no split required: Entire extent should be unwritten
3654*4882a593Smuzhiyun  *   b> Splits in two extents: Write is happening at either end of the extent
3655*4882a593Smuzhiyun  *   c> Splits in three extents: Someone is writing in the middle of the extent
3656*4882a593Smuzhiyun  *
3657*4882a593Smuzhiyun  * This works the same way in the case of initialized -> unwritten conversion.
3658*4882a593Smuzhiyun  *
3659*4882a593Smuzhiyun  * One or more index blocks may be needed if the extent tree grows after
3660*4882a593Smuzhiyun  * the unwritten extent is split. To prevent ENOSPC from occurring at I/O
3661*4882a593Smuzhiyun  * completion time, we split the unwritten extent before submitting the
3662*4882a593Smuzhiyun  * I/O. The unwritten extent will be split into at most three unwritten
3663*4882a593Smuzhiyun  * extents. After the I/O completes, the part that was filled is
3664*4882a593Smuzhiyun  * converted to initialized by the end_io callback function via
3665*4882a593Smuzhiyun  * ext4_convert_unwritten_extents().
3666*4882a593Smuzhiyun  *
3667*4882a593Smuzhiyun  * Returns the size of unwritten extent to be written on success.
3668*4882a593Smuzhiyun  */
ext4_split_convert_extents(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,struct ext4_ext_path ** ppath,int flags)3669*4882a593Smuzhiyun static int ext4_split_convert_extents(handle_t *handle,
3670*4882a593Smuzhiyun 					struct inode *inode,
3671*4882a593Smuzhiyun 					struct ext4_map_blocks *map,
3672*4882a593Smuzhiyun 					struct ext4_ext_path **ppath,
3673*4882a593Smuzhiyun 					int flags)
3674*4882a593Smuzhiyun {
3675*4882a593Smuzhiyun 	struct ext4_ext_path *path = *ppath;
3676*4882a593Smuzhiyun 	ext4_lblk_t eof_block;
3677*4882a593Smuzhiyun 	ext4_lblk_t ee_block;
3678*4882a593Smuzhiyun 	struct ext4_extent *ex;
3679*4882a593Smuzhiyun 	unsigned int ee_len;
3680*4882a593Smuzhiyun 	int split_flag = 0, depth;
3681*4882a593Smuzhiyun 
3682*4882a593Smuzhiyun 	ext_debug(inode, "logical block %llu, max_blocks %u\n",
3683*4882a593Smuzhiyun 		  (unsigned long long)map->m_lblk, map->m_len);
3684*4882a593Smuzhiyun 
3685*4882a593Smuzhiyun 	eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3686*4882a593Smuzhiyun 			>> inode->i_sb->s_blocksize_bits;
3687*4882a593Smuzhiyun 	if (eof_block < map->m_lblk + map->m_len)
3688*4882a593Smuzhiyun 		eof_block = map->m_lblk + map->m_len;
3689*4882a593Smuzhiyun 	/*
3690*4882a593Smuzhiyun 	 * It is safe to convert extent to initialized via explicit
3691*4882a593Smuzhiyun 	 * zeroout only if extent is fully inside i_size or new_size.
3692*4882a593Smuzhiyun 	 */
3693*4882a593Smuzhiyun 	depth = ext_depth(inode);
3694*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3695*4882a593Smuzhiyun 	ee_block = le32_to_cpu(ex->ee_block);
3696*4882a593Smuzhiyun 	ee_len = ext4_ext_get_actual_len(ex);
3697*4882a593Smuzhiyun 
3698*4882a593Smuzhiyun 	/* Convert to unwritten */
3699*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3700*4882a593Smuzhiyun 		split_flag |= EXT4_EXT_DATA_VALID1;
3701*4882a593Smuzhiyun 	/* Convert to initialized */
3702*4882a593Smuzhiyun 	} else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3703*4882a593Smuzhiyun 		split_flag |= ee_block + ee_len <= eof_block ?
3704*4882a593Smuzhiyun 			      EXT4_EXT_MAY_ZEROOUT : 0;
3705*4882a593Smuzhiyun 		split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3706*4882a593Smuzhiyun 	}
3707*4882a593Smuzhiyun 	flags |= EXT4_GET_BLOCKS_PRE_IO;
3708*4882a593Smuzhiyun 	return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3709*4882a593Smuzhiyun }
3710*4882a593Smuzhiyun 
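/*
 * Editor's note on the flag mapping above: initialized -> unwritten
 * conversion (CONVERT_UNWRITTEN) only asserts that the first half
 * carries valid data (DATA_VALID1).  Unwritten -> initialized
 * conversion at I/O completion (CONVERT) keeps the second half
 * unwritten (MARK_UNWRIT2), treats its data as valid (DATA_VALID2),
 * and permits the zeroout fallback only when the extent lies entirely
 * below eof_block.
 */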
ext4_convert_unwritten_extents_endio(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,struct ext4_ext_path ** ppath)3711*4882a593Smuzhiyun static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3712*4882a593Smuzhiyun 						struct inode *inode,
3713*4882a593Smuzhiyun 						struct ext4_map_blocks *map,
3714*4882a593Smuzhiyun 						struct ext4_ext_path **ppath)
3715*4882a593Smuzhiyun {
3716*4882a593Smuzhiyun 	struct ext4_ext_path *path = *ppath;
3717*4882a593Smuzhiyun 	struct ext4_extent *ex;
3718*4882a593Smuzhiyun 	ext4_lblk_t ee_block;
3719*4882a593Smuzhiyun 	unsigned int ee_len;
3720*4882a593Smuzhiyun 	int depth;
3721*4882a593Smuzhiyun 	int err = 0;
3722*4882a593Smuzhiyun 
3723*4882a593Smuzhiyun 	depth = ext_depth(inode);
3724*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3725*4882a593Smuzhiyun 	ee_block = le32_to_cpu(ex->ee_block);
3726*4882a593Smuzhiyun 	ee_len = ext4_ext_get_actual_len(ex);
3727*4882a593Smuzhiyun 
3728*4882a593Smuzhiyun 	ext_debug(inode, "logical block %llu, max_blocks %u\n",
3729*4882a593Smuzhiyun 		  (unsigned long long)ee_block, ee_len);
3730*4882a593Smuzhiyun 
3731*4882a593Smuzhiyun 	/* If the extent is larger than requested, it is a clear sign that we
3732*4882a593Smuzhiyun 	 * still have some extent state machine issues left, so an extent
3733*4882a593Smuzhiyun 	 * split is still required.
3734*4882a593Smuzhiyun 	 * TODO: once all related issues are fixed, this situation should be
3735*4882a593Smuzhiyun 	 * illegal.
3736*4882a593Smuzhiyun 	 */
3737*4882a593Smuzhiyun 	if (ee_block != map->m_lblk || ee_len > map->m_len) {
3738*4882a593Smuzhiyun #ifdef CONFIG_EXT4_DEBUG
3739*4882a593Smuzhiyun 		ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
3740*4882a593Smuzhiyun 			     " len %u; IO logical block %llu, len %u",
3741*4882a593Smuzhiyun 			     inode->i_ino, (unsigned long long)ee_block, ee_len,
3742*4882a593Smuzhiyun 			     (unsigned long long)map->m_lblk, map->m_len);
3743*4882a593Smuzhiyun #endif
3744*4882a593Smuzhiyun 		err = ext4_split_convert_extents(handle, inode, map, ppath,
3745*4882a593Smuzhiyun 						 EXT4_GET_BLOCKS_CONVERT);
3746*4882a593Smuzhiyun 		if (err < 0)
3747*4882a593Smuzhiyun 			return err;
3748*4882a593Smuzhiyun 		path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3749*4882a593Smuzhiyun 		if (IS_ERR(path))
3750*4882a593Smuzhiyun 			return PTR_ERR(path);
3751*4882a593Smuzhiyun 		depth = ext_depth(inode);
3752*4882a593Smuzhiyun 		ex = path[depth].p_ext;
3753*4882a593Smuzhiyun 	}
3754*4882a593Smuzhiyun 
3755*4882a593Smuzhiyun 	err = ext4_ext_get_access(handle, inode, path + depth);
3756*4882a593Smuzhiyun 	if (err)
3757*4882a593Smuzhiyun 		goto out;
3758*4882a593Smuzhiyun 	/* first mark the extent as initialized */
3759*4882a593Smuzhiyun 	ext4_ext_mark_initialized(ex);
3760*4882a593Smuzhiyun 
3761*4882a593Smuzhiyun 	/* note: ext4_ext_correct_indexes() isn't needed here because
3762*4882a593Smuzhiyun 	 * borders are not changed
3763*4882a593Smuzhiyun 	 */
3764*4882a593Smuzhiyun 	ext4_ext_try_to_merge(handle, inode, path, ex);
3765*4882a593Smuzhiyun 
3766*4882a593Smuzhiyun 	/* Mark modified extent as dirty */
3767*4882a593Smuzhiyun 	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3768*4882a593Smuzhiyun out:
3769*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
3770*4882a593Smuzhiyun 	return err;
3771*4882a593Smuzhiyun }
3772*4882a593Smuzhiyun 
3773*4882a593Smuzhiyun static int
convert_initialized_extent(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,struct ext4_ext_path ** ppath,unsigned int * allocated)3774*4882a593Smuzhiyun convert_initialized_extent(handle_t *handle, struct inode *inode,
3775*4882a593Smuzhiyun 			   struct ext4_map_blocks *map,
3776*4882a593Smuzhiyun 			   struct ext4_ext_path **ppath,
3777*4882a593Smuzhiyun 			   unsigned int *allocated)
3778*4882a593Smuzhiyun {
3779*4882a593Smuzhiyun 	struct ext4_ext_path *path = *ppath;
3780*4882a593Smuzhiyun 	struct ext4_extent *ex;
3781*4882a593Smuzhiyun 	ext4_lblk_t ee_block;
3782*4882a593Smuzhiyun 	unsigned int ee_len;
3783*4882a593Smuzhiyun 	int depth;
3784*4882a593Smuzhiyun 	int err = 0;
3785*4882a593Smuzhiyun 
3786*4882a593Smuzhiyun 	/*
3787*4882a593Smuzhiyun 	 * Make sure that the extent is no bigger than we support with
3788*4882a593Smuzhiyun 	 * an unwritten extent.
3789*4882a593Smuzhiyun 	 */
3790*4882a593Smuzhiyun 	if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3791*4882a593Smuzhiyun 		map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3792*4882a593Smuzhiyun 
3793*4882a593Smuzhiyun 	depth = ext_depth(inode);
3794*4882a593Smuzhiyun 	ex = path[depth].p_ext;
3795*4882a593Smuzhiyun 	ee_block = le32_to_cpu(ex->ee_block);
3796*4882a593Smuzhiyun 	ee_len = ext4_ext_get_actual_len(ex);
3797*4882a593Smuzhiyun 
3798*4882a593Smuzhiyun 	ext_debug(inode, "logical block %llu, max_blocks %u\n",
3799*4882a593Smuzhiyun 		  (unsigned long long)ee_block, ee_len);
3800*4882a593Smuzhiyun 
3801*4882a593Smuzhiyun 	if (ee_block != map->m_lblk || ee_len > map->m_len) {
3802*4882a593Smuzhiyun 		err = ext4_split_convert_extents(handle, inode, map, ppath,
3803*4882a593Smuzhiyun 				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3804*4882a593Smuzhiyun 		if (err < 0)
3805*4882a593Smuzhiyun 			return err;
3806*4882a593Smuzhiyun 		path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3807*4882a593Smuzhiyun 		if (IS_ERR(path))
3808*4882a593Smuzhiyun 			return PTR_ERR(path);
3809*4882a593Smuzhiyun 		depth = ext_depth(inode);
3810*4882a593Smuzhiyun 		ex = path[depth].p_ext;
3811*4882a593Smuzhiyun 		if (!ex) {
3812*4882a593Smuzhiyun 			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3813*4882a593Smuzhiyun 					 (unsigned long) map->m_lblk);
3814*4882a593Smuzhiyun 			return -EFSCORRUPTED;
3815*4882a593Smuzhiyun 		}
3816*4882a593Smuzhiyun 	}
3817*4882a593Smuzhiyun 
3818*4882a593Smuzhiyun 	err = ext4_ext_get_access(handle, inode, path + depth);
3819*4882a593Smuzhiyun 	if (err)
3820*4882a593Smuzhiyun 		return err;
3821*4882a593Smuzhiyun 	/* first mark the extent as unwritten */
3822*4882a593Smuzhiyun 	ext4_ext_mark_unwritten(ex);
3823*4882a593Smuzhiyun 
3824*4882a593Smuzhiyun 	/* note: ext4_ext_correct_indexes() isn't needed here because
3825*4882a593Smuzhiyun 	 * borders are not changed
3826*4882a593Smuzhiyun 	 */
3827*4882a593Smuzhiyun 	ext4_ext_try_to_merge(handle, inode, path, ex);
3828*4882a593Smuzhiyun 
3829*4882a593Smuzhiyun 	/* Mark modified extent as dirty */
3830*4882a593Smuzhiyun 	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3831*4882a593Smuzhiyun 	if (err)
3832*4882a593Smuzhiyun 		return err;
3833*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
3834*4882a593Smuzhiyun 
3835*4882a593Smuzhiyun 	ext4_update_inode_fsync_trans(handle, inode, 1);
3836*4882a593Smuzhiyun 
3837*4882a593Smuzhiyun 	map->m_flags |= EXT4_MAP_UNWRITTEN;
3838*4882a593Smuzhiyun 	if (*allocated > map->m_len)
3839*4882a593Smuzhiyun 		*allocated = map->m_len;
3840*4882a593Smuzhiyun 	map->m_len = *allocated;
3841*4882a593Smuzhiyun 	return 0;
3842*4882a593Smuzhiyun }
3843*4882a593Smuzhiyun 
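/*
 * Editor's note: convert_initialized_extent() above is the mirror image
 * of the endio conversion.  If the extent is larger than the requested
 * range it is split first, then the matching piece is marked unwritten
 * and merged with its neighbours, and map->m_len is trimmed to what was
 * actually converted.
 */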
3844*4882a593Smuzhiyun static int
ext4_ext_handle_unwritten_extents(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,struct ext4_ext_path ** ppath,int flags,unsigned int allocated,ext4_fsblk_t newblock)3845*4882a593Smuzhiyun ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3846*4882a593Smuzhiyun 			struct ext4_map_blocks *map,
3847*4882a593Smuzhiyun 			struct ext4_ext_path **ppath, int flags,
3848*4882a593Smuzhiyun 			unsigned int allocated, ext4_fsblk_t newblock)
3849*4882a593Smuzhiyun {
3850*4882a593Smuzhiyun 	struct ext4_ext_path __maybe_unused *path = *ppath;
3851*4882a593Smuzhiyun 	int ret = 0;
3852*4882a593Smuzhiyun 	int err = 0;
3853*4882a593Smuzhiyun 
3854*4882a593Smuzhiyun 	ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3855*4882a593Smuzhiyun 		  (unsigned long long)map->m_lblk, map->m_len, flags,
3856*4882a593Smuzhiyun 		  allocated);
3857*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
3858*4882a593Smuzhiyun 
3859*4882a593Smuzhiyun 	/*
3860*4882a593Smuzhiyun 	 * When writing into unwritten space, we should not fail to
3861*4882a593Smuzhiyun 	 * allocate metadata blocks for the new extent block if needed.
3862*4882a593Smuzhiyun 	 */
3863*4882a593Smuzhiyun 	flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3864*4882a593Smuzhiyun 
3865*4882a593Smuzhiyun 	trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
3866*4882a593Smuzhiyun 						    allocated, newblock);
3867*4882a593Smuzhiyun 
3868*4882a593Smuzhiyun 	/* get_block() before submitting IO, split the extent */
3869*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_PRE_IO) {
3870*4882a593Smuzhiyun 		ret = ext4_split_convert_extents(handle, inode, map, ppath,
3871*4882a593Smuzhiyun 					 flags | EXT4_GET_BLOCKS_CONVERT);
3872*4882a593Smuzhiyun 		if (ret < 0) {
3873*4882a593Smuzhiyun 			err = ret;
3874*4882a593Smuzhiyun 			goto out2;
3875*4882a593Smuzhiyun 		}
3876*4882a593Smuzhiyun 		/*
3877*4882a593Smuzhiyun 		 * shouldn't get a 0 return when splitting an extent unless
3878*4882a593Smuzhiyun 		 * m_len is 0 (bug) or extent has been corrupted
3879*4882a593Smuzhiyun 		 */
3880*4882a593Smuzhiyun 		if (unlikely(ret == 0)) {
3881*4882a593Smuzhiyun 			EXT4_ERROR_INODE(inode,
3882*4882a593Smuzhiyun 					 "unexpected ret == 0, m_len = %u",
3883*4882a593Smuzhiyun 					 map->m_len);
3884*4882a593Smuzhiyun 			err = -EFSCORRUPTED;
3885*4882a593Smuzhiyun 			goto out2;
3886*4882a593Smuzhiyun 		}
3887*4882a593Smuzhiyun 		map->m_flags |= EXT4_MAP_UNWRITTEN;
3888*4882a593Smuzhiyun 		goto out;
3889*4882a593Smuzhiyun 	}
3890*4882a593Smuzhiyun 	/* IO end_io complete, convert the filled extent to written */
3891*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_CONVERT) {
3892*4882a593Smuzhiyun 		err = ext4_convert_unwritten_extents_endio(handle, inode, map,
3893*4882a593Smuzhiyun 							   ppath);
3894*4882a593Smuzhiyun 		if (err < 0)
3895*4882a593Smuzhiyun 			goto out2;
3896*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 1);
3897*4882a593Smuzhiyun 		goto map_out;
3898*4882a593Smuzhiyun 	}
3899*4882a593Smuzhiyun 	/* buffered IO cases */
3900*4882a593Smuzhiyun 	/*
3901*4882a593Smuzhiyun 	 * For a repeated fallocate creation request,
3902*4882a593Smuzhiyun 	 * we already have an unwritten extent.
3903*4882a593Smuzhiyun 	 */
3904*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
3905*4882a593Smuzhiyun 		map->m_flags |= EXT4_MAP_UNWRITTEN;
3906*4882a593Smuzhiyun 		goto map_out;
3907*4882a593Smuzhiyun 	}
3908*4882a593Smuzhiyun 
3909*4882a593Smuzhiyun 	/* buffered READ or buffered write_begin() lookup */
3910*4882a593Smuzhiyun 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3911*4882a593Smuzhiyun 		/*
3912*4882a593Smuzhiyun 		 * We have blocks reserved already.  We
3913*4882a593Smuzhiyun 		 * return allocated blocks so that delalloc
3914*4882a593Smuzhiyun 		 * won't do block reservation for us.  But
3915*4882a593Smuzhiyun 		 * the buffer head will be unmapped so that
3916*4882a593Smuzhiyun 		 * a read from the block returns 0s.
3917*4882a593Smuzhiyun 		 */
3918*4882a593Smuzhiyun 		map->m_flags |= EXT4_MAP_UNWRITTEN;
3919*4882a593Smuzhiyun 		goto out1;
3920*4882a593Smuzhiyun 	}
3921*4882a593Smuzhiyun 
3922*4882a593Smuzhiyun 	/*
3923*4882a593Smuzhiyun 	 * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3924*4882a593Smuzhiyun 	 * For buffered writes, at writepage time, etc.  Convert a
3925*4882a593Smuzhiyun 	 * discovered unwritten extent to written.
3926*4882a593Smuzhiyun 	 */
3927*4882a593Smuzhiyun 	ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
3928*4882a593Smuzhiyun 	if (ret < 0) {
3929*4882a593Smuzhiyun 		err = ret;
3930*4882a593Smuzhiyun 		goto out2;
3931*4882a593Smuzhiyun 	}
3932*4882a593Smuzhiyun 	ext4_update_inode_fsync_trans(handle, inode, 1);
3933*4882a593Smuzhiyun 	/*
3934*4882a593Smuzhiyun 	 * shouldn't get a 0 return when converting an unwritten extent
3935*4882a593Smuzhiyun 	 * unless m_len is 0 (bug) or extent has been corrupted
3936*4882a593Smuzhiyun 	 */
3937*4882a593Smuzhiyun 	if (unlikely(ret == 0)) {
3938*4882a593Smuzhiyun 		EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3939*4882a593Smuzhiyun 				 map->m_len);
3940*4882a593Smuzhiyun 		err = -EFSCORRUPTED;
3941*4882a593Smuzhiyun 		goto out2;
3942*4882a593Smuzhiyun 	}
3943*4882a593Smuzhiyun 
3944*4882a593Smuzhiyun out:
3945*4882a593Smuzhiyun 	allocated = ret;
3946*4882a593Smuzhiyun 	map->m_flags |= EXT4_MAP_NEW;
3947*4882a593Smuzhiyun map_out:
3948*4882a593Smuzhiyun 	map->m_flags |= EXT4_MAP_MAPPED;
3949*4882a593Smuzhiyun out1:
3950*4882a593Smuzhiyun 	map->m_pblk = newblock;
3951*4882a593Smuzhiyun 	if (allocated > map->m_len)
3952*4882a593Smuzhiyun 		allocated = map->m_len;
3953*4882a593Smuzhiyun 	map->m_len = allocated;
3954*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
3955*4882a593Smuzhiyun out2:
3956*4882a593Smuzhiyun 	return err ? err : allocated;
3957*4882a593Smuzhiyun }
3958*4882a593Smuzhiyun 
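/*
 * Editor's note: a summary of the dispatch above.  PRE_IO splits the
 * extent before the I/O is submitted; CONVERT finishes the conversion
 * from end_io; a repeated fallocate request just reports the existing
 * unwritten extent; a plain lookup returns it unmapped so reads see
 * zeroes; and only the buffered-write CREATE path converts it to
 * initialized here.
 */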
3959*4882a593Smuzhiyun /*
3960*4882a593Smuzhiyun  * get_implied_cluster_alloc - check to see if the requested
3961*4882a593Smuzhiyun  * allocation (in the map structure) overlaps with a cluster already
3962*4882a593Smuzhiyun  * allocated in an extent.
3963*4882a593Smuzhiyun  *	@sb	The filesystem superblock structure
3964*4882a593Smuzhiyun  *	@map	The requested lblk->pblk mapping
3965*4882a593Smuzhiyun  *	@ex	The extent structure which might contain an implied
3966*4882a593Smuzhiyun  *			cluster allocation
3967*4882a593Smuzhiyun  *
3968*4882a593Smuzhiyun  * This function is called by ext4_ext_map_blocks() after we failed to
3969*4882a593Smuzhiyun  * find blocks that were already in the inode's extent tree.  Hence,
3970*4882a593Smuzhiyun  * we know that the beginning of the requested region cannot overlap
3971*4882a593Smuzhiyun  * the extent from the inode's extent tree.  There are three cases we
3972*4882a593Smuzhiyun  * want to catch.  The first is this case:
3973*4882a593Smuzhiyun  *
3974*4882a593Smuzhiyun  *		 |--- cluster # N--|
3975*4882a593Smuzhiyun  *    |--- extent ---|	|---- requested region ---|
3976*4882a593Smuzhiyun  *			|==========|
3977*4882a593Smuzhiyun  *
3978*4882a593Smuzhiyun  * The second case that we need to test for is this one:
3979*4882a593Smuzhiyun  *
3980*4882a593Smuzhiyun  *   |--------- cluster # N ----------------|
3981*4882a593Smuzhiyun  *	   |--- requested region --|   |------- extent ----|
3982*4882a593Smuzhiyun  *	   |=======================|
3983*4882a593Smuzhiyun  *
3984*4882a593Smuzhiyun  * The third case is when the requested region lies between two extents
3985*4882a593Smuzhiyun  * within the same cluster:
3986*4882a593Smuzhiyun  *          |------------- cluster # N-------------|
3987*4882a593Smuzhiyun  * |----- ex -----|                  |---- ex_right ----|
3988*4882a593Smuzhiyun  *                  |------ requested region ------|
3989*4882a593Smuzhiyun  *                  |================|
3990*4882a593Smuzhiyun  *
3991*4882a593Smuzhiyun  * In each of the above cases, we need to set the map->m_pblk and
3992*4882a593Smuzhiyun  * map->m_len so they correspond to the extent labelled as
3993*4882a593Smuzhiyun  * "|====|" from cluster #N, since it is already in use for data in
3994*4882a593Smuzhiyun  * cluster EXT4_B2C(sbi, map->m_lblk).	We will then return 1 to
3995*4882a593Smuzhiyun  * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3996*4882a593Smuzhiyun  * as a new "allocated" block region.  Otherwise, we will return 0 and
3997*4882a593Smuzhiyun  * ext4_ext_map_blocks() will then allocate one or more new clusters
3998*4882a593Smuzhiyun  * by calling ext4_mb_new_blocks().
3999*4882a593Smuzhiyun  */
get_implied_cluster_alloc(struct super_block * sb,struct ext4_map_blocks * map,struct ext4_extent * ex,struct ext4_ext_path * path)4000*4882a593Smuzhiyun static int get_implied_cluster_alloc(struct super_block *sb,
4001*4882a593Smuzhiyun 				     struct ext4_map_blocks *map,
4002*4882a593Smuzhiyun 				     struct ext4_extent *ex,
4003*4882a593Smuzhiyun 				     struct ext4_ext_path *path)
4004*4882a593Smuzhiyun {
4005*4882a593Smuzhiyun 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4006*4882a593Smuzhiyun 	ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4007*4882a593Smuzhiyun 	ext4_lblk_t ex_cluster_start, ex_cluster_end;
4008*4882a593Smuzhiyun 	ext4_lblk_t rr_cluster_start;
4009*4882a593Smuzhiyun 	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4010*4882a593Smuzhiyun 	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4011*4882a593Smuzhiyun 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
4012*4882a593Smuzhiyun 
4013*4882a593Smuzhiyun 	/* The extent passed in that we are trying to match */
4014*4882a593Smuzhiyun 	ex_cluster_start = EXT4_B2C(sbi, ee_block);
4015*4882a593Smuzhiyun 	ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
4016*4882a593Smuzhiyun 
4017*4882a593Smuzhiyun 	/* The requested region passed into ext4_map_blocks() */
4018*4882a593Smuzhiyun 	rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
4019*4882a593Smuzhiyun 
4020*4882a593Smuzhiyun 	if ((rr_cluster_start == ex_cluster_end) ||
4021*4882a593Smuzhiyun 	    (rr_cluster_start == ex_cluster_start)) {
4022*4882a593Smuzhiyun 		if (rr_cluster_start == ex_cluster_end)
4023*4882a593Smuzhiyun 			ee_start += ee_len - 1;
4024*4882a593Smuzhiyun 		map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
4025*4882a593Smuzhiyun 		map->m_len = min(map->m_len,
4026*4882a593Smuzhiyun 				 (unsigned) sbi->s_cluster_ratio - c_offset);
4027*4882a593Smuzhiyun 		/*
4028*4882a593Smuzhiyun 		 * Check for and handle this case:
4029*4882a593Smuzhiyun 		 *
4030*4882a593Smuzhiyun 		 *   |--------- cluster # N-------------|
4031*4882a593Smuzhiyun 		 *		       |------- extent ----|
4032*4882a593Smuzhiyun 		 *	   |--- requested region ---|
4033*4882a593Smuzhiyun 		 *	   |===========|
4034*4882a593Smuzhiyun 		 */
4035*4882a593Smuzhiyun 
4036*4882a593Smuzhiyun 		if (map->m_lblk < ee_block)
4037*4882a593Smuzhiyun 			map->m_len = min(map->m_len, ee_block - map->m_lblk);
4038*4882a593Smuzhiyun 
4039*4882a593Smuzhiyun 		/*
4040*4882a593Smuzhiyun 		 * Check for the case where there is already another allocated
4041*4882a593Smuzhiyun 		 * block to the right of 'ex' but before the end of the cluster.
4042*4882a593Smuzhiyun 		 *
4043*4882a593Smuzhiyun 		 *          |------------- cluster # N-------------|
4044*4882a593Smuzhiyun 		 * |----- ex -----|                  |---- ex_right ----|
4045*4882a593Smuzhiyun 		 *                  |------ requested region ------|
4046*4882a593Smuzhiyun 		 *                  |================|
4047*4882a593Smuzhiyun 		 */
4048*4882a593Smuzhiyun 		if (map->m_lblk > ee_block) {
4049*4882a593Smuzhiyun 			ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4050*4882a593Smuzhiyun 			map->m_len = min(map->m_len, next - map->m_lblk);
4051*4882a593Smuzhiyun 		}
4052*4882a593Smuzhiyun 
4053*4882a593Smuzhiyun 		trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
4054*4882a593Smuzhiyun 		return 1;
4055*4882a593Smuzhiyun 	}
4056*4882a593Smuzhiyun 
4057*4882a593Smuzhiyun 	trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
4058*4882a593Smuzhiyun 	return 0;
4059*4882a593Smuzhiyun }
4060*4882a593Smuzhiyun 
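/*
 * Editor's illustrative sketch (not part of the kernel source, kept out
 * of the build): the bigalloc arithmetic that get_implied_cluster_alloc()
 * above relies on.  With 2^cluster_bits blocks per cluster, a logical
 * block splits into a cluster number and an offset within the cluster
 * (what EXT4_B2C() and EXT4_LBLK_COFF() compute on the real structures).
 * sketch_* names are hypothetical.
 */
#if 0
static void sketch_block_to_cluster(unsigned int lblk,
				    unsigned int cluster_bits,
				    unsigned int *cluster,
				    unsigned int *offset)
{
	*cluster = lblk >> cluster_bits;		/* EXT4_B2C()       */
	*offset = lblk & ((1U << cluster_bits) - 1);	/* EXT4_LBLK_COFF() */
}
#endif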
4061*4882a593Smuzhiyun 
4062*4882a593Smuzhiyun /*
4063*4882a593Smuzhiyun  * Block allocation/map/preallocation routine for extents based files
4064*4882a593Smuzhiyun  *
4065*4882a593Smuzhiyun  *
4066*4882a593Smuzhiyun  * Need to be called with
4067*4882a593Smuzhiyun  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
4068*4882a593Smuzhiyun  * (i.e., create is zero); otherwise down_write(&EXT4_I(inode)->i_data_sem)
4069*4882a593Smuzhiyun  *
4070*4882a593Smuzhiyun  * return > 0, number of blocks already mapped/allocated
4071*4882a593Smuzhiyun  *          if create == 0 and these are pre-allocated blocks
4072*4882a593Smuzhiyun  *          	buffer head is unmapped
4073*4882a593Smuzhiyun  *          otherwise blocks are mapped
4074*4882a593Smuzhiyun  *
4075*4882a593Smuzhiyun  * return = 0, if plain lookup failed (blocks have not been allocated)
4076*4882a593Smuzhiyun  *          buffer head is unmapped
4077*4882a593Smuzhiyun  *
4078*4882a593Smuzhiyun  * return < 0, error case.
4079*4882a593Smuzhiyun  */
ext4_ext_map_blocks(handle_t * handle,struct inode * inode,struct ext4_map_blocks * map,int flags)4080*4882a593Smuzhiyun int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4081*4882a593Smuzhiyun 			struct ext4_map_blocks *map, int flags)
4082*4882a593Smuzhiyun {
4083*4882a593Smuzhiyun 	struct ext4_ext_path *path = NULL;
4084*4882a593Smuzhiyun 	struct ext4_extent newex, *ex, ex2;
4085*4882a593Smuzhiyun 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4086*4882a593Smuzhiyun 	ext4_fsblk_t newblock = 0, pblk;
4087*4882a593Smuzhiyun 	int err = 0, depth, ret;
4088*4882a593Smuzhiyun 	unsigned int allocated = 0, offset = 0;
4089*4882a593Smuzhiyun 	unsigned int allocated_clusters = 0;
4090*4882a593Smuzhiyun 	struct ext4_allocation_request ar;
4091*4882a593Smuzhiyun 	ext4_lblk_t cluster_offset;
4092*4882a593Smuzhiyun 
4093*4882a593Smuzhiyun 	ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
4094*4882a593Smuzhiyun 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4095*4882a593Smuzhiyun 
4096*4882a593Smuzhiyun 	/* find extent for this block */
4097*4882a593Smuzhiyun 	path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4098*4882a593Smuzhiyun 	if (IS_ERR(path)) {
4099*4882a593Smuzhiyun 		err = PTR_ERR(path);
4100*4882a593Smuzhiyun 		path = NULL;
4101*4882a593Smuzhiyun 		goto out;
4102*4882a593Smuzhiyun 	}
4103*4882a593Smuzhiyun 
4104*4882a593Smuzhiyun 	depth = ext_depth(inode);
4105*4882a593Smuzhiyun 
4106*4882a593Smuzhiyun 	/*
4107*4882a593Smuzhiyun 	 * consistent leaf must not be empty;
4108*4882a593Smuzhiyun 	 * this situation is possible, though, _during_ tree modification;
4109*4882a593Smuzhiyun 	 * this is why assert can't be put in ext4_find_extent()
4110*4882a593Smuzhiyun 	 */
4111*4882a593Smuzhiyun 	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4112*4882a593Smuzhiyun 		EXT4_ERROR_INODE(inode, "bad extent address "
4113*4882a593Smuzhiyun 				 "lblock: %lu, depth: %d pblock %lld",
4114*4882a593Smuzhiyun 				 (unsigned long) map->m_lblk, depth,
4115*4882a593Smuzhiyun 				 path[depth].p_block);
4116*4882a593Smuzhiyun 		err = -EFSCORRUPTED;
4117*4882a593Smuzhiyun 		goto out;
4118*4882a593Smuzhiyun 	}
4119*4882a593Smuzhiyun 
4120*4882a593Smuzhiyun 	ex = path[depth].p_ext;
4121*4882a593Smuzhiyun 	if (ex) {
4122*4882a593Smuzhiyun 		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4123*4882a593Smuzhiyun 		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4124*4882a593Smuzhiyun 		unsigned short ee_len;
4125*4882a593Smuzhiyun 
4126*4882a593Smuzhiyun 
4127*4882a593Smuzhiyun 		/*
4128*4882a593Smuzhiyun 		 * unwritten extents are treated as holes, except that
4129*4882a593Smuzhiyun 		 * we split out initialized portions during a write.
4130*4882a593Smuzhiyun 		 */
4131*4882a593Smuzhiyun 		ee_len = ext4_ext_get_actual_len(ex);
4132*4882a593Smuzhiyun 
4133*4882a593Smuzhiyun 		trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4134*4882a593Smuzhiyun 
4135*4882a593Smuzhiyun 		/* if found extent covers block, simply return it */
4136*4882a593Smuzhiyun 		if (in_range(map->m_lblk, ee_block, ee_len)) {
4137*4882a593Smuzhiyun 			newblock = map->m_lblk - ee_block + ee_start;
4138*4882a593Smuzhiyun 			/* number of remaining blocks in the extent */
4139*4882a593Smuzhiyun 			allocated = ee_len - (map->m_lblk - ee_block);
4140*4882a593Smuzhiyun 			ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4141*4882a593Smuzhiyun 				  map->m_lblk, ee_block, ee_len, newblock);
4142*4882a593Smuzhiyun 
4143*4882a593Smuzhiyun 			/*
4144*4882a593Smuzhiyun 			 * If the extent is initialized check whether the
4145*4882a593Smuzhiyun 			 * caller wants to convert it to unwritten.
4146*4882a593Smuzhiyun 			 */
4147*4882a593Smuzhiyun 			if ((!ext4_ext_is_unwritten(ex)) &&
4148*4882a593Smuzhiyun 			    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4149*4882a593Smuzhiyun 				err = convert_initialized_extent(handle,
4150*4882a593Smuzhiyun 					inode, map, &path, &allocated);
4151*4882a593Smuzhiyun 				goto out;
4152*4882a593Smuzhiyun 			} else if (!ext4_ext_is_unwritten(ex)) {
4153*4882a593Smuzhiyun 				map->m_flags |= EXT4_MAP_MAPPED;
4154*4882a593Smuzhiyun 				map->m_pblk = newblock;
4155*4882a593Smuzhiyun 				if (allocated > map->m_len)
4156*4882a593Smuzhiyun 					allocated = map->m_len;
4157*4882a593Smuzhiyun 				map->m_len = allocated;
4158*4882a593Smuzhiyun 				ext4_ext_show_leaf(inode, path);
4159*4882a593Smuzhiyun 				goto out;
4160*4882a593Smuzhiyun 			}
4161*4882a593Smuzhiyun 
4162*4882a593Smuzhiyun 			ret = ext4_ext_handle_unwritten_extents(
4163*4882a593Smuzhiyun 				handle, inode, map, &path, flags,
4164*4882a593Smuzhiyun 				allocated, newblock);
4165*4882a593Smuzhiyun 			if (ret < 0)
4166*4882a593Smuzhiyun 				err = ret;
4167*4882a593Smuzhiyun 			else
4168*4882a593Smuzhiyun 				allocated = ret;
4169*4882a593Smuzhiyun 			goto out;
4170*4882a593Smuzhiyun 		}
4171*4882a593Smuzhiyun 	}
4172*4882a593Smuzhiyun 
4173*4882a593Smuzhiyun 	/*
4174*4882a593Smuzhiyun 	 * requested block isn't allocated yet;
4175*4882a593Smuzhiyun 	 * we cannot try to create blocks if the create flag is zero
4176*4882a593Smuzhiyun 	 */
4177*4882a593Smuzhiyun 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
4178*4882a593Smuzhiyun 		ext4_lblk_t hole_start, hole_len;
4179*4882a593Smuzhiyun 
4180*4882a593Smuzhiyun 		hole_start = map->m_lblk;
4181*4882a593Smuzhiyun 		hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
4182*4882a593Smuzhiyun 		/*
4183*4882a593Smuzhiyun 		 * put just found gap into cache to speed up
4184*4882a593Smuzhiyun 		 * subsequent requests
4185*4882a593Smuzhiyun 		 */
4186*4882a593Smuzhiyun 		ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
4187*4882a593Smuzhiyun 
4188*4882a593Smuzhiyun 		/* Update hole_len to reflect hole size after map->m_lblk */
4189*4882a593Smuzhiyun 		if (hole_start != map->m_lblk)
4190*4882a593Smuzhiyun 			hole_len -= map->m_lblk - hole_start;
4191*4882a593Smuzhiyun 		map->m_pblk = 0;
4192*4882a593Smuzhiyun 		map->m_len = min_t(unsigned int, map->m_len, hole_len);
4193*4882a593Smuzhiyun 
4194*4882a593Smuzhiyun 		goto out;
4195*4882a593Smuzhiyun 	}
4196*4882a593Smuzhiyun 
4197*4882a593Smuzhiyun 	/*
4198*4882a593Smuzhiyun 	 * Okay, we need to do block allocation.
4199*4882a593Smuzhiyun 	 */
4200*4882a593Smuzhiyun 	newex.ee_block = cpu_to_le32(map->m_lblk);
4201*4882a593Smuzhiyun 	cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4202*4882a593Smuzhiyun 
4203*4882a593Smuzhiyun 	/*
4204*4882a593Smuzhiyun 	 * If we are doing bigalloc, check to see if the extent returned
4205*4882a593Smuzhiyun 	 * by ext4_find_extent() implies a cluster we can use.
4206*4882a593Smuzhiyun 	 */
4207*4882a593Smuzhiyun 	if (cluster_offset && ex &&
4208*4882a593Smuzhiyun 	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
4209*4882a593Smuzhiyun 		ar.len = allocated = map->m_len;
4210*4882a593Smuzhiyun 		newblock = map->m_pblk;
4211*4882a593Smuzhiyun 		goto got_allocated_blocks;
4212*4882a593Smuzhiyun 	}
4213*4882a593Smuzhiyun 
4214*4882a593Smuzhiyun 	/* find neighbour allocated blocks */
4215*4882a593Smuzhiyun 	ar.lleft = map->m_lblk;
4216*4882a593Smuzhiyun 	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4217*4882a593Smuzhiyun 	if (err)
4218*4882a593Smuzhiyun 		goto out;
4219*4882a593Smuzhiyun 	ar.lright = map->m_lblk;
4220*4882a593Smuzhiyun 	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
4221*4882a593Smuzhiyun 	if (err < 0)
4222*4882a593Smuzhiyun 		goto out;
4223*4882a593Smuzhiyun 
4224*4882a593Smuzhiyun 	/* Check if the extent after searching to the right implies a
4225*4882a593Smuzhiyun 	 * cluster we can use. */
4226*4882a593Smuzhiyun 	if ((sbi->s_cluster_ratio > 1) && err &&
4227*4882a593Smuzhiyun 	    get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
4228*4882a593Smuzhiyun 		ar.len = allocated = map->m_len;
4229*4882a593Smuzhiyun 		newblock = map->m_pblk;
4230*4882a593Smuzhiyun 		goto got_allocated_blocks;
4231*4882a593Smuzhiyun 	}
4232*4882a593Smuzhiyun 
4233*4882a593Smuzhiyun 	/*
4234*4882a593Smuzhiyun 	 * See if request is beyond maximum number of blocks we can have in
4235*4882a593Smuzhiyun 	 * a single extent. For an initialized extent this limit is
4236*4882a593Smuzhiyun 	 * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4237*4882a593Smuzhiyun 	 * EXT_UNWRITTEN_MAX_LEN.
4238*4882a593Smuzhiyun 	 */
4239*4882a593Smuzhiyun 	if (map->m_len > EXT_INIT_MAX_LEN &&
4240*4882a593Smuzhiyun 	    !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4241*4882a593Smuzhiyun 		map->m_len = EXT_INIT_MAX_LEN;
4242*4882a593Smuzhiyun 	else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4243*4882a593Smuzhiyun 		 (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4244*4882a593Smuzhiyun 		map->m_len = EXT_UNWRITTEN_MAX_LEN;
4245*4882a593Smuzhiyun 
4246*4882a593Smuzhiyun 	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4247*4882a593Smuzhiyun 	newex.ee_len = cpu_to_le16(map->m_len);
4248*4882a593Smuzhiyun 	err = ext4_ext_check_overlap(sbi, inode, &newex, path);
4249*4882a593Smuzhiyun 	if (err)
4250*4882a593Smuzhiyun 		allocated = ext4_ext_get_actual_len(&newex);
4251*4882a593Smuzhiyun 	else
4252*4882a593Smuzhiyun 		allocated = map->m_len;
4253*4882a593Smuzhiyun 
4254*4882a593Smuzhiyun 	/* allocate new block */
4255*4882a593Smuzhiyun 	ar.inode = inode;
4256*4882a593Smuzhiyun 	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4257*4882a593Smuzhiyun 	ar.logical = map->m_lblk;
4258*4882a593Smuzhiyun 	/*
4259*4882a593Smuzhiyun 	 * We calculate the offset from the beginning of the cluster
4260*4882a593Smuzhiyun 	 * for the logical block number, since when we allocate a
4261*4882a593Smuzhiyun 	 * physical cluster, the physical block should start at the
4262*4882a593Smuzhiyun 	 * same offset from the beginning of the cluster.  This is
4263*4882a593Smuzhiyun 	 * needed so that future calls to get_implied_cluster_alloc()
4264*4882a593Smuzhiyun 	 * work correctly.
4265*4882a593Smuzhiyun 	 */
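	/*
	 * Editor's worked example (hypothetical numbers): with a cluster
	 * ratio of 16 and map->m_lblk == 100, offset == 100 & 15 == 4, so
	 * ar.goal and ar.logical are pulled back by 4 blocks to the
	 * cluster boundary and ar.len is rounded up to whole clusters.
	 */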
4266*4882a593Smuzhiyun 	offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
4267*4882a593Smuzhiyun 	ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4268*4882a593Smuzhiyun 	ar.goal -= offset;
4269*4882a593Smuzhiyun 	ar.logical -= offset;
4270*4882a593Smuzhiyun 	if (S_ISREG(inode->i_mode))
4271*4882a593Smuzhiyun 		ar.flags = EXT4_MB_HINT_DATA;
4272*4882a593Smuzhiyun 	else
4273*4882a593Smuzhiyun 		/* disable in-core preallocation for non-regular files */
4274*4882a593Smuzhiyun 		ar.flags = 0;
4275*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4276*4882a593Smuzhiyun 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4277*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4278*4882a593Smuzhiyun 		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4279*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4280*4882a593Smuzhiyun 		ar.flags |= EXT4_MB_USE_RESERVED;
4281*4882a593Smuzhiyun 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
4282*4882a593Smuzhiyun 	if (!newblock)
4283*4882a593Smuzhiyun 		goto out;
4284*4882a593Smuzhiyun 	allocated_clusters = ar.len;
4285*4882a593Smuzhiyun 	ar.len = EXT4_C2B(sbi, ar.len) - offset;
4286*4882a593Smuzhiyun 	ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4287*4882a593Smuzhiyun 		  ar.goal, newblock, ar.len, allocated);
4288*4882a593Smuzhiyun 	if (ar.len > allocated)
4289*4882a593Smuzhiyun 		ar.len = allocated;
4290*4882a593Smuzhiyun 
4291*4882a593Smuzhiyun got_allocated_blocks:
4292*4882a593Smuzhiyun 	/* try to insert new extent into found leaf and return */
4293*4882a593Smuzhiyun 	pblk = newblock + offset;
4294*4882a593Smuzhiyun 	ext4_ext_store_pblock(&newex, pblk);
4295*4882a593Smuzhiyun 	newex.ee_len = cpu_to_le16(ar.len);
4296*4882a593Smuzhiyun 	/* Mark unwritten */
4297*4882a593Smuzhiyun 	if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
4298*4882a593Smuzhiyun 		ext4_ext_mark_unwritten(&newex);
4299*4882a593Smuzhiyun 		map->m_flags |= EXT4_MAP_UNWRITTEN;
4300*4882a593Smuzhiyun 	}
4301*4882a593Smuzhiyun 
4302*4882a593Smuzhiyun 	err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4303*4882a593Smuzhiyun 	if (err) {
4304*4882a593Smuzhiyun 		if (allocated_clusters) {
4305*4882a593Smuzhiyun 			int fb_flags = 0;
4306*4882a593Smuzhiyun 
4307*4882a593Smuzhiyun 			/*
4308*4882a593Smuzhiyun 			 * free data blocks we just allocated.
4309*4882a593Smuzhiyun 			 * not a good idea to call discard here directly,
4310*4882a593Smuzhiyun 			 * but otherwise we'd need to call it on every free().
4311*4882a593Smuzhiyun 			 */
4312*4882a593Smuzhiyun 			ext4_discard_preallocations(inode, 0);
4313*4882a593Smuzhiyun 			if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4314*4882a593Smuzhiyun 				fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4315*4882a593Smuzhiyun 			ext4_free_blocks(handle, inode, NULL, newblock,
4316*4882a593Smuzhiyun 					 EXT4_C2B(sbi, allocated_clusters),
4317*4882a593Smuzhiyun 					 fb_flags);
4318*4882a593Smuzhiyun 		}
4319*4882a593Smuzhiyun 		goto out;
4320*4882a593Smuzhiyun 	}
4321*4882a593Smuzhiyun 
4322*4882a593Smuzhiyun 	/*
4323*4882a593Smuzhiyun 	 * Reduce the reserved cluster count to reflect successful deferred
4324*4882a593Smuzhiyun 	 * allocation of delayed allocated clusters or direct allocation of
4325*4882a593Smuzhiyun 	 * clusters discovered to be delayed allocated.  Once allocated, a
4326*4882a593Smuzhiyun 	 * cluster is not included in the reserved count.
4327*4882a593Smuzhiyun 	 */
4328*4882a593Smuzhiyun 	if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
4329*4882a593Smuzhiyun 		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4330*4882a593Smuzhiyun 			/*
4331*4882a593Smuzhiyun 			 * When allocating delayed allocated clusters, simply
4332*4882a593Smuzhiyun 			 * reduce the reserved cluster count and claim quota
4333*4882a593Smuzhiyun 			 */
4334*4882a593Smuzhiyun 			ext4_da_update_reserve_space(inode, allocated_clusters,
4335*4882a593Smuzhiyun 							1);
4336*4882a593Smuzhiyun 		} else {
4337*4882a593Smuzhiyun 			ext4_lblk_t lblk, len;
4338*4882a593Smuzhiyun 			unsigned int n;
4339*4882a593Smuzhiyun 
4340*4882a593Smuzhiyun 			/*
4341*4882a593Smuzhiyun 			 * When allocating non-delayed allocated clusters
4342*4882a593Smuzhiyun 			 * (from fallocate, filemap, DIO, or clusters
4343*4882a593Smuzhiyun 			 * allocated when delalloc has been disabled by
4344*4882a593Smuzhiyun 			 * ext4_nonda_switch), reduce the reserved cluster
4345*4882a593Smuzhiyun 			 * count by the number of allocated clusters that
4346*4882a593Smuzhiyun 			 * have previously been delayed allocated.  Quota
4347*4882a593Smuzhiyun 			 * has been claimed by ext4_mb_new_blocks() above,
4348*4882a593Smuzhiyun 			 * so release the quota reservations made for any
4349*4882a593Smuzhiyun 			 * previously delayed allocated clusters.
4350*4882a593Smuzhiyun 			 */
4351*4882a593Smuzhiyun 			lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4352*4882a593Smuzhiyun 			len = allocated_clusters << sbi->s_cluster_bits;
4353*4882a593Smuzhiyun 			n = ext4_es_delayed_clu(inode, lblk, len);
4354*4882a593Smuzhiyun 			if (n > 0)
4355*4882a593Smuzhiyun 				ext4_da_update_reserve_space(inode, (int) n, 0);
4356*4882a593Smuzhiyun 		}
4357*4882a593Smuzhiyun 	}
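	/*
	 * Illustrative example for the branch above (assumed values): with
	 * s_cluster_bits == 4, map->m_lblk == 37 rounds down to
	 * lblk == EXT4_LBLK_CMASK(sbi, 37) == 32, and len covers
	 * allocated_clusters << 4 blocks.  Each cluster in that range that
	 * the extent status tree reports as previously delayed releases one
	 * cluster of reservation via ext4_da_update_reserve_space().
	 */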
4358*4882a593Smuzhiyun 
4359*4882a593Smuzhiyun 	/*
4360*4882a593Smuzhiyun 	 * Cache the extent and update transaction to commit on fdatasync only
4361*4882a593Smuzhiyun 	 * when it is _not_ an unwritten extent.
4362*4882a593Smuzhiyun 	 */
4363*4882a593Smuzhiyun 	if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4364*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 1);
4365*4882a593Smuzhiyun 	else
4366*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 0);
4367*4882a593Smuzhiyun 
4368*4882a593Smuzhiyun 	map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4369*4882a593Smuzhiyun 	map->m_pblk = pblk;
4370*4882a593Smuzhiyun 	map->m_len = ar.len;
4371*4882a593Smuzhiyun 	allocated = map->m_len;
4372*4882a593Smuzhiyun 	ext4_ext_show_leaf(inode, path);
4373*4882a593Smuzhiyun out:
4374*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
4375*4882a593Smuzhiyun 	kfree(path);
4376*4882a593Smuzhiyun 
4377*4882a593Smuzhiyun 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
4378*4882a593Smuzhiyun 				       err ? err : allocated);
4379*4882a593Smuzhiyun 	return err ? err : allocated;
4380*4882a593Smuzhiyun }
4381*4882a593Smuzhiyun 
4382*4882a593Smuzhiyun int ext4_ext_truncate(handle_t *handle, struct inode *inode)
4383*4882a593Smuzhiyun {
4384*4882a593Smuzhiyun 	struct super_block *sb = inode->i_sb;
4385*4882a593Smuzhiyun 	ext4_lblk_t last_block;
4386*4882a593Smuzhiyun 	int err = 0;
4387*4882a593Smuzhiyun 
4388*4882a593Smuzhiyun 	/*
4389*4882a593Smuzhiyun 	 * TODO: optimization is possible here.
4390*4882a593Smuzhiyun 	 * We probably need not scan at all,
4391*4882a593Smuzhiyun 	 * because page truncation should be enough.
4392*4882a593Smuzhiyun 	 */
4393*4882a593Smuzhiyun 
4394*4882a593Smuzhiyun 	/* we have to know where to truncate from in case of a crash */
4395*4882a593Smuzhiyun 	EXT4_I(inode)->i_disksize = inode->i_size;
4396*4882a593Smuzhiyun 	err = ext4_mark_inode_dirty(handle, inode);
4397*4882a593Smuzhiyun 	if (err)
4398*4882a593Smuzhiyun 		return err;
4399*4882a593Smuzhiyun 
4400*4882a593Smuzhiyun 	last_block = (inode->i_size + sb->s_blocksize - 1)
4401*4882a593Smuzhiyun 			>> EXT4_BLOCK_SIZE_BITS(sb);
4402*4882a593Smuzhiyun retry:
4403*4882a593Smuzhiyun 	err = ext4_es_remove_extent(inode, last_block,
4404*4882a593Smuzhiyun 				    EXT_MAX_BLOCKS - last_block);
4405*4882a593Smuzhiyun 	if (err == -ENOMEM) {
4406*4882a593Smuzhiyun 		cond_resched();
4407*4882a593Smuzhiyun 		congestion_wait(BLK_RW_ASYNC, HZ/50);
4408*4882a593Smuzhiyun 		goto retry;
4409*4882a593Smuzhiyun 	}
4410*4882a593Smuzhiyun 	if (err)
4411*4882a593Smuzhiyun 		return err;
4412*4882a593Smuzhiyun retry_remove_space:
4413*4882a593Smuzhiyun 	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4414*4882a593Smuzhiyun 	if (err == -ENOMEM) {
4415*4882a593Smuzhiyun 		cond_resched();
4416*4882a593Smuzhiyun 		congestion_wait(BLK_RW_ASYNC, HZ/50);
4417*4882a593Smuzhiyun 		goto retry_remove_space;
4418*4882a593Smuzhiyun 	}
4419*4882a593Smuzhiyun 	return err;
4420*4882a593Smuzhiyun }
4421*4882a593Smuzhiyun 
4422*4882a593Smuzhiyun static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4423*4882a593Smuzhiyun 				  ext4_lblk_t len, loff_t new_size,
4424*4882a593Smuzhiyun 				  int flags)
4425*4882a593Smuzhiyun {
4426*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
4427*4882a593Smuzhiyun 	handle_t *handle;
4428*4882a593Smuzhiyun 	int ret = 0;
4429*4882a593Smuzhiyun 	int ret2 = 0, ret3 = 0;
4430*4882a593Smuzhiyun 	int retries = 0;
4431*4882a593Smuzhiyun 	int depth = 0;
4432*4882a593Smuzhiyun 	struct ext4_map_blocks map;
4433*4882a593Smuzhiyun 	unsigned int credits;
4434*4882a593Smuzhiyun 	loff_t epos;
4435*4882a593Smuzhiyun 
4436*4882a593Smuzhiyun 	BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
4437*4882a593Smuzhiyun 	map.m_lblk = offset;
4438*4882a593Smuzhiyun 	map.m_len = len;
4439*4882a593Smuzhiyun 	/*
4440*4882a593Smuzhiyun 	 * Don't normalize the request if it can fit in one extent so
4441*4882a593Smuzhiyun 	 * that it doesn't get unnecessarily split into multiple
4442*4882a593Smuzhiyun 	 * extents.
4443*4882a593Smuzhiyun 	 */
4444*4882a593Smuzhiyun 	if (len <= EXT_UNWRITTEN_MAX_LEN)
4445*4882a593Smuzhiyun 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
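	/*
	 * For scale (assuming 4K blocks): an unwritten extent can cover at
	 * most EXT_UNWRITTEN_MAX_LEN == 32767 blocks, i.e. just under
	 * 128 MiB, so requests up to that size skip goal normalization.
	 */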
4446*4882a593Smuzhiyun 
4447*4882a593Smuzhiyun 	/*
4448*4882a593Smuzhiyun 	 * credits needed to insert 1 extent into the extent tree
4449*4882a593Smuzhiyun 	 */
4450*4882a593Smuzhiyun 	credits = ext4_chunk_trans_blocks(inode, len);
4451*4882a593Smuzhiyun 	depth = ext_depth(inode);
4452*4882a593Smuzhiyun 
4453*4882a593Smuzhiyun retry:
4454*4882a593Smuzhiyun 	while (ret >= 0 && len) {
4455*4882a593Smuzhiyun 		/*
4456*4882a593Smuzhiyun 		 * Recalculate credits when extent tree depth changes.
4457*4882a593Smuzhiyun 		 */
4458*4882a593Smuzhiyun 		if (depth != ext_depth(inode)) {
4459*4882a593Smuzhiyun 			credits = ext4_chunk_trans_blocks(inode, len);
4460*4882a593Smuzhiyun 			depth = ext_depth(inode);
4461*4882a593Smuzhiyun 		}
4462*4882a593Smuzhiyun 
4463*4882a593Smuzhiyun 		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4464*4882a593Smuzhiyun 					    credits);
4465*4882a593Smuzhiyun 		if (IS_ERR(handle)) {
4466*4882a593Smuzhiyun 			ret = PTR_ERR(handle);
4467*4882a593Smuzhiyun 			break;
4468*4882a593Smuzhiyun 		}
4469*4882a593Smuzhiyun 		ret = ext4_map_blocks(handle, inode, &map, flags);
4470*4882a593Smuzhiyun 		if (ret <= 0) {
4471*4882a593Smuzhiyun 			ext4_debug("inode #%lu: block %u: len %u: "
4472*4882a593Smuzhiyun 				   "ext4_ext_map_blocks returned %d",
4473*4882a593Smuzhiyun 				   inode->i_ino, map.m_lblk,
4474*4882a593Smuzhiyun 				   map.m_len, ret);
4475*4882a593Smuzhiyun 			ext4_mark_inode_dirty(handle, inode);
4476*4882a593Smuzhiyun 			ret2 = ext4_journal_stop(handle);
4477*4882a593Smuzhiyun 			break;
4478*4882a593Smuzhiyun 		}
4479*4882a593Smuzhiyun 		map.m_lblk += ret;
4480*4882a593Smuzhiyun 		map.m_len = len = len - ret;
4481*4882a593Smuzhiyun 		epos = (loff_t)map.m_lblk << inode->i_blkbits;
4482*4882a593Smuzhiyun 		inode->i_ctime = current_time(inode);
4483*4882a593Smuzhiyun 		if (new_size) {
4484*4882a593Smuzhiyun 			if (epos > new_size)
4485*4882a593Smuzhiyun 				epos = new_size;
4486*4882a593Smuzhiyun 			if (ext4_update_inode_size(inode, epos) & 0x1)
4487*4882a593Smuzhiyun 				inode->i_mtime = inode->i_ctime;
4488*4882a593Smuzhiyun 		}
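		/*
		 * Worked example (assumed values): with 4K blocks, after
		 * mapping ret == 8 blocks starting at lblk 0, epos == 32768;
		 * if the fallocate target new_size is only 20000 bytes,
		 * i_size is advanced to 20000 rather than to the end of the
		 * mapped range.
		 */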
4489*4882a593Smuzhiyun 		ret2 = ext4_mark_inode_dirty(handle, inode);
4490*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 1);
4491*4882a593Smuzhiyun 		ret3 = ext4_journal_stop(handle);
4492*4882a593Smuzhiyun 		ret2 = ret3 ? ret3 : ret2;
4493*4882a593Smuzhiyun 		if (unlikely(ret2))
4494*4882a593Smuzhiyun 			break;
4495*4882a593Smuzhiyun 	}
4496*4882a593Smuzhiyun 	if (ret == -ENOSPC &&
4497*4882a593Smuzhiyun 			ext4_should_retry_alloc(inode->i_sb, &retries)) {
4498*4882a593Smuzhiyun 		ret = 0;
4499*4882a593Smuzhiyun 		goto retry;
4500*4882a593Smuzhiyun 	}
4501*4882a593Smuzhiyun 
4502*4882a593Smuzhiyun 	return ret > 0 ? ret2 : ret;
4503*4882a593Smuzhiyun }
4504*4882a593Smuzhiyun 
4505*4882a593Smuzhiyun static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
4506*4882a593Smuzhiyun 
4507*4882a593Smuzhiyun static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
4508*4882a593Smuzhiyun 
4509*4882a593Smuzhiyun static long ext4_zero_range(struct file *file, loff_t offset,
4510*4882a593Smuzhiyun 			    loff_t len, int mode)
4511*4882a593Smuzhiyun {
4512*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
4513*4882a593Smuzhiyun 	handle_t *handle = NULL;
4514*4882a593Smuzhiyun 	unsigned int max_blocks;
4515*4882a593Smuzhiyun 	loff_t new_size = 0;
4516*4882a593Smuzhiyun 	int ret = 0;
4517*4882a593Smuzhiyun 	int flags;
4518*4882a593Smuzhiyun 	int credits;
4519*4882a593Smuzhiyun 	int partial_begin, partial_end;
4520*4882a593Smuzhiyun 	loff_t start, end;
4521*4882a593Smuzhiyun 	ext4_lblk_t lblk;
4522*4882a593Smuzhiyun 	unsigned int blkbits = inode->i_blkbits;
4523*4882a593Smuzhiyun 
4524*4882a593Smuzhiyun 	trace_ext4_zero_range(inode, offset, len, mode);
4525*4882a593Smuzhiyun 
4526*4882a593Smuzhiyun 	/* Call ext4_force_commit to flush all data in case of data=journal. */
4527*4882a593Smuzhiyun 	if (ext4_should_journal_data(inode)) {
4528*4882a593Smuzhiyun 		ret = ext4_force_commit(inode->i_sb);
4529*4882a593Smuzhiyun 		if (ret)
4530*4882a593Smuzhiyun 			return ret;
4531*4882a593Smuzhiyun 	}
4532*4882a593Smuzhiyun 
4533*4882a593Smuzhiyun 	/*
4534*4882a593Smuzhiyun 	 * Round up offset. This is not fallocate; we need to zero out
4535*4882a593Smuzhiyun 	 * blocks, so convert the interior, block-aligned part of the range
4536*4882a593Smuzhiyun 	 * to unwritten extents and possibly zero out the unaligned parts of
4537*4882a593Smuzhiyun 	 * the range manually.
4538*4882a593Smuzhiyun 	 */
4539*4882a593Smuzhiyun 	start = round_up(offset, 1 << blkbits);
4540*4882a593Smuzhiyun 	end = round_down((offset + len), 1 << blkbits);
4541*4882a593Smuzhiyun 
4542*4882a593Smuzhiyun 	if (start < offset || end > offset + len)
4543*4882a593Smuzhiyun 		return -EINVAL;
4544*4882a593Smuzhiyun 	partial_begin = offset & ((1 << blkbits) - 1);
4545*4882a593Smuzhiyun 	partial_end = (offset + len) & ((1 << blkbits) - 1);
4546*4882a593Smuzhiyun 
4547*4882a593Smuzhiyun 	lblk = start >> blkbits;
4548*4882a593Smuzhiyun 	max_blocks = (end >> blkbits);
4549*4882a593Smuzhiyun 	if (max_blocks < lblk)
4550*4882a593Smuzhiyun 		max_blocks = 0;
4551*4882a593Smuzhiyun 	else
4552*4882a593Smuzhiyun 		max_blocks -= lblk;
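	/*
	 * Worked example (assumed values): with blkbits == 12 (4K blocks),
	 * offset == 1000 and len == 10000 give start == 4096, end == 8192,
	 * partial_begin == 1000 and partial_end == 11000 & 4095 == 2808, so
	 * lblk == 1 and max_blocks == 1: one fully-aligned block is
	 * converted to unwritten, and the two partial blocks at the edges
	 * are zeroed out manually further below.
	 */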
4553*4882a593Smuzhiyun 
4554*4882a593Smuzhiyun 	inode_lock(inode);
4555*4882a593Smuzhiyun 
4556*4882a593Smuzhiyun 	/*
4557*4882a593Smuzhiyun 	 * Indirect files do not support unwritten extents
4558*4882a593Smuzhiyun 	 */
4559*4882a593Smuzhiyun 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4560*4882a593Smuzhiyun 		ret = -EOPNOTSUPP;
4561*4882a593Smuzhiyun 		goto out_mutex;
4562*4882a593Smuzhiyun 	}
4563*4882a593Smuzhiyun 
4564*4882a593Smuzhiyun 	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4565*4882a593Smuzhiyun 	    (offset + len > inode->i_size ||
4566*4882a593Smuzhiyun 	     offset + len > EXT4_I(inode)->i_disksize)) {
4567*4882a593Smuzhiyun 		new_size = offset + len;
4568*4882a593Smuzhiyun 		ret = inode_newsize_ok(inode, new_size);
4569*4882a593Smuzhiyun 		if (ret)
4570*4882a593Smuzhiyun 			goto out_mutex;
4571*4882a593Smuzhiyun 	}
4572*4882a593Smuzhiyun 
4573*4882a593Smuzhiyun 	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4574*4882a593Smuzhiyun 
4575*4882a593Smuzhiyun 	/* Wait for all existing dio workers; newcomers will block on i_mutex */
4576*4882a593Smuzhiyun 	inode_dio_wait(inode);
4577*4882a593Smuzhiyun 
4578*4882a593Smuzhiyun 	ret = file_modified(file);
4579*4882a593Smuzhiyun 	if (ret)
4580*4882a593Smuzhiyun 		goto out_mutex;
4581*4882a593Smuzhiyun 
4582*4882a593Smuzhiyun 	/* Preallocate the range including the unaligned edges */
4583*4882a593Smuzhiyun 	if (partial_begin || partial_end) {
4584*4882a593Smuzhiyun 		ret = ext4_alloc_file_blocks(file,
4585*4882a593Smuzhiyun 				round_down(offset, 1 << blkbits) >> blkbits,
4586*4882a593Smuzhiyun 				(round_up((offset + len), 1 << blkbits) -
4587*4882a593Smuzhiyun 				 round_down(offset, 1 << blkbits)) >> blkbits,
4588*4882a593Smuzhiyun 				new_size, flags);
4589*4882a593Smuzhiyun 		if (ret)
4590*4882a593Smuzhiyun 			goto out_mutex;
4591*4882a593Smuzhiyun 
4592*4882a593Smuzhiyun 	}
4593*4882a593Smuzhiyun 
4594*4882a593Smuzhiyun 	/* Zero range excluding the unaligned edges */
4595*4882a593Smuzhiyun 	if (max_blocks > 0) {
4596*4882a593Smuzhiyun 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4597*4882a593Smuzhiyun 			  EXT4_EX_NOCACHE);
4598*4882a593Smuzhiyun 
4599*4882a593Smuzhiyun 		/*
4600*4882a593Smuzhiyun 		 * Prevent page faults from reinstantiating pages we have
4601*4882a593Smuzhiyun 		 * released from page cache.
4602*4882a593Smuzhiyun 		 */
4603*4882a593Smuzhiyun 		down_write(&EXT4_I(inode)->i_mmap_sem);
4604*4882a593Smuzhiyun 
4605*4882a593Smuzhiyun 		ret = ext4_break_layouts(inode);
4606*4882a593Smuzhiyun 		if (ret) {
4607*4882a593Smuzhiyun 			up_write(&EXT4_I(inode)->i_mmap_sem);
4608*4882a593Smuzhiyun 			goto out_mutex;
4609*4882a593Smuzhiyun 		}
4610*4882a593Smuzhiyun 
4611*4882a593Smuzhiyun 		ret = ext4_update_disksize_before_punch(inode, offset, len);
4612*4882a593Smuzhiyun 		if (ret) {
4613*4882a593Smuzhiyun 			up_write(&EXT4_I(inode)->i_mmap_sem);
4614*4882a593Smuzhiyun 			goto out_mutex;
4615*4882a593Smuzhiyun 		}
4616*4882a593Smuzhiyun 		/* Now release the pages and zero the block-aligned parts of the pages */
4617*4882a593Smuzhiyun 		truncate_pagecache_range(inode, start, end - 1);
4618*4882a593Smuzhiyun 		inode->i_mtime = inode->i_ctime = current_time(inode);
4619*4882a593Smuzhiyun 
4620*4882a593Smuzhiyun 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4621*4882a593Smuzhiyun 					     flags);
4622*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_mmap_sem);
4623*4882a593Smuzhiyun 		if (ret)
4624*4882a593Smuzhiyun 			goto out_mutex;
4625*4882a593Smuzhiyun 	}
4626*4882a593Smuzhiyun 	if (!partial_begin && !partial_end)
4627*4882a593Smuzhiyun 		goto out_mutex;
4628*4882a593Smuzhiyun 
4629*4882a593Smuzhiyun 	/*
4630*4882a593Smuzhiyun 	 * In the worst case we have to write out two nonadjacent unwritten
4631*4882a593Smuzhiyun 	 * blocks and update the inode.
4632*4882a593Smuzhiyun 	 */
4633*4882a593Smuzhiyun 	credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4634*4882a593Smuzhiyun 	if (ext4_should_journal_data(inode))
4635*4882a593Smuzhiyun 		credits += 2;
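	/*
	 * Rough illustration (assumption; the exact count depends on what
	 * ext4_ext_index_trans_blocks() reports for this tree): two
	 * index/leaf updates plus one inode update, with two extra credits
	 * when data journalling forces the zeroed edge blocks through the
	 * journal as well.
	 */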
4636*4882a593Smuzhiyun 	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4637*4882a593Smuzhiyun 	if (IS_ERR(handle)) {
4638*4882a593Smuzhiyun 		ret = PTR_ERR(handle);
4639*4882a593Smuzhiyun 		ext4_std_error(inode->i_sb, ret);
4640*4882a593Smuzhiyun 		goto out_mutex;
4641*4882a593Smuzhiyun 	}
4642*4882a593Smuzhiyun 
4643*4882a593Smuzhiyun 	inode->i_mtime = inode->i_ctime = current_time(inode);
4644*4882a593Smuzhiyun 	if (new_size)
4645*4882a593Smuzhiyun 		ext4_update_inode_size(inode, new_size);
4646*4882a593Smuzhiyun 	ret = ext4_mark_inode_dirty(handle, inode);
4647*4882a593Smuzhiyun 	if (unlikely(ret))
4648*4882a593Smuzhiyun 		goto out_handle;
4649*4882a593Smuzhiyun 	/* Zero out the partial blocks at the edges of the range */
4650*4882a593Smuzhiyun 	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4651*4882a593Smuzhiyun 	if (ret >= 0)
4652*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 1);
4653*4882a593Smuzhiyun 
4654*4882a593Smuzhiyun 	if (file->f_flags & O_SYNC)
4655*4882a593Smuzhiyun 		ext4_handle_sync(handle);
4656*4882a593Smuzhiyun 
4657*4882a593Smuzhiyun out_handle:
4658*4882a593Smuzhiyun 	ext4_journal_stop(handle);
4659*4882a593Smuzhiyun out_mutex:
4660*4882a593Smuzhiyun 	inode_unlock(inode);
4661*4882a593Smuzhiyun 	return ret;
4662*4882a593Smuzhiyun }
4663*4882a593Smuzhiyun 
4664*4882a593Smuzhiyun /*
4665*4882a593Smuzhiyun  * Preallocate space for a file. This implements ext4's fallocate file
4666*4882a593Smuzhiyun  * operation, which gets called from the sys_fallocate system call.
4667*4882a593Smuzhiyun  * For block-mapped files, posix_fallocate should fall back to the method
4668*4882a593Smuzhiyun  * of writing zeroes to the required new blocks (the same behavior that is
4669*4882a593Smuzhiyun  * expected for file systems which do not support the fallocate() system call).
4670*4882a593Smuzhiyun  */
4671*4882a593Smuzhiyun long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4672*4882a593Smuzhiyun {
4673*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
4674*4882a593Smuzhiyun 	loff_t new_size = 0;
4675*4882a593Smuzhiyun 	unsigned int max_blocks;
4676*4882a593Smuzhiyun 	int ret = 0;
4677*4882a593Smuzhiyun 	int flags;
4678*4882a593Smuzhiyun 	ext4_lblk_t lblk;
4679*4882a593Smuzhiyun 	unsigned int blkbits = inode->i_blkbits;
4680*4882a593Smuzhiyun 
4681*4882a593Smuzhiyun 	/*
4682*4882a593Smuzhiyun 	 * Encrypted inodes can't handle collapse range or insert
4683*4882a593Smuzhiyun 	 * range since we would need to re-encrypt blocks with a
4684*4882a593Smuzhiyun 	 * different IV or XTS tweak (which are based on the logical
4685*4882a593Smuzhiyun 	 * block number).
4686*4882a593Smuzhiyun 	 */
4687*4882a593Smuzhiyun 	if (IS_ENCRYPTED(inode) &&
4688*4882a593Smuzhiyun 	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
4689*4882a593Smuzhiyun 		return -EOPNOTSUPP;
4690*4882a593Smuzhiyun 
4691*4882a593Smuzhiyun 	/* Return error if mode is not supported */
4692*4882a593Smuzhiyun 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4693*4882a593Smuzhiyun 		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4694*4882a593Smuzhiyun 		     FALLOC_FL_INSERT_RANGE))
4695*4882a593Smuzhiyun 		return -EOPNOTSUPP;
4696*4882a593Smuzhiyun 
4697*4882a593Smuzhiyun 	ext4_fc_start_update(inode);
4698*4882a593Smuzhiyun 	inode_lock(inode);
4699*4882a593Smuzhiyun 	ret = ext4_convert_inline_data(inode);
4700*4882a593Smuzhiyun 	inode_unlock(inode);
4701*4882a593Smuzhiyun 	if (ret)
4702*4882a593Smuzhiyun 		goto exit;
4703*4882a593Smuzhiyun 
4704*4882a593Smuzhiyun 	if (mode & FALLOC_FL_PUNCH_HOLE) {
4705*4882a593Smuzhiyun 		ret = ext4_punch_hole(file, offset, len);
4706*4882a593Smuzhiyun 		goto exit;
4707*4882a593Smuzhiyun 	}
4708*4882a593Smuzhiyun 
4709*4882a593Smuzhiyun 	if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4710*4882a593Smuzhiyun 		ret = ext4_collapse_range(file, offset, len);
4711*4882a593Smuzhiyun 		goto exit;
4712*4882a593Smuzhiyun 	}
4713*4882a593Smuzhiyun 
4714*4882a593Smuzhiyun 	if (mode & FALLOC_FL_INSERT_RANGE) {
4715*4882a593Smuzhiyun 		ret = ext4_insert_range(file, offset, len);
4716*4882a593Smuzhiyun 		goto exit;
4717*4882a593Smuzhiyun 	}
4718*4882a593Smuzhiyun 
4719*4882a593Smuzhiyun 	if (mode & FALLOC_FL_ZERO_RANGE) {
4720*4882a593Smuzhiyun 		ret = ext4_zero_range(file, offset, len, mode);
4721*4882a593Smuzhiyun 		goto exit;
4722*4882a593Smuzhiyun 	}
4723*4882a593Smuzhiyun 	trace_ext4_fallocate_enter(inode, offset, len, mode);
4724*4882a593Smuzhiyun 	lblk = offset >> blkbits;
4725*4882a593Smuzhiyun 
4726*4882a593Smuzhiyun 	max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4727*4882a593Smuzhiyun 	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4728*4882a593Smuzhiyun 
4729*4882a593Smuzhiyun 	inode_lock(inode);
4730*4882a593Smuzhiyun 
4731*4882a593Smuzhiyun 	/*
4732*4882a593Smuzhiyun 	 * We support preallocation for extent-based files only.
4733*4882a593Smuzhiyun 	 */
4734*4882a593Smuzhiyun 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4735*4882a593Smuzhiyun 		ret = -EOPNOTSUPP;
4736*4882a593Smuzhiyun 		goto out;
4737*4882a593Smuzhiyun 	}
4738*4882a593Smuzhiyun 
4739*4882a593Smuzhiyun 	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4740*4882a593Smuzhiyun 	    (offset + len > inode->i_size ||
4741*4882a593Smuzhiyun 	     offset + len > EXT4_I(inode)->i_disksize)) {
4742*4882a593Smuzhiyun 		new_size = offset + len;
4743*4882a593Smuzhiyun 		ret = inode_newsize_ok(inode, new_size);
4744*4882a593Smuzhiyun 		if (ret)
4745*4882a593Smuzhiyun 			goto out;
4746*4882a593Smuzhiyun 	}
4747*4882a593Smuzhiyun 
4748*4882a593Smuzhiyun 	/* Wait for all existing dio workers; newcomers will block on i_mutex */
4749*4882a593Smuzhiyun 	inode_dio_wait(inode);
4750*4882a593Smuzhiyun 
4751*4882a593Smuzhiyun 	ret = file_modified(file);
4752*4882a593Smuzhiyun 	if (ret)
4753*4882a593Smuzhiyun 		goto out;
4754*4882a593Smuzhiyun 
4755*4882a593Smuzhiyun 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
4756*4882a593Smuzhiyun 	if (ret)
4757*4882a593Smuzhiyun 		goto out;
4758*4882a593Smuzhiyun 
4759*4882a593Smuzhiyun 	if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4760*4882a593Smuzhiyun 		ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4761*4882a593Smuzhiyun 					EXT4_I(inode)->i_sync_tid);
4762*4882a593Smuzhiyun 	}
4763*4882a593Smuzhiyun out:
4764*4882a593Smuzhiyun 	inode_unlock(inode);
4765*4882a593Smuzhiyun 	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4766*4882a593Smuzhiyun exit:
4767*4882a593Smuzhiyun 	ext4_fc_stop_update(inode);
4768*4882a593Smuzhiyun 	return ret;
4769*4882a593Smuzhiyun }
4770*4882a593Smuzhiyun 
4771*4882a593Smuzhiyun /*
4772*4882a593Smuzhiyun  * This function converts a range of blocks to written extents.
4773*4882a593Smuzhiyun  * The caller of this function will pass the start offset and the size;
4774*4882a593Smuzhiyun  * all unwritten extents within this range will be converted to
4775*4882a593Smuzhiyun  * written extents.
4776*4882a593Smuzhiyun  *
4777*4882a593Smuzhiyun  * This function is called from the direct I/O end_io callback
4778*4882a593Smuzhiyun  * function, to convert the fallocated extents after I/O is completed.
4779*4882a593Smuzhiyun  * Returns 0 on success.
4780*4882a593Smuzhiyun  */
4781*4882a593Smuzhiyun int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4782*4882a593Smuzhiyun 				   loff_t offset, ssize_t len)
4783*4882a593Smuzhiyun {
4784*4882a593Smuzhiyun 	unsigned int max_blocks;
4785*4882a593Smuzhiyun 	int ret = 0, ret2 = 0, ret3 = 0;
4786*4882a593Smuzhiyun 	struct ext4_map_blocks map;
4787*4882a593Smuzhiyun 	unsigned int blkbits = inode->i_blkbits;
4788*4882a593Smuzhiyun 	unsigned int credits = 0;
4789*4882a593Smuzhiyun 
4790*4882a593Smuzhiyun 	map.m_lblk = offset >> blkbits;
4791*4882a593Smuzhiyun 	max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4792*4882a593Smuzhiyun 
4793*4882a593Smuzhiyun 	if (!handle) {
4794*4882a593Smuzhiyun 		/*
4795*4882a593Smuzhiyun 		 * credits needed to insert 1 extent into the extent tree
4796*4882a593Smuzhiyun 		 */
4797*4882a593Smuzhiyun 		credits = ext4_chunk_trans_blocks(inode, max_blocks);
4798*4882a593Smuzhiyun 	}
4799*4882a593Smuzhiyun 	while (ret >= 0 && ret < max_blocks) {
4800*4882a593Smuzhiyun 		map.m_lblk += ret;
4801*4882a593Smuzhiyun 		map.m_len = (max_blocks -= ret);
4802*4882a593Smuzhiyun 		if (credits) {
4803*4882a593Smuzhiyun 			handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4804*4882a593Smuzhiyun 						    credits);
4805*4882a593Smuzhiyun 			if (IS_ERR(handle)) {
4806*4882a593Smuzhiyun 				ret = PTR_ERR(handle);
4807*4882a593Smuzhiyun 				break;
4808*4882a593Smuzhiyun 			}
4809*4882a593Smuzhiyun 		}
4810*4882a593Smuzhiyun 		ret = ext4_map_blocks(handle, inode, &map,
4811*4882a593Smuzhiyun 				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4812*4882a593Smuzhiyun 		if (ret <= 0)
4813*4882a593Smuzhiyun 			ext4_warning(inode->i_sb,
4814*4882a593Smuzhiyun 				     "inode #%lu: block %u: len %u: "
4815*4882a593Smuzhiyun 				     "ext4_ext_map_blocks returned %d",
4816*4882a593Smuzhiyun 				     inode->i_ino, map.m_lblk,
4817*4882a593Smuzhiyun 				     map.m_len, ret);
4818*4882a593Smuzhiyun 		ret2 = ext4_mark_inode_dirty(handle, inode);
4819*4882a593Smuzhiyun 		if (credits) {
4820*4882a593Smuzhiyun 			ret3 = ext4_journal_stop(handle);
4821*4882a593Smuzhiyun 			if (unlikely(ret3))
4822*4882a593Smuzhiyun 				ret2 = ret3;
4823*4882a593Smuzhiyun 		}
4824*4882a593Smuzhiyun 
4825*4882a593Smuzhiyun 		if (ret <= 0 || ret2)
4826*4882a593Smuzhiyun 			break;
4827*4882a593Smuzhiyun 	}
4828*4882a593Smuzhiyun 	return ret > 0 ? ret2 : ret;
4829*4882a593Smuzhiyun }
4830*4882a593Smuzhiyun 
4831*4882a593Smuzhiyun int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
4832*4882a593Smuzhiyun {
4833*4882a593Smuzhiyun 	int ret = 0, err = 0;
4834*4882a593Smuzhiyun 	struct ext4_io_end_vec *io_end_vec;
4835*4882a593Smuzhiyun 
4836*4882a593Smuzhiyun 	/*
4837*4882a593Smuzhiyun 	 * This is somewhat ugly but the idea is clear: when a transaction is
4838*4882a593Smuzhiyun 	 * reserved, everything goes into it. Otherwise we'd rather start
4839*4882a593Smuzhiyun 	 * several smaller transactions, converting each extent separately.
4840*4882a593Smuzhiyun 	 */
4841*4882a593Smuzhiyun 	if (handle) {
4842*4882a593Smuzhiyun 		handle = ext4_journal_start_reserved(handle,
4843*4882a593Smuzhiyun 						     EXT4_HT_EXT_CONVERT);
4844*4882a593Smuzhiyun 		if (IS_ERR(handle))
4845*4882a593Smuzhiyun 			return PTR_ERR(handle);
4846*4882a593Smuzhiyun 	}
4847*4882a593Smuzhiyun 
4848*4882a593Smuzhiyun 	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4849*4882a593Smuzhiyun 		ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4850*4882a593Smuzhiyun 						     io_end_vec->offset,
4851*4882a593Smuzhiyun 						     io_end_vec->size);
4852*4882a593Smuzhiyun 		if (ret)
4853*4882a593Smuzhiyun 			break;
4854*4882a593Smuzhiyun 	}
4855*4882a593Smuzhiyun 
4856*4882a593Smuzhiyun 	if (handle)
4857*4882a593Smuzhiyun 		err = ext4_journal_stop(handle);
4858*4882a593Smuzhiyun 
4859*4882a593Smuzhiyun 	return ret < 0 ? ret : err;
4860*4882a593Smuzhiyun }
4861*4882a593Smuzhiyun 
4862*4882a593Smuzhiyun static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
4863*4882a593Smuzhiyun {
4864*4882a593Smuzhiyun 	__u64 physical = 0;
4865*4882a593Smuzhiyun 	__u64 length = 0;
4866*4882a593Smuzhiyun 	int blockbits = inode->i_sb->s_blocksize_bits;
4867*4882a593Smuzhiyun 	int error = 0;
4868*4882a593Smuzhiyun 	u16 iomap_type;
4869*4882a593Smuzhiyun 
4870*4882a593Smuzhiyun 	/* in-inode? */
4871*4882a593Smuzhiyun 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
4872*4882a593Smuzhiyun 		struct ext4_iloc iloc;
4873*4882a593Smuzhiyun 		int offset;	/* offset of xattr in inode */
4874*4882a593Smuzhiyun 
4875*4882a593Smuzhiyun 		error = ext4_get_inode_loc(inode, &iloc);
4876*4882a593Smuzhiyun 		if (error)
4877*4882a593Smuzhiyun 			return error;
4878*4882a593Smuzhiyun 		physical = (__u64)iloc.bh->b_blocknr << blockbits;
4879*4882a593Smuzhiyun 		offset = EXT4_GOOD_OLD_INODE_SIZE +
4880*4882a593Smuzhiyun 				EXT4_I(inode)->i_extra_isize;
4881*4882a593Smuzhiyun 		physical += offset;
4882*4882a593Smuzhiyun 		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
4883*4882a593Smuzhiyun 		brelse(iloc.bh);
4884*4882a593Smuzhiyun 		iomap_type = IOMAP_INLINE;
4885*4882a593Smuzhiyun 	} else if (EXT4_I(inode)->i_file_acl) { /* external block */
4886*4882a593Smuzhiyun 		physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
4887*4882a593Smuzhiyun 		length = inode->i_sb->s_blocksize;
4888*4882a593Smuzhiyun 		iomap_type = IOMAP_MAPPED;
4889*4882a593Smuzhiyun 	} else {
4890*4882a593Smuzhiyun 		/* no in-inode or external block for xattr, so return -ENOENT */
4891*4882a593Smuzhiyun 		error = -ENOENT;
4892*4882a593Smuzhiyun 		goto out;
4893*4882a593Smuzhiyun 	}
4894*4882a593Smuzhiyun 
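	/*
	 * Worked example for the in-inode case (assumed values): with
	 * 256-byte on-disk inodes and i_extra_isize == 32, the xattr area
	 * starts at byte EXT4_GOOD_OLD_INODE_SIZE + 32 == 160 of the inode
	 * record, so length == s_inode_size - 160 == 96 bytes.
	 */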
4895*4882a593Smuzhiyun 	iomap->addr = physical;
4896*4882a593Smuzhiyun 	iomap->offset = 0;
4897*4882a593Smuzhiyun 	iomap->length = length;
4898*4882a593Smuzhiyun 	iomap->type = iomap_type;
4899*4882a593Smuzhiyun 	iomap->flags = 0;
4900*4882a593Smuzhiyun out:
4901*4882a593Smuzhiyun 	return error;
4902*4882a593Smuzhiyun }
4903*4882a593Smuzhiyun 
4904*4882a593Smuzhiyun static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4905*4882a593Smuzhiyun 				  loff_t length, unsigned flags,
4906*4882a593Smuzhiyun 				  struct iomap *iomap, struct iomap *srcmap)
4907*4882a593Smuzhiyun {
4908*4882a593Smuzhiyun 	int error;
4909*4882a593Smuzhiyun 
4910*4882a593Smuzhiyun 	error = ext4_iomap_xattr_fiemap(inode, iomap);
4911*4882a593Smuzhiyun 	if (error == 0 && (offset >= iomap->length))
4912*4882a593Smuzhiyun 		error = -ENOENT;
4913*4882a593Smuzhiyun 	return error;
4914*4882a593Smuzhiyun }
4915*4882a593Smuzhiyun 
4916*4882a593Smuzhiyun static const struct iomap_ops ext4_iomap_xattr_ops = {
4917*4882a593Smuzhiyun 	.iomap_begin		= ext4_iomap_xattr_begin,
4918*4882a593Smuzhiyun };
4919*4882a593Smuzhiyun 
4920*4882a593Smuzhiyun static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4921*4882a593Smuzhiyun {
4922*4882a593Smuzhiyun 	u64 maxbytes;
4923*4882a593Smuzhiyun 
4924*4882a593Smuzhiyun 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4925*4882a593Smuzhiyun 		maxbytes = inode->i_sb->s_maxbytes;
4926*4882a593Smuzhiyun 	else
4927*4882a593Smuzhiyun 		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4928*4882a593Smuzhiyun 
4929*4882a593Smuzhiyun 	if (*len == 0)
4930*4882a593Smuzhiyun 		return -EINVAL;
4931*4882a593Smuzhiyun 	if (start > maxbytes)
4932*4882a593Smuzhiyun 		return -EFBIG;
4933*4882a593Smuzhiyun 
4934*4882a593Smuzhiyun 	/*
4935*4882a593Smuzhiyun 	 * Shrink request scope to what the fs can actually handle.
4936*4882a593Smuzhiyun 	 */
4937*4882a593Smuzhiyun 	if (*len > maxbytes || (maxbytes - *len) < start)
4938*4882a593Smuzhiyun 		*len = maxbytes - start;
4939*4882a593Smuzhiyun 	return 0;
4940*4882a593Smuzhiyun }
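/*
 * Worked example for the clamping above (assumed toy values): with
 * maxbytes == 100, a request of start == 40 and *len == 80 would run past
 * the fs limit, so *len is shrunk to 100 - 40 == 60; a request whose start
 * already lies beyond maxbytes fails with -EFBIG instead.
 */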
4941*4882a593Smuzhiyun 
4942*4882a593Smuzhiyun int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4943*4882a593Smuzhiyun 		u64 start, u64 len)
4944*4882a593Smuzhiyun {
4945*4882a593Smuzhiyun 	int error = 0;
4946*4882a593Smuzhiyun 
4947*4882a593Smuzhiyun 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4948*4882a593Smuzhiyun 		error = ext4_ext_precache(inode);
4949*4882a593Smuzhiyun 		if (error)
4950*4882a593Smuzhiyun 			return error;
4951*4882a593Smuzhiyun 		fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4952*4882a593Smuzhiyun 	}
4953*4882a593Smuzhiyun 
4954*4882a593Smuzhiyun 	/*
4955*4882a593Smuzhiyun 	 * For block-mapped files the maximum size limit could be smaller
4956*4882a593Smuzhiyun 	 * than s_maxbytes, so check len here manually instead of just
4957*4882a593Smuzhiyun 	 * relying on the generic check.
4958*4882a593Smuzhiyun 	 */
4959*4882a593Smuzhiyun 	error = ext4_fiemap_check_ranges(inode, start, &len);
4960*4882a593Smuzhiyun 	if (error)
4961*4882a593Smuzhiyun 		return error;
4962*4882a593Smuzhiyun 
4963*4882a593Smuzhiyun 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
4964*4882a593Smuzhiyun 		fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
4965*4882a593Smuzhiyun 		return iomap_fiemap(inode, fieinfo, start, len,
4966*4882a593Smuzhiyun 				    &ext4_iomap_xattr_ops);
4967*4882a593Smuzhiyun 	}
4968*4882a593Smuzhiyun 
4969*4882a593Smuzhiyun 	return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
4970*4882a593Smuzhiyun }
4971*4882a593Smuzhiyun 
4972*4882a593Smuzhiyun int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4973*4882a593Smuzhiyun 		      __u64 start, __u64 len)
4974*4882a593Smuzhiyun {
4975*4882a593Smuzhiyun 	ext4_lblk_t start_blk, len_blks;
4976*4882a593Smuzhiyun 	__u64 last_blk;
4977*4882a593Smuzhiyun 	int error = 0;
4978*4882a593Smuzhiyun 
4979*4882a593Smuzhiyun 	if (ext4_has_inline_data(inode)) {
4980*4882a593Smuzhiyun 		int has_inline;
4981*4882a593Smuzhiyun 
4982*4882a593Smuzhiyun 		down_read(&EXT4_I(inode)->xattr_sem);
4983*4882a593Smuzhiyun 		has_inline = ext4_has_inline_data(inode);
4984*4882a593Smuzhiyun 		up_read(&EXT4_I(inode)->xattr_sem);
4985*4882a593Smuzhiyun 		if (has_inline)
4986*4882a593Smuzhiyun 			return 0;
4987*4882a593Smuzhiyun 	}
4988*4882a593Smuzhiyun 
4989*4882a593Smuzhiyun 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4990*4882a593Smuzhiyun 		error = ext4_ext_precache(inode);
4991*4882a593Smuzhiyun 		if (error)
4992*4882a593Smuzhiyun 			return error;
4993*4882a593Smuzhiyun 		fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4994*4882a593Smuzhiyun 	}
4995*4882a593Smuzhiyun 
4996*4882a593Smuzhiyun 	error = fiemap_prep(inode, fieinfo, start, &len, 0);
4997*4882a593Smuzhiyun 	if (error)
4998*4882a593Smuzhiyun 		return error;
4999*4882a593Smuzhiyun 
5000*4882a593Smuzhiyun 	error = ext4_fiemap_check_ranges(inode, start, &len);
5001*4882a593Smuzhiyun 	if (error)
5002*4882a593Smuzhiyun 		return error;
5003*4882a593Smuzhiyun 
5004*4882a593Smuzhiyun 	start_blk = start >> inode->i_sb->s_blocksize_bits;
5005*4882a593Smuzhiyun 	last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
5006*4882a593Smuzhiyun 	if (last_blk >= EXT_MAX_BLOCKS)
5007*4882a593Smuzhiyun 		last_blk = EXT_MAX_BLOCKS-1;
5008*4882a593Smuzhiyun 	len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
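	/*
	 * Example (assumed 4K blocks): start == 5000 and len == 10000 map
	 * to start_blk == 1 and last_blk == 14999 >> 12 == 3, so
	 * len_blks == 3 blocks' worth of extent status entries are walked.
	 */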
5009*4882a593Smuzhiyun 
5010*4882a593Smuzhiyun 	/*
5011*4882a593Smuzhiyun 	 * Walk the extent tree gathering extent information
5012*4882a593Smuzhiyun 	 * and pushing extents back to the user.
5013*4882a593Smuzhiyun 	 */
5014*4882a593Smuzhiyun 	return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
5015*4882a593Smuzhiyun }
5016*4882a593Smuzhiyun 
5017*4882a593Smuzhiyun /*
5018*4882a593Smuzhiyun  * ext4_ext_shift_path_extents:
5019*4882a593Smuzhiyun  * Shift the extents of a path structure lying between path[depth].p_ext
5020*4882a593Smuzhiyun  * and EXT_LAST_EXTENT(path[depth].p_hdr) by @shift blocks. @SHIFT tells
5021*4882a593Smuzhiyun  * whether it is a right-shift or a left-shift operation.
5022*4882a593Smuzhiyun  */
5023*4882a593Smuzhiyun static int
5024*4882a593Smuzhiyun ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5025*4882a593Smuzhiyun 			    struct inode *inode, handle_t *handle,
5026*4882a593Smuzhiyun 			    enum SHIFT_DIRECTION SHIFT)
5027*4882a593Smuzhiyun {
5028*4882a593Smuzhiyun 	int depth, err = 0;
5029*4882a593Smuzhiyun 	struct ext4_extent *ex_start, *ex_last;
5030*4882a593Smuzhiyun 	bool update = false;
5031*4882a593Smuzhiyun 	int credits, restart_credits;
5032*4882a593Smuzhiyun 	depth = path->p_depth;
5033*4882a593Smuzhiyun 
5034*4882a593Smuzhiyun 	while (depth >= 0) {
5035*4882a593Smuzhiyun 		if (depth == path->p_depth) {
5036*4882a593Smuzhiyun 			ex_start = path[depth].p_ext;
5037*4882a593Smuzhiyun 			if (!ex_start)
5038*4882a593Smuzhiyun 				return -EFSCORRUPTED;
5039*4882a593Smuzhiyun 
5040*4882a593Smuzhiyun 			ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5041*4882a593Smuzhiyun 			/* leaf + sb + inode */
5042*4882a593Smuzhiyun 			credits = 3;
5043*4882a593Smuzhiyun 			if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5044*4882a593Smuzhiyun 				update = true;
5045*4882a593Smuzhiyun 				/* extent tree + sb + inode */
5046*4882a593Smuzhiyun 				credits = depth + 2;
5047*4882a593Smuzhiyun 			}
5048*4882a593Smuzhiyun 
5049*4882a593Smuzhiyun 			restart_credits = ext4_writepage_trans_blocks(inode);
5050*4882a593Smuzhiyun 			err = ext4_datasem_ensure_credits(handle, inode, credits,
5051*4882a593Smuzhiyun 					restart_credits, 0);
5052*4882a593Smuzhiyun 			if (err) {
5053*4882a593Smuzhiyun 				if (err > 0)
5054*4882a593Smuzhiyun 					err = -EAGAIN;
5055*4882a593Smuzhiyun 				goto out;
5056*4882a593Smuzhiyun 			}
5057*4882a593Smuzhiyun 
5058*4882a593Smuzhiyun 			err = ext4_ext_get_access(handle, inode, path + depth);
5059*4882a593Smuzhiyun 			if (err)
5060*4882a593Smuzhiyun 				goto out;
5061*4882a593Smuzhiyun 
5062*4882a593Smuzhiyun 			while (ex_start <= ex_last) {
5063*4882a593Smuzhiyun 				if (SHIFT == SHIFT_LEFT) {
5064*4882a593Smuzhiyun 					le32_add_cpu(&ex_start->ee_block,
5065*4882a593Smuzhiyun 						-shift);
5066*4882a593Smuzhiyun 					/* Try to merge to the left. */
5067*4882a593Smuzhiyun 					if ((ex_start >
5068*4882a593Smuzhiyun 					    EXT_FIRST_EXTENT(path[depth].p_hdr))
5069*4882a593Smuzhiyun 					    &&
5070*4882a593Smuzhiyun 					    ext4_ext_try_to_merge_right(inode,
5071*4882a593Smuzhiyun 					    path, ex_start - 1))
5072*4882a593Smuzhiyun 						ex_last--;
5073*4882a593Smuzhiyun 					else
5074*4882a593Smuzhiyun 						ex_start++;
5075*4882a593Smuzhiyun 				} else {
5076*4882a593Smuzhiyun 					le32_add_cpu(&ex_last->ee_block, shift);
5077*4882a593Smuzhiyun 					ext4_ext_try_to_merge_right(inode, path,
5078*4882a593Smuzhiyun 						ex_last);
5079*4882a593Smuzhiyun 					ex_last--;
5080*4882a593Smuzhiyun 				}
5081*4882a593Smuzhiyun 			}
5082*4882a593Smuzhiyun 			err = ext4_ext_dirty(handle, inode, path + depth);
5083*4882a593Smuzhiyun 			if (err)
5084*4882a593Smuzhiyun 				goto out;
5085*4882a593Smuzhiyun 
5086*4882a593Smuzhiyun 			if (--depth < 0 || !update)
5087*4882a593Smuzhiyun 				break;
5088*4882a593Smuzhiyun 		}
5089*4882a593Smuzhiyun 
5090*4882a593Smuzhiyun 		/* Update index too */
5091*4882a593Smuzhiyun 		err = ext4_ext_get_access(handle, inode, path + depth);
5092*4882a593Smuzhiyun 		if (err)
5093*4882a593Smuzhiyun 			goto out;
5094*4882a593Smuzhiyun 
5095*4882a593Smuzhiyun 		if (SHIFT == SHIFT_LEFT)
5096*4882a593Smuzhiyun 			le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5097*4882a593Smuzhiyun 		else
5098*4882a593Smuzhiyun 			le32_add_cpu(&path[depth].p_idx->ei_block, shift);
5099*4882a593Smuzhiyun 		err = ext4_ext_dirty(handle, inode, path + depth);
5100*4882a593Smuzhiyun 		if (err)
5101*4882a593Smuzhiyun 			goto out;
5102*4882a593Smuzhiyun 
5103*4882a593Smuzhiyun 		/* we are done if the current index is not a starting index */
5104*4882a593Smuzhiyun 		if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
5105*4882a593Smuzhiyun 			break;
5106*4882a593Smuzhiyun 
5107*4882a593Smuzhiyun 		depth--;
5108*4882a593Smuzhiyun 	}
5109*4882a593Smuzhiyun 
5110*4882a593Smuzhiyun out:
5111*4882a593Smuzhiyun 	return err;
5112*4882a593Smuzhiyun }
5113*4882a593Smuzhiyun 
5114*4882a593Smuzhiyun /*
5115*4882a593Smuzhiyun  * ext4_ext_shift_extents:
5116*4882a593Smuzhiyun  * All the extents which lie in the range from @start to the last allocated
5117*4882a593Smuzhiyun  * block for the @inode are shifted either towards the left or the right
5118*4882a593Smuzhiyun  * (depending upon @SHIFT) by @shift blocks.
5119*4882a593Smuzhiyun  * On success, 0 is returned; an error code otherwise.
5120*4882a593Smuzhiyun  */
5121*4882a593Smuzhiyun static int
5122*4882a593Smuzhiyun ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5123*4882a593Smuzhiyun 		       ext4_lblk_t start, ext4_lblk_t shift,
5124*4882a593Smuzhiyun 		       enum SHIFT_DIRECTION SHIFT)
5125*4882a593Smuzhiyun {
5126*4882a593Smuzhiyun 	struct ext4_ext_path *path;
5127*4882a593Smuzhiyun 	int ret = 0, depth;
5128*4882a593Smuzhiyun 	struct ext4_extent *extent;
5129*4882a593Smuzhiyun 	ext4_lblk_t stop, *iterator, ex_start, ex_end;
5130*4882a593Smuzhiyun 	ext4_lblk_t tmp = EXT_MAX_BLOCKS;
5131*4882a593Smuzhiyun 
5132*4882a593Smuzhiyun 	/* Let path point to the last extent */
5133*4882a593Smuzhiyun 	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5134*4882a593Smuzhiyun 				EXT4_EX_NOCACHE);
5135*4882a593Smuzhiyun 	if (IS_ERR(path))
5136*4882a593Smuzhiyun 		return PTR_ERR(path);
5137*4882a593Smuzhiyun 
5138*4882a593Smuzhiyun 	depth = path->p_depth;
5139*4882a593Smuzhiyun 	extent = path[depth].p_ext;
5140*4882a593Smuzhiyun 	if (!extent)
5141*4882a593Smuzhiyun 		goto out;
5142*4882a593Smuzhiyun 
5143*4882a593Smuzhiyun 	stop = le32_to_cpu(extent->ee_block);
5144*4882a593Smuzhiyun 
5145*4882a593Smuzhiyun 	/*
5146*4882a593Smuzhiyun 	 * For left shifts, make sure the hole on the left is big enough to
5147*4882a593Smuzhiyun 	 * accommodate the shift.  For right shifts, make sure the last extent
5148*4882a593Smuzhiyun 	 * won't be shifted beyond EXT_MAX_BLOCKS.
5149*4882a593Smuzhiyun 	 */
5150*4882a593Smuzhiyun 	if (SHIFT == SHIFT_LEFT) {
5151*4882a593Smuzhiyun 		path = ext4_find_extent(inode, start - 1, &path,
5152*4882a593Smuzhiyun 					EXT4_EX_NOCACHE);
5153*4882a593Smuzhiyun 		if (IS_ERR(path))
5154*4882a593Smuzhiyun 			return PTR_ERR(path);
5155*4882a593Smuzhiyun 		depth = path->p_depth;
5156*4882a593Smuzhiyun 		extent =  path[depth].p_ext;
5157*4882a593Smuzhiyun 		if (extent) {
5158*4882a593Smuzhiyun 			ex_start = le32_to_cpu(extent->ee_block);
5159*4882a593Smuzhiyun 			ex_end = le32_to_cpu(extent->ee_block) +
5160*4882a593Smuzhiyun 				ext4_ext_get_actual_len(extent);
5161*4882a593Smuzhiyun 		} else {
5162*4882a593Smuzhiyun 			ex_start = 0;
5163*4882a593Smuzhiyun 			ex_end = 0;
5164*4882a593Smuzhiyun 		}
5165*4882a593Smuzhiyun 
5166*4882a593Smuzhiyun 		if ((start == ex_start && shift > ex_start) ||
5167*4882a593Smuzhiyun 		    (shift > start - ex_end)) {
5168*4882a593Smuzhiyun 			ret = -EINVAL;
5169*4882a593Smuzhiyun 			goto out;
5170*4882a593Smuzhiyun 		}
5171*4882a593Smuzhiyun 	} else {
5172*4882a593Smuzhiyun 		if (shift > EXT_MAX_BLOCKS -
5173*4882a593Smuzhiyun 		    (stop + ext4_ext_get_actual_len(extent))) {
5174*4882a593Smuzhiyun 			ret = -EINVAL;
5175*4882a593Smuzhiyun 			goto out;
5176*4882a593Smuzhiyun 		}
5177*4882a593Smuzhiyun 	}
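	/*
	 * Illustration of the checks above (assumed values): collapsing at
	 * start == 100 by shift == 10 requires the preceding extent to end
	 * at or before block 90 (shift <= start - ex_end); for a right
	 * shift, the last extent must still end at or below EXT_MAX_BLOCKS
	 * after adding @shift.
	 */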
5178*4882a593Smuzhiyun 
5179*4882a593Smuzhiyun 	/*
5180*4882a593Smuzhiyun 	 * In case of left shift, iterator points to start and it is increased
5181*4882a593Smuzhiyun 	 * till we reach stop. In case of right shift, iterator points to stop
5182*4882a593Smuzhiyun 	 * and it is decreased till we reach start.
5183*4882a593Smuzhiyun 	 */
5184*4882a593Smuzhiyun again:
5185*4882a593Smuzhiyun 	ret = 0;
5186*4882a593Smuzhiyun 	if (SHIFT == SHIFT_LEFT)
5187*4882a593Smuzhiyun 		iterator = &start;
5188*4882a593Smuzhiyun 	else
5189*4882a593Smuzhiyun 		iterator = &stop;
5190*4882a593Smuzhiyun 
5191*4882a593Smuzhiyun 	if (tmp != EXT_MAX_BLOCKS)
5192*4882a593Smuzhiyun 		*iterator = tmp;
5193*4882a593Smuzhiyun 
5194*4882a593Smuzhiyun 	/*
5195*4882a593Smuzhiyun 	 * It's safe to start updating extents.  start and stop are unsigned,
5196*4882a593Smuzhiyun 	 * so in case of a right shift, if an extent with block 0 is reached,
5197*4882a593Smuzhiyun 	 * iterator becomes NULL to indicate the end of the loop.
5198*4882a593Smuzhiyun 	 */
5199*4882a593Smuzhiyun 	while (iterator && start <= stop) {
5200*4882a593Smuzhiyun 		path = ext4_find_extent(inode, *iterator, &path,
5201*4882a593Smuzhiyun 					EXT4_EX_NOCACHE);
5202*4882a593Smuzhiyun 		if (IS_ERR(path))
5203*4882a593Smuzhiyun 			return PTR_ERR(path);
5204*4882a593Smuzhiyun 		depth = path->p_depth;
5205*4882a593Smuzhiyun 		extent = path[depth].p_ext;
5206*4882a593Smuzhiyun 		if (!extent) {
5207*4882a593Smuzhiyun 			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5208*4882a593Smuzhiyun 					 (unsigned long) *iterator);
5209*4882a593Smuzhiyun 			return -EFSCORRUPTED;
5210*4882a593Smuzhiyun 		}
5211*4882a593Smuzhiyun 		if (SHIFT == SHIFT_LEFT && *iterator >
5212*4882a593Smuzhiyun 		    le32_to_cpu(extent->ee_block)) {
5213*4882a593Smuzhiyun 			/* Hole, move to the next extent */
5214*4882a593Smuzhiyun 			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5215*4882a593Smuzhiyun 				path[depth].p_ext++;
5216*4882a593Smuzhiyun 			} else {
5217*4882a593Smuzhiyun 				*iterator = ext4_ext_next_allocated_block(path);
5218*4882a593Smuzhiyun 				continue;
5219*4882a593Smuzhiyun 			}
5220*4882a593Smuzhiyun 		}
5221*4882a593Smuzhiyun 
5222*4882a593Smuzhiyun 		tmp = *iterator;
5223*4882a593Smuzhiyun 		if (SHIFT == SHIFT_LEFT) {
5224*4882a593Smuzhiyun 			extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5225*4882a593Smuzhiyun 			*iterator = le32_to_cpu(extent->ee_block) +
5226*4882a593Smuzhiyun 					ext4_ext_get_actual_len(extent);
5227*4882a593Smuzhiyun 		} else {
5228*4882a593Smuzhiyun 			extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
5229*4882a593Smuzhiyun 			if (le32_to_cpu(extent->ee_block) > start)
5230*4882a593Smuzhiyun 				*iterator = le32_to_cpu(extent->ee_block) - 1;
5231*4882a593Smuzhiyun 			else if (le32_to_cpu(extent->ee_block) == start)
5232*4882a593Smuzhiyun 				iterator = NULL;
5233*4882a593Smuzhiyun 			else {
5234*4882a593Smuzhiyun 				extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5235*4882a593Smuzhiyun 				while (le32_to_cpu(extent->ee_block) >= start)
5236*4882a593Smuzhiyun 					extent--;
5237*4882a593Smuzhiyun 
5238*4882a593Smuzhiyun 				if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
5239*4882a593Smuzhiyun 					break;
5240*4882a593Smuzhiyun 
5241*4882a593Smuzhiyun 				extent++;
5242*4882a593Smuzhiyun 				iterator = NULL;
5243*4882a593Smuzhiyun 			}
5244*4882a593Smuzhiyun 			path[depth].p_ext = extent;
5245*4882a593Smuzhiyun 		}
5246*4882a593Smuzhiyun 		ret = ext4_ext_shift_path_extents(path, shift, inode,
5247*4882a593Smuzhiyun 				handle, SHIFT);
5248*4882a593Smuzhiyun 		/* iterator can be NULL, which means we should break */
5249*4882a593Smuzhiyun 		if (ret == -EAGAIN)
5250*4882a593Smuzhiyun 			goto again;
5251*4882a593Smuzhiyun 		if (ret)
5252*4882a593Smuzhiyun 			break;
5253*4882a593Smuzhiyun 	}
5254*4882a593Smuzhiyun out:
5255*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
5256*4882a593Smuzhiyun 	kfree(path);
5257*4882a593Smuzhiyun 	return ret;
5258*4882a593Smuzhiyun }
5259*4882a593Smuzhiyun 
5260*4882a593Smuzhiyun /*
5261*4882a593Smuzhiyun  * ext4_collapse_range:
5262*4882a593Smuzhiyun  * This implements fallocate's collapse-range functionality for ext4.
5263*4882a593Smuzhiyun  * Returns: 0 on success, non-zero on error.
5264*4882a593Smuzhiyun  */
5265*4882a593Smuzhiyun static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
5266*4882a593Smuzhiyun {
5267*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
5268*4882a593Smuzhiyun 	struct super_block *sb = inode->i_sb;
5269*4882a593Smuzhiyun 	ext4_lblk_t punch_start, punch_stop;
5270*4882a593Smuzhiyun 	handle_t *handle;
5271*4882a593Smuzhiyun 	unsigned int credits;
5272*4882a593Smuzhiyun 	loff_t new_size, ioffset;
5273*4882a593Smuzhiyun 	int ret;
5274*4882a593Smuzhiyun 
5275*4882a593Smuzhiyun 	/*
5276*4882a593Smuzhiyun 	 * We need to test this early because xfstests assumes that a
5277*4882a593Smuzhiyun 	 * collapse range of (0, 1) will return EOPNOTSUPP if the file
5278*4882a593Smuzhiyun 	 * system does not support collapse range.
5279*4882a593Smuzhiyun 	 */
5280*4882a593Smuzhiyun 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5281*4882a593Smuzhiyun 		return -EOPNOTSUPP;
5282*4882a593Smuzhiyun 
5283*4882a593Smuzhiyun 	/* Collapse range works only on fs cluster size aligned regions. */
5284*4882a593Smuzhiyun 	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5285*4882a593Smuzhiyun 		return -EINVAL;
5286*4882a593Smuzhiyun 
5287*4882a593Smuzhiyun 	trace_ext4_collapse_range(inode, offset, len);
5288*4882a593Smuzhiyun 
5289*4882a593Smuzhiyun 	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5290*4882a593Smuzhiyun 	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5291*4882a593Smuzhiyun 
5292*4882a593Smuzhiyun 	/* Call ext4_force_commit to flush all data in case of data=journal. */
5293*4882a593Smuzhiyun 	if (ext4_should_journal_data(inode)) {
5294*4882a593Smuzhiyun 		ret = ext4_force_commit(inode->i_sb);
5295*4882a593Smuzhiyun 		if (ret)
5296*4882a593Smuzhiyun 			return ret;
5297*4882a593Smuzhiyun 	}
5298*4882a593Smuzhiyun 
5299*4882a593Smuzhiyun 	inode_lock(inode);
5300*4882a593Smuzhiyun 	/*
5301*4882a593Smuzhiyun 	 * The collapse range must not overlap EOF; if it did, the operation
5302*4882a593Smuzhiyun 	 * would effectively be a truncate, so reject it.
5303*4882a593Smuzhiyun 	 */
5304*4882a593Smuzhiyun 	if (offset + len >= inode->i_size) {
5305*4882a593Smuzhiyun 		ret = -EINVAL;
5306*4882a593Smuzhiyun 		goto out_mutex;
5307*4882a593Smuzhiyun 	}
5308*4882a593Smuzhiyun 
5309*4882a593Smuzhiyun 	/* Currently just for extent based files */
5310*4882a593Smuzhiyun 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5311*4882a593Smuzhiyun 		ret = -EOPNOTSUPP;
5312*4882a593Smuzhiyun 		goto out_mutex;
5313*4882a593Smuzhiyun 	}
5314*4882a593Smuzhiyun 
5315*4882a593Smuzhiyun 	/* Wait for existing dio to complete */
5316*4882a593Smuzhiyun 	inode_dio_wait(inode);
5317*4882a593Smuzhiyun 
5318*4882a593Smuzhiyun 	ret = file_modified(file);
5319*4882a593Smuzhiyun 	if (ret)
5320*4882a593Smuzhiyun 		goto out_mutex;
5321*4882a593Smuzhiyun 
5322*4882a593Smuzhiyun 	/*
5323*4882a593Smuzhiyun 	 * Prevent page faults from reinstantiating pages we have released from
5324*4882a593Smuzhiyun 	 * page cache.
5325*4882a593Smuzhiyun 	 */
5326*4882a593Smuzhiyun 	down_write(&EXT4_I(inode)->i_mmap_sem);
5327*4882a593Smuzhiyun 
5328*4882a593Smuzhiyun 	ret = ext4_break_layouts(inode);
5329*4882a593Smuzhiyun 	if (ret)
5330*4882a593Smuzhiyun 		goto out_mmap;
5331*4882a593Smuzhiyun 
5332*4882a593Smuzhiyun 	/*
5333*4882a593Smuzhiyun 	 * Need to round down offset to be aligned with page size boundary
5334*4882a593Smuzhiyun 	 * for page size > block size.
5335*4882a593Smuzhiyun 	 */
5336*4882a593Smuzhiyun 	ioffset = round_down(offset, PAGE_SIZE);
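	/*
	 * E.g. (assumed sizes): with 4K pages and 1K blocks, a collapse at
	 * offset == 6144 rounds ioffset down to 4096 so that the whole
	 * containing page can be written back and dropped.
	 */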
5337*4882a593Smuzhiyun 	/*
5338*4882a593Smuzhiyun 	 * Write the tail of the last page before the removed range, since it
5339*4882a593Smuzhiyun 	 * will get removed from the page cache below.
5340*4882a593Smuzhiyun 	 */
5341*4882a593Smuzhiyun 	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
5342*4882a593Smuzhiyun 	if (ret)
5343*4882a593Smuzhiyun 		goto out_mmap;
5344*4882a593Smuzhiyun 	/*
5345*4882a593Smuzhiyun 	 * Write the data that will be shifted, to preserve it when discarding
5346*4882a593Smuzhiyun 	 * the page cache below. We are also protected from pages becoming
5347*4882a593Smuzhiyun 	 * dirty by i_mmap_sem.
5348*4882a593Smuzhiyun 	 */
5349*4882a593Smuzhiyun 	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
5350*4882a593Smuzhiyun 					   LLONG_MAX);
5351*4882a593Smuzhiyun 	if (ret)
5352*4882a593Smuzhiyun 		goto out_mmap;
5353*4882a593Smuzhiyun 	truncate_pagecache(inode, ioffset);
5354*4882a593Smuzhiyun 
5355*4882a593Smuzhiyun 	credits = ext4_writepage_trans_blocks(inode);
5356*4882a593Smuzhiyun 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5357*4882a593Smuzhiyun 	if (IS_ERR(handle)) {
5358*4882a593Smuzhiyun 		ret = PTR_ERR(handle);
5359*4882a593Smuzhiyun 		goto out_mmap;
5360*4882a593Smuzhiyun 	}
5361*4882a593Smuzhiyun 	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
5362*4882a593Smuzhiyun 
5363*4882a593Smuzhiyun 	down_write(&EXT4_I(inode)->i_data_sem);
5364*4882a593Smuzhiyun 	ext4_discard_preallocations(inode, 0);
5365*4882a593Smuzhiyun 
5366*4882a593Smuzhiyun 	ret = ext4_es_remove_extent(inode, punch_start,
5367*4882a593Smuzhiyun 				    EXT_MAX_BLOCKS - punch_start);
5368*4882a593Smuzhiyun 	if (ret) {
5369*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5370*4882a593Smuzhiyun 		goto out_stop;
5371*4882a593Smuzhiyun 	}
5372*4882a593Smuzhiyun 
5373*4882a593Smuzhiyun 	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5374*4882a593Smuzhiyun 	if (ret) {
5375*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5376*4882a593Smuzhiyun 		goto out_stop;
5377*4882a593Smuzhiyun 	}
5378*4882a593Smuzhiyun 	ext4_discard_preallocations(inode, 0);
5379*4882a593Smuzhiyun 
5380*4882a593Smuzhiyun 	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5381*4882a593Smuzhiyun 				     punch_stop - punch_start, SHIFT_LEFT);
5382*4882a593Smuzhiyun 	if (ret) {
5383*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5384*4882a593Smuzhiyun 		goto out_stop;
5385*4882a593Smuzhiyun 	}
5386*4882a593Smuzhiyun 
5387*4882a593Smuzhiyun 	new_size = inode->i_size - len;
5388*4882a593Smuzhiyun 	i_size_write(inode, new_size);
5389*4882a593Smuzhiyun 	EXT4_I(inode)->i_disksize = new_size;
5390*4882a593Smuzhiyun 
5391*4882a593Smuzhiyun 	up_write(&EXT4_I(inode)->i_data_sem);
5392*4882a593Smuzhiyun 	if (IS_SYNC(inode))
5393*4882a593Smuzhiyun 		ext4_handle_sync(handle);
5394*4882a593Smuzhiyun 	inode->i_mtime = inode->i_ctime = current_time(inode);
5395*4882a593Smuzhiyun 	ret = ext4_mark_inode_dirty(handle, inode);
5396*4882a593Smuzhiyun 	ext4_update_inode_fsync_trans(handle, inode, 1);
5397*4882a593Smuzhiyun 
5398*4882a593Smuzhiyun out_stop:
5399*4882a593Smuzhiyun 	ext4_journal_stop(handle);
5400*4882a593Smuzhiyun 	ext4_fc_stop_ineligible(sb);
5401*4882a593Smuzhiyun out_mmap:
5402*4882a593Smuzhiyun 	up_write(&EXT4_I(inode)->i_mmap_sem);
5403*4882a593Smuzhiyun out_mutex:
5404*4882a593Smuzhiyun 	inode_unlock(inode);
5405*4882a593Smuzhiyun 	return ret;
5406*4882a593Smuzhiyun }
5407*4882a593Smuzhiyun 
5408*4882a593Smuzhiyun /*
5409*4882a593Smuzhiyun  * ext4_insert_range:
5410*4882a593Smuzhiyun  * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
5411*4882a593Smuzhiyun  * The data blocks starting from @offset to the EOF are shifted by @len
5412*4882a593Smuzhiyun  * towards the right to create a hole in the @inode. The inode size is
5413*4882a593Smuzhiyun  * increased by @len bytes.
5414*4882a593Smuzhiyun  * Returns 0 on success, error otherwise.
5415*4882a593Smuzhiyun  */
5416*4882a593Smuzhiyun static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
5417*4882a593Smuzhiyun {
5418*4882a593Smuzhiyun 	struct inode *inode = file_inode(file);
5419*4882a593Smuzhiyun 	struct super_block *sb = inode->i_sb;
5420*4882a593Smuzhiyun 	handle_t *handle;
5421*4882a593Smuzhiyun 	struct ext4_ext_path *path;
5422*4882a593Smuzhiyun 	struct ext4_extent *extent;
5423*4882a593Smuzhiyun 	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
5424*4882a593Smuzhiyun 	unsigned int credits, ee_len;
5425*4882a593Smuzhiyun 	int ret = 0, depth, split_flag = 0;
5426*4882a593Smuzhiyun 	loff_t ioffset;
5427*4882a593Smuzhiyun 
5428*4882a593Smuzhiyun 	/*
5429*4882a593Smuzhiyun 	 * We need to test this early because xfstests assumes that an
5430*4882a593Smuzhiyun 	 * insert range of (0, 1) will return EOPNOTSUPP if the file
5431*4882a593Smuzhiyun 	 * system does not support insert range.
5432*4882a593Smuzhiyun 	 */
5433*4882a593Smuzhiyun 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5434*4882a593Smuzhiyun 		return -EOPNOTSUPP;
5435*4882a593Smuzhiyun 
5436*4882a593Smuzhiyun 	/* Insert range works only on fs cluster size aligned regions. */
5437*4882a593Smuzhiyun 	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
5438*4882a593Smuzhiyun 		return -EINVAL;
5439*4882a593Smuzhiyun 
5440*4882a593Smuzhiyun 	trace_ext4_insert_range(inode, offset, len);
5441*4882a593Smuzhiyun 
5442*4882a593Smuzhiyun 	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5443*4882a593Smuzhiyun 	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
5444*4882a593Smuzhiyun 
5445*4882a593Smuzhiyun 	/* Call ext4_force_commit to flush all data in case of data=journal */
5446*4882a593Smuzhiyun 	if (ext4_should_journal_data(inode)) {
5447*4882a593Smuzhiyun 		ret = ext4_force_commit(inode->i_sb);
5448*4882a593Smuzhiyun 		if (ret)
5449*4882a593Smuzhiyun 			return ret;
5450*4882a593Smuzhiyun 	}
5451*4882a593Smuzhiyun 
5452*4882a593Smuzhiyun 	inode_lock(inode);
5453*4882a593Smuzhiyun 	/* Currently just for extent based files */
5454*4882a593Smuzhiyun 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5455*4882a593Smuzhiyun 		ret = -EOPNOTSUPP;
5456*4882a593Smuzhiyun 		goto out_mutex;
5457*4882a593Smuzhiyun 	}
5458*4882a593Smuzhiyun 
5459*4882a593Smuzhiyun 	/* Check whether the maximum file size would be exceeded */
5460*4882a593Smuzhiyun 	if (len > inode->i_sb->s_maxbytes - inode->i_size) {
5461*4882a593Smuzhiyun 		ret = -EFBIG;
5462*4882a593Smuzhiyun 		goto out_mutex;
5463*4882a593Smuzhiyun 	}
5464*4882a593Smuzhiyun 
5465*4882a593Smuzhiyun 	/* Offset must be less than i_size */
5466*4882a593Smuzhiyun 	if (offset >= inode->i_size) {
5467*4882a593Smuzhiyun 		ret = -EINVAL;
5468*4882a593Smuzhiyun 		goto out_mutex;
5469*4882a593Smuzhiyun 	}
5470*4882a593Smuzhiyun 
5471*4882a593Smuzhiyun 	/* Wait for existing dio to complete */
5472*4882a593Smuzhiyun 	inode_dio_wait(inode);
5473*4882a593Smuzhiyun 
5474*4882a593Smuzhiyun 	ret = file_modified(file);
5475*4882a593Smuzhiyun 	if (ret)
5476*4882a593Smuzhiyun 		goto out_mutex;
5477*4882a593Smuzhiyun 
5478*4882a593Smuzhiyun 	/*
5479*4882a593Smuzhiyun 	 * Prevent page faults from reinstantiating pages we have released from
5480*4882a593Smuzhiyun 	 * page cache.
5481*4882a593Smuzhiyun 	 */
5482*4882a593Smuzhiyun 	down_write(&EXT4_I(inode)->i_mmap_sem);
5483*4882a593Smuzhiyun 
5484*4882a593Smuzhiyun 	ret = ext4_break_layouts(inode);
5485*4882a593Smuzhiyun 	if (ret)
5486*4882a593Smuzhiyun 		goto out_mmap;
5487*4882a593Smuzhiyun 
5488*4882a593Smuzhiyun 	/*
5489*4882a593Smuzhiyun 	 * Need to round the start offset down to a page boundary, since
5490*4882a593Smuzhiyun 	 * the block size may be smaller than the page size.
5491*4882a593Smuzhiyun 	 */
5492*4882a593Smuzhiyun 	ioffset = round_down(offset, PAGE_SIZE);
5493*4882a593Smuzhiyun 	/* Write out all dirty pages */
5494*4882a593Smuzhiyun 	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5495*4882a593Smuzhiyun 			LLONG_MAX);
5496*4882a593Smuzhiyun 	if (ret)
5497*4882a593Smuzhiyun 		goto out_mmap;
5498*4882a593Smuzhiyun 	truncate_pagecache(inode, ioffset);
5499*4882a593Smuzhiyun 
5500*4882a593Smuzhiyun 	credits = ext4_writepage_trans_blocks(inode);
5501*4882a593Smuzhiyun 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5502*4882a593Smuzhiyun 	if (IS_ERR(handle)) {
5503*4882a593Smuzhiyun 		ret = PTR_ERR(handle);
5504*4882a593Smuzhiyun 		goto out_mmap;
5505*4882a593Smuzhiyun 	}
5506*4882a593Smuzhiyun 	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
5507*4882a593Smuzhiyun 
5508*4882a593Smuzhiyun 	/* Expand the file to avoid data loss if there is an error while shifting */
5509*4882a593Smuzhiyun 	inode->i_size += len;
5510*4882a593Smuzhiyun 	EXT4_I(inode)->i_disksize += len;
5511*4882a593Smuzhiyun 	inode->i_mtime = inode->i_ctime = current_time(inode);
5512*4882a593Smuzhiyun 	ret = ext4_mark_inode_dirty(handle, inode);
5513*4882a593Smuzhiyun 	if (ret)
5514*4882a593Smuzhiyun 		goto out_stop;
5515*4882a593Smuzhiyun 
5516*4882a593Smuzhiyun 	down_write(&EXT4_I(inode)->i_data_sem);
5517*4882a593Smuzhiyun 	ext4_discard_preallocations(inode, 0);
5518*4882a593Smuzhiyun 
5519*4882a593Smuzhiyun 	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
5520*4882a593Smuzhiyun 	if (IS_ERR(path)) {
		ret = PTR_ERR(path);	/* don't silently return 0 on lookup failure */
5521*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5522*4882a593Smuzhiyun 		goto out_stop;
5523*4882a593Smuzhiyun 	}
5524*4882a593Smuzhiyun 
5525*4882a593Smuzhiyun 	depth = ext_depth(inode);
5526*4882a593Smuzhiyun 	extent = path[depth].p_ext;
5527*4882a593Smuzhiyun 	if (extent) {
5528*4882a593Smuzhiyun 		ee_start_lblk = le32_to_cpu(extent->ee_block);
5529*4882a593Smuzhiyun 		ee_len = ext4_ext_get_actual_len(extent);
5530*4882a593Smuzhiyun 
5531*4882a593Smuzhiyun 		/*
5532*4882a593Smuzhiyun 		 * If offset_lblk is not the starting block of the extent,
5533*4882a593Smuzhiyun 		 * split the extent at offset_lblk
5534*4882a593Smuzhiyun 		 */
5535*4882a593Smuzhiyun 		if ((offset_lblk > ee_start_lblk) &&
5536*4882a593Smuzhiyun 				(offset_lblk < (ee_start_lblk + ee_len))) {
5537*4882a593Smuzhiyun 			if (ext4_ext_is_unwritten(extent))
5538*4882a593Smuzhiyun 				split_flag = EXT4_EXT_MARK_UNWRIT1 |
5539*4882a593Smuzhiyun 					EXT4_EXT_MARK_UNWRIT2;
5540*4882a593Smuzhiyun 			ret = ext4_split_extent_at(handle, inode, &path,
5541*4882a593Smuzhiyun 					offset_lblk, split_flag,
5542*4882a593Smuzhiyun 					EXT4_EX_NOCACHE |
5543*4882a593Smuzhiyun 					EXT4_GET_BLOCKS_PRE_IO |
5544*4882a593Smuzhiyun 					EXT4_GET_BLOCKS_METADATA_NOFAIL);
5545*4882a593Smuzhiyun 		}
5546*4882a593Smuzhiyun 
5547*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
5548*4882a593Smuzhiyun 		kfree(path);
5549*4882a593Smuzhiyun 		if (ret < 0) {
5550*4882a593Smuzhiyun 			up_write(&EXT4_I(inode)->i_data_sem);
5551*4882a593Smuzhiyun 			goto out_stop;
5552*4882a593Smuzhiyun 		}
5553*4882a593Smuzhiyun 	} else {
5554*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
5555*4882a593Smuzhiyun 		kfree(path);
5556*4882a593Smuzhiyun 	}
5557*4882a593Smuzhiyun 
5558*4882a593Smuzhiyun 	ret = ext4_es_remove_extent(inode, offset_lblk,
5559*4882a593Smuzhiyun 			EXT_MAX_BLOCKS - offset_lblk);
5560*4882a593Smuzhiyun 	if (ret) {
5561*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5562*4882a593Smuzhiyun 		goto out_stop;
5563*4882a593Smuzhiyun 	}
5564*4882a593Smuzhiyun 
5565*4882a593Smuzhiyun 	/*
5566*4882a593Smuzhiyun 	 * If offset_lblk lies in a hole at the start of the file, use
5567*4882a593Smuzhiyun 	 * ee_start_lblk as the starting point for shifting extents
5568*4882a593Smuzhiyun 	 */
5569*4882a593Smuzhiyun 	ret = ext4_ext_shift_extents(inode, handle,
5570*4882a593Smuzhiyun 		ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
5571*4882a593Smuzhiyun 		len_lblk, SHIFT_RIGHT);
5572*4882a593Smuzhiyun 
5573*4882a593Smuzhiyun 	up_write(&EXT4_I(inode)->i_data_sem);
5574*4882a593Smuzhiyun 	if (IS_SYNC(inode))
5575*4882a593Smuzhiyun 		ext4_handle_sync(handle);
5576*4882a593Smuzhiyun 	if (ret >= 0)
5577*4882a593Smuzhiyun 		ext4_update_inode_fsync_trans(handle, inode, 1);
5578*4882a593Smuzhiyun 
5579*4882a593Smuzhiyun out_stop:
5580*4882a593Smuzhiyun 	ext4_journal_stop(handle);
5581*4882a593Smuzhiyun 	ext4_fc_stop_ineligible(sb);
5582*4882a593Smuzhiyun out_mmap:
5583*4882a593Smuzhiyun 	up_write(&EXT4_I(inode)->i_mmap_sem);
5584*4882a593Smuzhiyun out_mutex:
5585*4882a593Smuzhiyun 	inode_unlock(inode);
5586*4882a593Smuzhiyun 	return ret;
5587*4882a593Smuzhiyun }
5588*4882a593Smuzhiyun 
5589*4882a593Smuzhiyun /**
5590*4882a593Smuzhiyun  * ext4_swap_extents() - Swap extents between two inodes
5591*4882a593Smuzhiyun  * @handle: handle for this transaction
5592*4882a593Smuzhiyun  * @inode1:	First inode
5593*4882a593Smuzhiyun  * @inode2:	Second inode
5594*4882a593Smuzhiyun  * @lblk1:	Start block for first inode
5595*4882a593Smuzhiyun  * @lblk2:	Start block for second inode
5596*4882a593Smuzhiyun  * @count:	Number of blocks to swap
5597*4882a593Smuzhiyun  * @unwritten: Mark second inode's extents as unwritten after swap
5598*4882a593Smuzhiyun  * @erp:	Pointer to save error value
5599*4882a593Smuzhiyun  *
5600*4882a593Smuzhiyun  * This helper routine does exactly what its name promises: it swaps extents.
5601*4882a593Smuzhiyun  * Everything else, such as page-cache locking consistency, bh mapping
5602*4882a593Smuzhiyun  * consistency and copying of extent data, must be handled by the caller.
5603*4882a593Smuzhiyun  * Locking:
5604*4882a593Smuzhiyun  * 		i_mutex is held for both inodes
5605*4882a593Smuzhiyun  * 		i_data_sem is locked for write for both inodes
5606*4882a593Smuzhiyun  * Assumptions:
5607*4882a593Smuzhiyun  *		All pages from requested range are locked for both inodes
5608*4882a593Smuzhiyun  */
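/*
 * Schematic caller sketch (assumption for illustration only; the real
 * caller lives in the online defragmentation code, fs/ext4/move_extent.c).
 * It merely spells out the locking rules stated above:
 *
 *	inode_lock(inode1);
 *	inode_lock_nested(inode2, I_MUTEX_NONDIR2);
 *	down_write(&EXT4_I(inode1)->i_data_sem);
 *	down_write_nested(&EXT4_I(inode2)->i_data_sem, I_DATA_SEM_OTHER);
 *	replaced = ext4_swap_extents(handle, inode1, inode2,
 *				     lblk1, lblk2, count, unwritten, &err);
 *	... release the locks in reverse order ...
 */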
5609*4882a593Smuzhiyun int
5610*4882a593Smuzhiyun ext4_swap_extents(handle_t *handle, struct inode *inode1,
5611*4882a593Smuzhiyun 		  struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5612*4882a593Smuzhiyun 		  ext4_lblk_t count, int unwritten, int *erp)
5613*4882a593Smuzhiyun {
5614*4882a593Smuzhiyun 	struct ext4_ext_path *path1 = NULL;
5615*4882a593Smuzhiyun 	struct ext4_ext_path *path2 = NULL;
5616*4882a593Smuzhiyun 	int replaced_count = 0;
5617*4882a593Smuzhiyun 
5618*4882a593Smuzhiyun 	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5619*4882a593Smuzhiyun 	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5620*4882a593Smuzhiyun 	BUG_ON(!inode_is_locked(inode1));
5621*4882a593Smuzhiyun 	BUG_ON(!inode_is_locked(inode2));
5622*4882a593Smuzhiyun 
5623*4882a593Smuzhiyun 	*erp = ext4_es_remove_extent(inode1, lblk1, count);
5624*4882a593Smuzhiyun 	if (unlikely(*erp))
5625*4882a593Smuzhiyun 		return 0;
5626*4882a593Smuzhiyun 	*erp = ext4_es_remove_extent(inode2, lblk2, count);
5627*4882a593Smuzhiyun 	if (unlikely(*erp))
5628*4882a593Smuzhiyun 		return 0;
5629*4882a593Smuzhiyun 
5630*4882a593Smuzhiyun 	while (count) {
5631*4882a593Smuzhiyun 		struct ext4_extent *ex1, *ex2, tmp_ex;
5632*4882a593Smuzhiyun 		ext4_lblk_t e1_blk, e2_blk;
5633*4882a593Smuzhiyun 		int e1_len, e2_len, len;
5634*4882a593Smuzhiyun 		int split = 0;
5635*4882a593Smuzhiyun 
5636*4882a593Smuzhiyun 		path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5637*4882a593Smuzhiyun 		if (IS_ERR(path1)) {
5638*4882a593Smuzhiyun 			*erp = PTR_ERR(path1);
5639*4882a593Smuzhiyun 			path1 = NULL;
5640*4882a593Smuzhiyun 		finish:
5641*4882a593Smuzhiyun 			count = 0;
5642*4882a593Smuzhiyun 			goto repeat;
5643*4882a593Smuzhiyun 		}
5644*4882a593Smuzhiyun 		path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5645*4882a593Smuzhiyun 		if (IS_ERR(path2)) {
5646*4882a593Smuzhiyun 			*erp = PTR_ERR(path2);
5647*4882a593Smuzhiyun 			path2 = NULL;
5648*4882a593Smuzhiyun 			goto finish;
5649*4882a593Smuzhiyun 		}
5650*4882a593Smuzhiyun 		ex1 = path1[path1->p_depth].p_ext;
5651*4882a593Smuzhiyun 		ex2 = path2[path2->p_depth].p_ext;
5652*4882a593Smuzhiyun 		/* Do we have something to swap? */
5653*4882a593Smuzhiyun 		if (unlikely(!ex2 || !ex1))
5654*4882a593Smuzhiyun 			goto finish;
5655*4882a593Smuzhiyun 
5656*4882a593Smuzhiyun 		e1_blk = le32_to_cpu(ex1->ee_block);
5657*4882a593Smuzhiyun 		e2_blk = le32_to_cpu(ex2->ee_block);
5658*4882a593Smuzhiyun 		e1_len = ext4_ext_get_actual_len(ex1);
5659*4882a593Smuzhiyun 		e2_len = ext4_ext_get_actual_len(ex2);
5660*4882a593Smuzhiyun 
5661*4882a593Smuzhiyun 		/* Hole handling */
5662*4882a593Smuzhiyun 		if (!in_range(lblk1, e1_blk, e1_len) ||
5663*4882a593Smuzhiyun 		    !in_range(lblk2, e2_blk, e2_len)) {
5664*4882a593Smuzhiyun 			ext4_lblk_t next1, next2;
5665*4882a593Smuzhiyun 
5666*4882a593Smuzhiyun 			/* if hole after extent, then go to next extent */
5667*4882a593Smuzhiyun 			next1 = ext4_ext_next_allocated_block(path1);
5668*4882a593Smuzhiyun 			next2 = ext4_ext_next_allocated_block(path2);
5669*4882a593Smuzhiyun 			/* If hole before extent, then shift to that extent */
5670*4882a593Smuzhiyun 			if (e1_blk > lblk1)
5671*4882a593Smuzhiyun 				next1 = e1_blk;
5672*4882a593Smuzhiyun 			if (e2_blk > lblk2)
5673*4882a593Smuzhiyun 				next2 = e2_blk;
5674*4882a593Smuzhiyun 			/* Do we have something to swap? */
5675*4882a593Smuzhiyun 			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5676*4882a593Smuzhiyun 				goto finish;
5677*4882a593Smuzhiyun 			/* Move to the rightmost boundary */
5678*4882a593Smuzhiyun 			len = next1 - lblk1;
5679*4882a593Smuzhiyun 			if (len < next2 - lblk2)
5680*4882a593Smuzhiyun 				len = next2 - lblk2;
5681*4882a593Smuzhiyun 			if (len > count)
5682*4882a593Smuzhiyun 				len = count;
5683*4882a593Smuzhiyun 			lblk1 += len;
5684*4882a593Smuzhiyun 			lblk2 += len;
5685*4882a593Smuzhiyun 			count -= len;
5686*4882a593Smuzhiyun 			goto repeat;
5687*4882a593Smuzhiyun 		}
5688*4882a593Smuzhiyun 
5689*4882a593Smuzhiyun 		/* Prepare left boundary */
5690*4882a593Smuzhiyun 		if (e1_blk < lblk1) {
5691*4882a593Smuzhiyun 			split = 1;
5692*4882a593Smuzhiyun 			*erp = ext4_force_split_extent_at(handle, inode1,
5693*4882a593Smuzhiyun 						&path1, lblk1, 0);
5694*4882a593Smuzhiyun 			if (unlikely(*erp))
5695*4882a593Smuzhiyun 				goto finish;
5696*4882a593Smuzhiyun 		}
5697*4882a593Smuzhiyun 		if (e2_blk < lblk2) {
5698*4882a593Smuzhiyun 			split = 1;
5699*4882a593Smuzhiyun 			*erp = ext4_force_split_extent_at(handle, inode2,
5700*4882a593Smuzhiyun 						&path2,  lblk2, 0);
5701*4882a593Smuzhiyun 			if (unlikely(*erp))
5702*4882a593Smuzhiyun 				goto finish;
5703*4882a593Smuzhiyun 		}
5704*4882a593Smuzhiyun 		/* ext4_split_extent_at() may result in leaf extent split,
5705*4882a593Smuzhiyun 		 * the path must be revalidated. */
5706*4882a593Smuzhiyun 		if (split)
5707*4882a593Smuzhiyun 			goto repeat;
5708*4882a593Smuzhiyun 
5709*4882a593Smuzhiyun 		/* Prepare right boundary */
5710*4882a593Smuzhiyun 		len = count;
5711*4882a593Smuzhiyun 		if (len > e1_blk + e1_len - lblk1)
5712*4882a593Smuzhiyun 			len = e1_blk + e1_len - lblk1;
5713*4882a593Smuzhiyun 		if (len > e2_blk + e2_len - lblk2)
5714*4882a593Smuzhiyun 			len = e2_blk + e2_len - lblk2;
5715*4882a593Smuzhiyun 
5716*4882a593Smuzhiyun 		if (len != e1_len) {
5717*4882a593Smuzhiyun 			split = 1;
5718*4882a593Smuzhiyun 			*erp = ext4_force_split_extent_at(handle, inode1,
5719*4882a593Smuzhiyun 						&path1, lblk1 + len, 0);
5720*4882a593Smuzhiyun 			if (unlikely(*erp))
5721*4882a593Smuzhiyun 				goto finish;
5722*4882a593Smuzhiyun 		}
5723*4882a593Smuzhiyun 		if (len != e2_len) {
5724*4882a593Smuzhiyun 			split = 1;
5725*4882a593Smuzhiyun 			*erp = ext4_force_split_extent_at(handle, inode2,
5726*4882a593Smuzhiyun 						&path2, lblk2 + len, 0);
5727*4882a593Smuzhiyun 			if (*erp)
5728*4882a593Smuzhiyun 				goto finish;
5729*4882a593Smuzhiyun 		}
5730*4882a593Smuzhiyun 		/* ext4_split_extent_at() may result in leaf extent split,
5731*4882a593Smuzhiyun 		 * the path must be revalidated. */
5732*4882a593Smuzhiyun 		if (split)
5733*4882a593Smuzhiyun 			goto repeat;
5734*4882a593Smuzhiyun 
5735*4882a593Smuzhiyun 		BUG_ON(e2_len != e1_len);
5736*4882a593Smuzhiyun 		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5737*4882a593Smuzhiyun 		if (unlikely(*erp))
5738*4882a593Smuzhiyun 			goto finish;
5739*4882a593Smuzhiyun 		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5740*4882a593Smuzhiyun 		if (unlikely(*erp))
5741*4882a593Smuzhiyun 			goto finish;
5742*4882a593Smuzhiyun 
5743*4882a593Smuzhiyun 		/* Both extents are fully inside boundaries. Swap them now */
5744*4882a593Smuzhiyun 		tmp_ex = *ex1;
5745*4882a593Smuzhiyun 		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5746*4882a593Smuzhiyun 		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5747*4882a593Smuzhiyun 		ex1->ee_len = cpu_to_le16(e2_len);
5748*4882a593Smuzhiyun 		ex2->ee_len = cpu_to_le16(e1_len);
5749*4882a593Smuzhiyun 		if (unwritten)
5750*4882a593Smuzhiyun 			ext4_ext_mark_unwritten(ex2);
5751*4882a593Smuzhiyun 		if (ext4_ext_is_unwritten(&tmp_ex))
5752*4882a593Smuzhiyun 			ext4_ext_mark_unwritten(ex1);
5753*4882a593Smuzhiyun 
5754*4882a593Smuzhiyun 		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5755*4882a593Smuzhiyun 		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5756*4882a593Smuzhiyun 		*erp = ext4_ext_dirty(handle, inode2, path2 +
5757*4882a593Smuzhiyun 				      path2->p_depth);
5758*4882a593Smuzhiyun 		if (unlikely(*erp))
5759*4882a593Smuzhiyun 			goto finish;
5760*4882a593Smuzhiyun 		*erp = ext4_ext_dirty(handle, inode1, path1 +
5761*4882a593Smuzhiyun 				      path1->p_depth);
5762*4882a593Smuzhiyun 		/*
5763*4882a593Smuzhiyun 		 * Looks scary, eh? The second inode already points to the new
5764*4882a593Smuzhiyun 		 * blocks and was successfully dirtied. Fortunately an error can
5765*4882a593Smuzhiyun 		 * only come from the journal here, in which case the whole
5766*4882a593Smuzhiyun 		 * transaction will be aborted anyway.
5767*4882a593Smuzhiyun 		 */
5768*4882a593Smuzhiyun 		if (unlikely(*erp))
5769*4882a593Smuzhiyun 			goto finish;
5770*4882a593Smuzhiyun 		lblk1 += len;
5771*4882a593Smuzhiyun 		lblk2 += len;
5772*4882a593Smuzhiyun 		replaced_count += len;
5773*4882a593Smuzhiyun 		count -= len;
5774*4882a593Smuzhiyun 
5775*4882a593Smuzhiyun 	repeat:
5776*4882a593Smuzhiyun 		ext4_ext_drop_refs(path1);
5777*4882a593Smuzhiyun 		kfree(path1);
5778*4882a593Smuzhiyun 		ext4_ext_drop_refs(path2);
5779*4882a593Smuzhiyun 		kfree(path2);
5780*4882a593Smuzhiyun 		path1 = path2 = NULL;
5781*4882a593Smuzhiyun 	}
5782*4882a593Smuzhiyun 	return replaced_count;
5783*4882a593Smuzhiyun }
5784*4882a593Smuzhiyun 
5785*4882a593Smuzhiyun /*
5786*4882a593Smuzhiyun  * ext4_clu_mapped - determine whether any block in a logical cluster has
5787*4882a593Smuzhiyun  *                   been mapped to a physical cluster
5788*4882a593Smuzhiyun  *
5789*4882a593Smuzhiyun  * @inode - file containing the logical cluster
5790*4882a593Smuzhiyun  * @lclu - logical cluster of interest
5791*4882a593Smuzhiyun  *
5792*4882a593Smuzhiyun  * Returns 1 if any block in the logical cluster is mapped, signifying
5793*4882a593Smuzhiyun  * that a physical cluster has been allocated for it.  Otherwise,
5794*4882a593Smuzhiyun  * returns 0.  Can also return negative error codes.  Derived from
5795*4882a593Smuzhiyun  * ext4_ext_map_blocks().
5796*4882a593Smuzhiyun  */
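/*
 * Worked example (illustrative): with 16 blocks per cluster, logical
 * cluster 3 covers logical blocks 48..63, i.e. EXT4_C2B(sbi, 3) == 48
 * and EXT4_B2C(sbi, 63) == 3.  Any extent overlapping that block range
 * makes this function return 1 for lclu == 3.
 */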
5797*4882a593Smuzhiyun int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
5798*4882a593Smuzhiyun {
5799*4882a593Smuzhiyun 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5800*4882a593Smuzhiyun 	struct ext4_ext_path *path;
5801*4882a593Smuzhiyun 	int depth, mapped = 0, err = 0;
5802*4882a593Smuzhiyun 	struct ext4_extent *extent;
5803*4882a593Smuzhiyun 	ext4_lblk_t first_lblk, first_lclu, last_lclu;
5804*4882a593Smuzhiyun 
5805*4882a593Smuzhiyun 	/* search for the extent closest to the first block in the cluster */
5806*4882a593Smuzhiyun 	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
5807*4882a593Smuzhiyun 	if (IS_ERR(path)) {
5808*4882a593Smuzhiyun 		err = PTR_ERR(path);
5809*4882a593Smuzhiyun 		path = NULL;
5810*4882a593Smuzhiyun 		goto out;
5811*4882a593Smuzhiyun 	}
5812*4882a593Smuzhiyun 
5813*4882a593Smuzhiyun 	depth = ext_depth(inode);
5814*4882a593Smuzhiyun 
5815*4882a593Smuzhiyun 	/*
5816*4882a593Smuzhiyun 	 * A consistent leaf must not be empty.  This situation is possible,
5817*4882a593Smuzhiyun 	 * though, _during_ tree modification, and it's why an assert can't
5818*4882a593Smuzhiyun 	 * be put in ext4_find_extent().
5819*4882a593Smuzhiyun 	 */
5820*4882a593Smuzhiyun 	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
5821*4882a593Smuzhiyun 		EXT4_ERROR_INODE(inode,
5822*4882a593Smuzhiyun 		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
5823*4882a593Smuzhiyun 				 (unsigned long) EXT4_C2B(sbi, lclu),
5824*4882a593Smuzhiyun 				 depth, path[depth].p_block);
5825*4882a593Smuzhiyun 		err = -EFSCORRUPTED;
5826*4882a593Smuzhiyun 		goto out;
5827*4882a593Smuzhiyun 	}
5828*4882a593Smuzhiyun 
5829*4882a593Smuzhiyun 	extent = path[depth].p_ext;
5830*4882a593Smuzhiyun 
5831*4882a593Smuzhiyun 	/* can't be mapped if the extent tree is empty */
5832*4882a593Smuzhiyun 	if (extent == NULL)
5833*4882a593Smuzhiyun 		goto out;
5834*4882a593Smuzhiyun 
5835*4882a593Smuzhiyun 	first_lblk = le32_to_cpu(extent->ee_block);
5836*4882a593Smuzhiyun 	first_lclu = EXT4_B2C(sbi, first_lblk);
5837*4882a593Smuzhiyun 
5838*4882a593Smuzhiyun 	/*
5839*4882a593Smuzhiyun 	 * Three possible outcomes at this point - found extent spanning
5840*4882a593Smuzhiyun 	 * the target cluster, to the left of the target cluster, or to the
5841*4882a593Smuzhiyun 	 * right of the target cluster.  The first two cases are handled here.
5842*4882a593Smuzhiyun 	 * The last case indicates the target cluster is not mapped.
5843*4882a593Smuzhiyun 	 */
5844*4882a593Smuzhiyun 	if (lclu >= first_lclu) {
5845*4882a593Smuzhiyun 		last_lclu = EXT4_B2C(sbi, first_lblk +
5846*4882a593Smuzhiyun 				     ext4_ext_get_actual_len(extent) - 1);
5847*4882a593Smuzhiyun 		if (lclu <= last_lclu) {
5848*4882a593Smuzhiyun 			mapped = 1;
5849*4882a593Smuzhiyun 		} else {
5850*4882a593Smuzhiyun 			first_lblk = ext4_ext_next_allocated_block(path);
5851*4882a593Smuzhiyun 			first_lclu = EXT4_B2C(sbi, first_lblk);
5852*4882a593Smuzhiyun 			if (lclu == first_lclu)
5853*4882a593Smuzhiyun 				mapped = 1;
5854*4882a593Smuzhiyun 		}
5855*4882a593Smuzhiyun 	}
5856*4882a593Smuzhiyun 
5857*4882a593Smuzhiyun out:
5858*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
5859*4882a593Smuzhiyun 	kfree(path);
5860*4882a593Smuzhiyun 
5861*4882a593Smuzhiyun 	return err ? err : mapped;
5862*4882a593Smuzhiyun }
5863*4882a593Smuzhiyun 
5864*4882a593Smuzhiyun /*
5865*4882a593Smuzhiyun  * Updates the physical block address and unwritten status of the extent
5866*4882a593Smuzhiyun  * starting at logical block @start with length @len. If no such extent exists,
5867*4882a593Smuzhiyun  * this function splits the extent tree appropriately to create an
5868*4882a593Smuzhiyun  * extent like this.  This function is called in the fast commit
5869*4882a593Smuzhiyun  * replay path.  Returns 0 on success and error on failure.
5870*4882a593Smuzhiyun  */
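/*
 * Shape of the operation (sketch): given an on-disk extent covering
 * [ee_block, ee_block + ee_len) and a replay record for [start, start + len),
 * the tree is cut at start and, if still needed, at start + len, so that a
 * dedicated extent matches the record exactly before it is rewritten:
 *
 *	before:   [ ee_block ........................... )
 *	1st cut:  [ ee_block .. start )[ start ......... )
 *	2nd cut:  [ ee_block .. start )[ start .. +len )[ .. )
 */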
5871*4882a593Smuzhiyun int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5872*4882a593Smuzhiyun 			      int len, int unwritten, ext4_fsblk_t pblk)
5873*4882a593Smuzhiyun {
5874*4882a593Smuzhiyun 	struct ext4_ext_path *path = NULL, *ppath;
5875*4882a593Smuzhiyun 	struct ext4_extent *ex;
5876*4882a593Smuzhiyun 	int ret;
5877*4882a593Smuzhiyun 
5878*4882a593Smuzhiyun 	path = ext4_find_extent(inode, start, NULL, 0);
5879*4882a593Smuzhiyun 	if (IS_ERR(path))
5880*4882a593Smuzhiyun 		return PTR_ERR(path);
5881*4882a593Smuzhiyun 	ex = path[path->p_depth].p_ext;
5882*4882a593Smuzhiyun 	if (!ex) {
5883*4882a593Smuzhiyun 		ret = -EFSCORRUPTED;
5884*4882a593Smuzhiyun 		goto out;
5885*4882a593Smuzhiyun 	}
5886*4882a593Smuzhiyun 
5887*4882a593Smuzhiyun 	if (le32_to_cpu(ex->ee_block) != start ||
5888*4882a593Smuzhiyun 		ext4_ext_get_actual_len(ex) != len) {
5889*4882a593Smuzhiyun 		/* We need to split this extent to match our extent first */
5890*4882a593Smuzhiyun 		ppath = path;
5891*4882a593Smuzhiyun 		down_write(&EXT4_I(inode)->i_data_sem);
5892*4882a593Smuzhiyun 		ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5893*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5894*4882a593Smuzhiyun 		if (ret)
5895*4882a593Smuzhiyun 			goto out;
5896*4882a593Smuzhiyun 		kfree(path);
5897*4882a593Smuzhiyun 		path = ext4_find_extent(inode, start, NULL, 0);
5898*4882a593Smuzhiyun 		if (IS_ERR(path))
5899*4882a593Smuzhiyun 			return PTR_ERR(path);
5900*4882a593Smuzhiyun 		ppath = path;
5901*4882a593Smuzhiyun 		ex = path[path->p_depth].p_ext;
5902*4882a593Smuzhiyun 		WARN_ON(le32_to_cpu(ex->ee_block) != start);
5903*4882a593Smuzhiyun 		if (ext4_ext_get_actual_len(ex) != len) {
5904*4882a593Smuzhiyun 			down_write(&EXT4_I(inode)->i_data_sem);
5905*4882a593Smuzhiyun 			ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5906*4882a593Smuzhiyun 							 start + len, 1);
5907*4882a593Smuzhiyun 			up_write(&EXT4_I(inode)->i_data_sem);
5908*4882a593Smuzhiyun 			if (ret)
5909*4882a593Smuzhiyun 				goto out;
5910*4882a593Smuzhiyun 			kfree(path);
5911*4882a593Smuzhiyun 			path = ext4_find_extent(inode, start, NULL, 0);
5912*4882a593Smuzhiyun 			if (IS_ERR(path))
5913*4882a593Smuzhiyun 				return PTR_ERR(path);
5914*4882a593Smuzhiyun 			ex = path[path->p_depth].p_ext;
5915*4882a593Smuzhiyun 		}
5916*4882a593Smuzhiyun 	}
5917*4882a593Smuzhiyun 	if (unwritten)
5918*4882a593Smuzhiyun 		ext4_ext_mark_unwritten(ex);
5919*4882a593Smuzhiyun 	else
5920*4882a593Smuzhiyun 		ext4_ext_mark_initialized(ex);
5921*4882a593Smuzhiyun 	ext4_ext_store_pblock(ex, pblk);
5922*4882a593Smuzhiyun 	down_write(&EXT4_I(inode)->i_data_sem);
5923*4882a593Smuzhiyun 	ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5924*4882a593Smuzhiyun 	up_write(&EXT4_I(inode)->i_data_sem);
5925*4882a593Smuzhiyun out:
5926*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
5927*4882a593Smuzhiyun 	kfree(path);
5928*4882a593Smuzhiyun 	ext4_mark_inode_dirty(NULL, inode);
5929*4882a593Smuzhiyun 	return ret;
5930*4882a593Smuzhiyun }
5931*4882a593Smuzhiyun 
5932*4882a593Smuzhiyun /* Try to shrink the extent tree by merging adjacent extents, up to @end */
5933*4882a593Smuzhiyun void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5934*4882a593Smuzhiyun {
5935*4882a593Smuzhiyun 	struct ext4_ext_path *path = NULL;
5936*4882a593Smuzhiyun 	struct ext4_extent *ex;
5937*4882a593Smuzhiyun 	ext4_lblk_t old_cur, cur = 0;
5938*4882a593Smuzhiyun 
5939*4882a593Smuzhiyun 	while (cur < end) {
5940*4882a593Smuzhiyun 		path = ext4_find_extent(inode, cur, NULL, 0);
5941*4882a593Smuzhiyun 		if (IS_ERR(path))
5942*4882a593Smuzhiyun 			return;
5943*4882a593Smuzhiyun 		ex = path[path->p_depth].p_ext;
5944*4882a593Smuzhiyun 		if (!ex) {
5945*4882a593Smuzhiyun 			ext4_ext_drop_refs(path);
5946*4882a593Smuzhiyun 			kfree(path);
5947*4882a593Smuzhiyun 			ext4_mark_inode_dirty(NULL, inode);
5948*4882a593Smuzhiyun 			return;
5949*4882a593Smuzhiyun 		}
5950*4882a593Smuzhiyun 		old_cur = cur;
5951*4882a593Smuzhiyun 		cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5952*4882a593Smuzhiyun 		if (cur <= old_cur)
5953*4882a593Smuzhiyun 			cur = old_cur + 1;
5954*4882a593Smuzhiyun 		ext4_ext_try_to_merge(NULL, inode, path, ex);
5955*4882a593Smuzhiyun 		down_write(&EXT4_I(inode)->i_data_sem);
5956*4882a593Smuzhiyun 		ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5957*4882a593Smuzhiyun 		up_write(&EXT4_I(inode)->i_data_sem);
5958*4882a593Smuzhiyun 		ext4_mark_inode_dirty(NULL, inode);
5959*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
5960*4882a593Smuzhiyun 		kfree(path);
5961*4882a593Smuzhiyun 	}
5962*4882a593Smuzhiyun }
5963*4882a593Smuzhiyun 
5964*4882a593Smuzhiyun /* Check if *cur points into a hole and, if it does, skip past it */
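/*
 * ext4_map_blocks() with a NULL handle is a pure lookup, so this helper
 * either leaves an already-mapped cursor alone or advances it to the next
 * mapped block; the replay helpers below use it to step a cursor over
 * unmapped regions while scanning a whole file.
 */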
5965*4882a593Smuzhiyun static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
5966*4882a593Smuzhiyun {
5967*4882a593Smuzhiyun 	int ret;
5968*4882a593Smuzhiyun 	struct ext4_map_blocks map;
5969*4882a593Smuzhiyun 
5970*4882a593Smuzhiyun 	map.m_lblk = *cur;
5971*4882a593Smuzhiyun 	map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5972*4882a593Smuzhiyun 
5973*4882a593Smuzhiyun 	ret = ext4_map_blocks(NULL, inode, &map, 0);
5974*4882a593Smuzhiyun 	if (ret < 0)
5975*4882a593Smuzhiyun 		return ret;
5976*4882a593Smuzhiyun 	if (ret != 0)
5977*4882a593Smuzhiyun 		return 0;
5978*4882a593Smuzhiyun 	*cur = *cur + map.m_len;
5979*4882a593Smuzhiyun 	return 0;
5980*4882a593Smuzhiyun }
5981*4882a593Smuzhiyun 
5982*4882a593Smuzhiyun /* Count number of blocks used by this inode and update i_blocks */
5983*4882a593Smuzhiyun int ext4_ext_replay_set_iblocks(struct inode *inode)
5984*4882a593Smuzhiyun {
5985*4882a593Smuzhiyun 	struct ext4_ext_path *path = NULL, *path2 = NULL;
5986*4882a593Smuzhiyun 	struct ext4_extent *ex;
5987*4882a593Smuzhiyun 	ext4_lblk_t cur = 0, end;
5988*4882a593Smuzhiyun 	int numblks = 0, i, ret = 0;
5989*4882a593Smuzhiyun 	ext4_fsblk_t cmp1, cmp2;
5990*4882a593Smuzhiyun 	struct ext4_map_blocks map;
5991*4882a593Smuzhiyun 
5992*4882a593Smuzhiyun 	/* Determine the size of the file first */
5993*4882a593Smuzhiyun 	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5994*4882a593Smuzhiyun 					EXT4_EX_NOCACHE);
5995*4882a593Smuzhiyun 	if (IS_ERR(path))
5996*4882a593Smuzhiyun 		return PTR_ERR(path);
5997*4882a593Smuzhiyun 	ex = path[path->p_depth].p_ext;
5998*4882a593Smuzhiyun 	if (!ex) {
5999*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
6000*4882a593Smuzhiyun 		kfree(path);
6001*4882a593Smuzhiyun 		goto out;
6002*4882a593Smuzhiyun 	}
6003*4882a593Smuzhiyun 	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6004*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
6005*4882a593Smuzhiyun 	kfree(path);
6006*4882a593Smuzhiyun 
6007*4882a593Smuzhiyun 	/* Count the number of data blocks */
6008*4882a593Smuzhiyun 	cur = 0;
6009*4882a593Smuzhiyun 	while (cur < end) {
6010*4882a593Smuzhiyun 		map.m_lblk = cur;
6011*4882a593Smuzhiyun 		map.m_len = end - cur;
6012*4882a593Smuzhiyun 		ret = ext4_map_blocks(NULL, inode, &map, 0);
6013*4882a593Smuzhiyun 		if (ret < 0)
6014*4882a593Smuzhiyun 			break;
6015*4882a593Smuzhiyun 		if (ret > 0)
6016*4882a593Smuzhiyun 			numblks += ret;
6017*4882a593Smuzhiyun 		cur = cur + map.m_len;
6018*4882a593Smuzhiyun 	}
6019*4882a593Smuzhiyun 
6020*4882a593Smuzhiyun 	/*
6021*4882a593Smuzhiyun 	 * Count the number of extent tree blocks. We do it by looking up
6022*4882a593Smuzhiyun 	 * two successive extents and determining the difference between
6023*4882a593Smuzhiyun 	 * their paths. When the paths of two successive extents differ,
6024*4882a593Smuzhiyun 	 * we compare the blocks in the path at each level and increment
6025*4882a593Smuzhiyun 	 * iblocks by the total number of differences found.
6026*4882a593Smuzhiyun 	 */
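	/*
	 * Worked example (illustrative): if two successive extents share the
	 * same root but sit under different index and leaf blocks, the loop
	 * below sees two differing b_blocknr values and adds 2 to numblks.
	 */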
6027*4882a593Smuzhiyun 	cur = 0;
6028*4882a593Smuzhiyun 	ret = skip_hole(inode, &cur);
6029*4882a593Smuzhiyun 	if (ret < 0)
6030*4882a593Smuzhiyun 		goto out;
6031*4882a593Smuzhiyun 	path = ext4_find_extent(inode, cur, NULL, 0);
6032*4882a593Smuzhiyun 	if (IS_ERR(path))
6033*4882a593Smuzhiyun 		goto out;
6034*4882a593Smuzhiyun 	numblks += path->p_depth;
6035*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
6036*4882a593Smuzhiyun 	kfree(path);
6037*4882a593Smuzhiyun 	while (cur < end) {
6038*4882a593Smuzhiyun 		path = ext4_find_extent(inode, cur, NULL, 0);
6039*4882a593Smuzhiyun 		if (IS_ERR(path))
6040*4882a593Smuzhiyun 			break;
6041*4882a593Smuzhiyun 		ex = path[path->p_depth].p_ext;
6042*4882a593Smuzhiyun 		if (!ex) {
6043*4882a593Smuzhiyun 			ext4_ext_drop_refs(path);
6044*4882a593Smuzhiyun 			kfree(path);
6045*4882a593Smuzhiyun 			return 0;
6046*4882a593Smuzhiyun 		}
6047*4882a593Smuzhiyun 		cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
6048*4882a593Smuzhiyun 					ext4_ext_get_actual_len(ex));
6049*4882a593Smuzhiyun 		ret = skip_hole(inode, &cur);
6050*4882a593Smuzhiyun 		if (ret < 0) {
6051*4882a593Smuzhiyun 			ext4_ext_drop_refs(path);
6052*4882a593Smuzhiyun 			kfree(path);
6053*4882a593Smuzhiyun 			break;
6054*4882a593Smuzhiyun 		}
6055*4882a593Smuzhiyun 		path2 = ext4_find_extent(inode, cur, NULL, 0);
6056*4882a593Smuzhiyun 		if (IS_ERR(path2)) {
6057*4882a593Smuzhiyun 			ext4_ext_drop_refs(path);
6058*4882a593Smuzhiyun 			kfree(path);
6059*4882a593Smuzhiyun 			break;
6060*4882a593Smuzhiyun 		}
6061*4882a593Smuzhiyun 		ex = path2[path2->p_depth].p_ext;
6062*4882a593Smuzhiyun 		for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
6063*4882a593Smuzhiyun 			cmp1 = cmp2 = 0;
6064*4882a593Smuzhiyun 			if (i <= path->p_depth)
6065*4882a593Smuzhiyun 				cmp1 = path[i].p_bh ?
6066*4882a593Smuzhiyun 					path[i].p_bh->b_blocknr : 0;
6067*4882a593Smuzhiyun 			if (i <= path2->p_depth)
6068*4882a593Smuzhiyun 				cmp2 = path2[i].p_bh ?
6069*4882a593Smuzhiyun 					path2[i].p_bh->b_blocknr : 0;
6070*4882a593Smuzhiyun 			if (cmp1 != cmp2 && cmp2 != 0)
6071*4882a593Smuzhiyun 				numblks++;
6072*4882a593Smuzhiyun 		}
6073*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
6074*4882a593Smuzhiyun 		ext4_ext_drop_refs(path2);
6075*4882a593Smuzhiyun 		kfree(path);
6076*4882a593Smuzhiyun 		kfree(path2);
6077*4882a593Smuzhiyun 	}
6078*4882a593Smuzhiyun 
6079*4882a593Smuzhiyun out:
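	/*
	 * i_blocks is counted in 512-byte sectors, hence the shift by
	 * (s_blocksize_bits - 9).
	 */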
6080*4882a593Smuzhiyun 	inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6081*4882a593Smuzhiyun 	ext4_mark_inode_dirty(NULL, inode);
6082*4882a593Smuzhiyun 	return 0;
6083*4882a593Smuzhiyun }
6084*4882a593Smuzhiyun 
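/*
 * Fast commit replay helper: for every mapped range in this inode, clear
 * the in-core block bitmap bits of the data blocks and of the extent tree
 * blocks on the path leading to them, and record those regions with the
 * fast commit machinery.
 */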
6085*4882a593Smuzhiyun int ext4_ext_clear_bb(struct inode *inode)
6086*4882a593Smuzhiyun {
6087*4882a593Smuzhiyun 	struct ext4_ext_path *path = NULL;
6088*4882a593Smuzhiyun 	struct ext4_extent *ex;
6089*4882a593Smuzhiyun 	ext4_lblk_t cur = 0, end;
6090*4882a593Smuzhiyun 	int j, ret = 0;
6091*4882a593Smuzhiyun 	struct ext4_map_blocks map;
6092*4882a593Smuzhiyun 
6093*4882a593Smuzhiyun 	/* Determine the size of the file first */
6094*4882a593Smuzhiyun 	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6095*4882a593Smuzhiyun 					EXT4_EX_NOCACHE);
6096*4882a593Smuzhiyun 	if (IS_ERR(path))
6097*4882a593Smuzhiyun 		return PTR_ERR(path);
6098*4882a593Smuzhiyun 	ex = path[path->p_depth].p_ext;
6099*4882a593Smuzhiyun 	if (!ex) {
6100*4882a593Smuzhiyun 		ext4_ext_drop_refs(path);
6101*4882a593Smuzhiyun 		kfree(path);
6102*4882a593Smuzhiyun 		return 0;
6103*4882a593Smuzhiyun 	}
6104*4882a593Smuzhiyun 	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6105*4882a593Smuzhiyun 	ext4_ext_drop_refs(path);
6106*4882a593Smuzhiyun 	kfree(path);
6107*4882a593Smuzhiyun 
6108*4882a593Smuzhiyun 	cur = 0;
6109*4882a593Smuzhiyun 	while (cur < end) {
6110*4882a593Smuzhiyun 		map.m_lblk = cur;
6111*4882a593Smuzhiyun 		map.m_len = end - cur;
6112*4882a593Smuzhiyun 		ret = ext4_map_blocks(NULL, inode, &map, 0);
6113*4882a593Smuzhiyun 		if (ret < 0)
6114*4882a593Smuzhiyun 			break;
6115*4882a593Smuzhiyun 		if (ret > 0) {
6116*4882a593Smuzhiyun 			path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6117*4882a593Smuzhiyun 			if (!IS_ERR_OR_NULL(path)) {
6118*4882a593Smuzhiyun 				for (j = 0; j < path->p_depth; j++) {
6119*4882a593Smuzhiyun 
6120*4882a593Smuzhiyun 					ext4_mb_mark_bb(inode->i_sb,
6121*4882a593Smuzhiyun 							path[j].p_block, 1, 0);
6122*4882a593Smuzhiyun 					ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6123*4882a593Smuzhiyun 							0, path[j].p_block, 1, 1);
6124*4882a593Smuzhiyun 				}
6125*4882a593Smuzhiyun 				ext4_ext_drop_refs(path);
6126*4882a593Smuzhiyun 				kfree(path);
6127*4882a593Smuzhiyun 			}
6128*4882a593Smuzhiyun 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6129*4882a593Smuzhiyun 			ext4_fc_record_regions(inode->i_sb, inode->i_ino,
6130*4882a593Smuzhiyun 					map.m_lblk, map.m_pblk, map.m_len, 1);
6131*4882a593Smuzhiyun 		}
6132*4882a593Smuzhiyun 		cur = cur + map.m_len;
6133*4882a593Smuzhiyun 	}
6134*4882a593Smuzhiyun 
6135*4882a593Smuzhiyun 	return 0;
6136*4882a593Smuzhiyun }
6137