xref: /OK3568_Linux_fs/kernel/fs/ext4/mballoc.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *  fs/ext4/mballoc.h
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  *  Written by: Alex Tomas <alex@clusterfs.com>
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun #ifndef _EXT4_MBALLOC_H
9*4882a593Smuzhiyun #define _EXT4_MBALLOC_H
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun #include <linux/time.h>
12*4882a593Smuzhiyun #include <linux/fs.h>
13*4882a593Smuzhiyun #include <linux/namei.h>
14*4882a593Smuzhiyun #include <linux/quotaops.h>
15*4882a593Smuzhiyun #include <linux/buffer_head.h>
16*4882a593Smuzhiyun #include <linux/module.h>
17*4882a593Smuzhiyun #include <linux/swap.h>
18*4882a593Smuzhiyun #include <linux/proc_fs.h>
19*4882a593Smuzhiyun #include <linux/pagemap.h>
20*4882a593Smuzhiyun #include <linux/seq_file.h>
21*4882a593Smuzhiyun #include <linux/blkdev.h>
22*4882a593Smuzhiyun #include <linux/mutex.h>
23*4882a593Smuzhiyun #include "ext4_jbd2.h"
24*4882a593Smuzhiyun #include "ext4.h"
25*4882a593Smuzhiyun 
/*
 * mb_debug() - debug message helper for the mballoc code.
 * @sb:  super block; its s_id is printed in the message prefix
 * @fmt: printk-style format; it is pasted onto the prefix string, so it
 *       must be a string literal
 * @...: arguments consumed by @fmt
 *
 * With CONFIG_EXT4_DEBUG the message goes through pr_debug() and is
 * prefixed with the current task's comm and pid, the filesystem id and
 * the file, line and function of the call site.
 *
 * Without CONFIG_EXT4_DEBUG the macro expands to no_printk(fmt, ...);
 * @sb is not evaluated in that configuration.
 *
 * @sb is parenthesized in the expansion so that any pointer expression
 * can be passed, not only a plain identifier.
 */
#ifdef CONFIG_EXT4_DEBUG
#define mb_debug(sb, fmt, ...)						\
	pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt,		\
		 current->comm, task_pid_nr(current), (sb)->s_id,	\
		 __FILE__, __LINE__, __func__, ##__VA_ARGS__)
#else
#define mb_debug(sb, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif
37*4882a593Smuzhiyun 
/* Operation codes recorded for mballoc history. */
/* a regular allocation */
#define EXT4_MB_HISTORY_ALLOC		1
/* preallocated blocks were used */
#define EXT4_MB_HISTORY_PREALLOC	2
40*4882a593Smuzhiyun 
/*
 * Upper bound on how long mballoc may keep looking for a best extent,
 * counted in found extents.
 */
#define MB_DEFAULT_MAX_TO_SCAN		200

/*
 * Lower bound on how long mballoc must keep looking for a best extent.
 */
#define MB_DEFAULT_MIN_TO_SCAN		10
50*4882a593Smuzhiyun 
/*
 * When 'ext4_mb_stats' is enabled the allocator collects statistics that
 * are shown at umount. Collecting them is not free, hence off by default.
 */
#define MB_DEFAULT_STATS		0
56*4882a593Smuzhiyun 
/*
 * Files smaller than MB_DEFAULT_STREAM_THRESHOLD are served by the
 * stream allocator, whose purpose is to pack requests as close to each
 * other as possible to produce smooth I/O traffic.
 * Stream requests use the locality group prealloc space.
 * The threshold can be tuned via /proc/fs/ext4/<partition>/stream_req.
 *
 * NOTE(review): the tunable path above is taken from the original
 * comment; confirm where current kernels expose this knob.
 */
#define MB_DEFAULT_STREAM_THRESHOLD	16	/* 64K (presumably 16 blocks of 4K - confirm) */
65*4882a593Smuzhiyun 
/*
 * Selects which requests are searched with the 2^N lookup using buddies.
 */
#define MB_DEFAULT_ORDER2_REQS		2
70*4882a593Smuzhiyun 
/*
 * Default size of a group preallocation: 512 blocks.
 */
#define MB_DEFAULT_GROUP_PREALLOC	512

/*
 * Maximum length of the inode prealloc list.
 */
#define MB_DEFAULT_MAX_INODE_PREALLOC	512
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun struct ext4_free_data {
82*4882a593Smuzhiyun 	/* this links the free block information from sb_info */
83*4882a593Smuzhiyun 	struct list_head		efd_list;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	/* this links the free block information from group_info */
86*4882a593Smuzhiyun 	struct rb_node			efd_node;
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun 	/* group which free block extent belongs */
89*4882a593Smuzhiyun 	ext4_group_t			efd_group;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	/* free block extent */
92*4882a593Smuzhiyun 	ext4_grpblk_t			efd_start_cluster;
93*4882a593Smuzhiyun 	ext4_grpblk_t			efd_count;
94*4882a593Smuzhiyun 
95*4882a593Smuzhiyun 	/* transaction which freed this extent */
96*4882a593Smuzhiyun 	tid_t				efd_tid;
97*4882a593Smuzhiyun };
98*4882a593Smuzhiyun 
99*4882a593Smuzhiyun struct ext4_prealloc_space {
100*4882a593Smuzhiyun 	struct list_head	pa_inode_list;
101*4882a593Smuzhiyun 	struct list_head	pa_group_list;
102*4882a593Smuzhiyun 	union {
103*4882a593Smuzhiyun 		struct list_head pa_tmp_list;
104*4882a593Smuzhiyun 		struct rcu_head	pa_rcu;
105*4882a593Smuzhiyun 	} u;
106*4882a593Smuzhiyun 	spinlock_t		pa_lock;
107*4882a593Smuzhiyun 	atomic_t		pa_count;
108*4882a593Smuzhiyun 	unsigned		pa_deleted;
109*4882a593Smuzhiyun 	ext4_fsblk_t		pa_pstart;	/* phys. block */
110*4882a593Smuzhiyun 	ext4_lblk_t		pa_lstart;	/* log. block */
111*4882a593Smuzhiyun 	ext4_grpblk_t		pa_len;		/* len of preallocated chunk */
112*4882a593Smuzhiyun 	ext4_grpblk_t		pa_free;	/* how many blocks are free */
113*4882a593Smuzhiyun 	unsigned short		pa_type;	/* pa type. inode or group */
114*4882a593Smuzhiyun 	spinlock_t		*pa_obj_lock;
115*4882a593Smuzhiyun 	struct inode		*pa_inode;	/* hack, for history only */
116*4882a593Smuzhiyun };
117*4882a593Smuzhiyun 
/* Preallocation kinds: inode (0) or group (1). */
enum {
	MB_INODE_PA,		/* == 0 */
	MB_GROUP_PA,		/* == 1 */
};
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun struct ext4_free_extent {
124*4882a593Smuzhiyun 	ext4_lblk_t fe_logical;
125*4882a593Smuzhiyun 	ext4_grpblk_t fe_start;	/* In cluster units */
126*4882a593Smuzhiyun 	ext4_group_t fe_group;
127*4882a593Smuzhiyun 	ext4_grpblk_t fe_len;	/* In cluster units */
128*4882a593Smuzhiyun };
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun /*
131*4882a593Smuzhiyun  * Locality group:
132*4882a593Smuzhiyun  *   we try to group all related changes together
133*4882a593Smuzhiyun  *   so that writeback can flush/allocate them together as well
134*4882a593Smuzhiyun  *   Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
135*4882a593Smuzhiyun  *   (512). We store prealloc space into the hash based on the pa_free blocks
136*4882a593Smuzhiyun  *   order value.ie, fls(pa_free)-1;
137*4882a593Smuzhiyun  */
138*4882a593Smuzhiyun #define PREALLOC_TB_SIZE 10
139*4882a593Smuzhiyun struct ext4_locality_group {
140*4882a593Smuzhiyun 	/* for allocator */
141*4882a593Smuzhiyun 	/* to serialize allocates */
142*4882a593Smuzhiyun 	struct mutex		lg_mutex;
143*4882a593Smuzhiyun 	/* list of preallocations */
144*4882a593Smuzhiyun 	struct list_head	lg_prealloc_list[PREALLOC_TB_SIZE];
145*4882a593Smuzhiyun 	spinlock_t		lg_prealloc_lock;
146*4882a593Smuzhiyun };
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun struct ext4_allocation_context {
149*4882a593Smuzhiyun 	struct inode *ac_inode;
150*4882a593Smuzhiyun 	struct super_block *ac_sb;
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun 	/* original request */
153*4882a593Smuzhiyun 	struct ext4_free_extent ac_o_ex;
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun 	/* goal request (normalized ac_o_ex) */
156*4882a593Smuzhiyun 	struct ext4_free_extent ac_g_ex;
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 	/* the best found extent */
159*4882a593Smuzhiyun 	struct ext4_free_extent ac_b_ex;
160*4882a593Smuzhiyun 
161*4882a593Smuzhiyun 	/* copy of the best found extent taken before preallocation efforts */
162*4882a593Smuzhiyun 	struct ext4_free_extent ac_f_ex;
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun 	__u16 ac_groups_scanned;
165*4882a593Smuzhiyun 	__u16 ac_found;
166*4882a593Smuzhiyun 	__u16 ac_tail;
167*4882a593Smuzhiyun 	__u16 ac_buddy;
168*4882a593Smuzhiyun 	__u16 ac_flags;		/* allocation hints */
169*4882a593Smuzhiyun 	__u8 ac_status;
170*4882a593Smuzhiyun 	__u8 ac_criteria;
171*4882a593Smuzhiyun 	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
172*4882a593Smuzhiyun 				 * N > 0, the field stores N, otherwise 0 */
173*4882a593Smuzhiyun 	__u8 ac_op;		/* operation, for history only */
174*4882a593Smuzhiyun 	struct page *ac_bitmap_page;
175*4882a593Smuzhiyun 	struct page *ac_buddy_page;
176*4882a593Smuzhiyun 	struct ext4_prealloc_space *ac_pa;
177*4882a593Smuzhiyun 	struct ext4_locality_group *ac_lg;
178*4882a593Smuzhiyun };
179*4882a593Smuzhiyun 
/*
 * Allocation status codes.
 * NOTE(review): presumably the values stored in
 * ext4_allocation_context::ac_status - confirm in mballoc.c.
 */
#define AC_STATUS_CONTINUE	1
#define AC_STATUS_FOUND		2
#define AC_STATUS_BREAK		3
183*4882a593Smuzhiyun 
184*4882a593Smuzhiyun struct ext4_buddy {
185*4882a593Smuzhiyun 	struct page *bd_buddy_page;
186*4882a593Smuzhiyun 	void *bd_buddy;
187*4882a593Smuzhiyun 	struct page *bd_bitmap_page;
188*4882a593Smuzhiyun 	void *bd_bitmap;
189*4882a593Smuzhiyun 	struct ext4_group_info *bd_info;
190*4882a593Smuzhiyun 	struct super_block *bd_sb;
191*4882a593Smuzhiyun 	__u16 bd_blkbits;
192*4882a593Smuzhiyun 	ext4_group_t bd_group;
193*4882a593Smuzhiyun };
194*4882a593Smuzhiyun 
ext4_grp_offs_to_block(struct super_block * sb,struct ext4_free_extent * fex)195*4882a593Smuzhiyun static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
196*4882a593Smuzhiyun 					struct ext4_free_extent *fex)
197*4882a593Smuzhiyun {
198*4882a593Smuzhiyun 	return ext4_group_first_block_no(sb, fex->fe_group) +
199*4882a593Smuzhiyun 		(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun typedef int (*ext4_mballoc_query_range_fn)(
203*4882a593Smuzhiyun 	struct super_block		*sb,
204*4882a593Smuzhiyun 	ext4_group_t			agno,
205*4882a593Smuzhiyun 	ext4_grpblk_t			start,
206*4882a593Smuzhiyun 	ext4_grpblk_t			len,
207*4882a593Smuzhiyun 	void				*priv);
208*4882a593Smuzhiyun 
209*4882a593Smuzhiyun int
210*4882a593Smuzhiyun ext4_mballoc_query_range(
211*4882a593Smuzhiyun 	struct super_block		*sb,
212*4882a593Smuzhiyun 	ext4_group_t			agno,
213*4882a593Smuzhiyun 	ext4_grpblk_t			start,
214*4882a593Smuzhiyun 	ext4_grpblk_t			end,
215*4882a593Smuzhiyun 	ext4_mballoc_query_range_fn	formatter,
216*4882a593Smuzhiyun 	void				*priv);
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun #endif
219