1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * fs/ext4/mballoc.h
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Written by: Alex Tomas <alex@clusterfs.com>
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun #ifndef _EXT4_MBALLOC_H
9*4882a593Smuzhiyun #define _EXT4_MBALLOC_H
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include <linux/time.h>
12*4882a593Smuzhiyun #include <linux/fs.h>
13*4882a593Smuzhiyun #include <linux/namei.h>
14*4882a593Smuzhiyun #include <linux/quotaops.h>
15*4882a593Smuzhiyun #include <linux/buffer_head.h>
16*4882a593Smuzhiyun #include <linux/module.h>
17*4882a593Smuzhiyun #include <linux/swap.h>
18*4882a593Smuzhiyun #include <linux/proc_fs.h>
19*4882a593Smuzhiyun #include <linux/pagemap.h>
20*4882a593Smuzhiyun #include <linux/seq_file.h>
21*4882a593Smuzhiyun #include <linux/blkdev.h>
22*4882a593Smuzhiyun #include <linux/mutex.h>
23*4882a593Smuzhiyun #include "ext4_jbd2.h"
24*4882a593Smuzhiyun #include "ext4.h"
25*4882a593Smuzhiyun
/*
 * mb_debug() - debug message helper for the mballoc code.
 * @sb:  super block pointer; its s_id is printed in the message prefix
 * @fmt: printk-style format string (must be a string literal, it is
 *       concatenated onto the prefix)
 * @...: arguments consumed by @fmt
 *
 * With CONFIG_EXT4_DEBUG the message goes through pr_debug() and is
 * prefixed with the current task's comm and pid, sb->s_id, and the
 * file, line and function of the call site.
 *
 * Without CONFIG_EXT4_DEBUG the macro expands to no_printk(), so @sb is
 * not evaluated at all.
 *
 * @sb is parenthesized in the expansion so that any pointer expression
 * (e.g. "a ? b : c" or "p + 1") can be passed safely.
 */
#ifdef CONFIG_EXT4_DEBUG
#define mb_debug(sb, fmt, ...)						\
	pr_debug("[%s/%d] EXT4-fs (%s): (%s, %d): %s: " fmt,		\
		current->comm, task_pid_nr(current), (sb)->s_id,	\
		__FILE__, __LINE__, __func__, ##__VA_ARGS__)
#else
#define mb_debug(sb, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif
37*4882a593Smuzhiyun
/* Operation codes kept for mballoc history. */
#define EXT4_MB_HISTORY_ALLOC		1	/* allocation */
#define EXT4_MB_HISTORY_PREALLOC	2	/* preallocated blocks used */
40*4882a593Smuzhiyun
/*
 * Upper bound on how long mballoc may keep looking for a best extent,
 * counted in found extents.
 */
#define MB_DEFAULT_MAX_TO_SCAN		200

/*
 * Lower bound on the same search: mballoc must look at least this long
 * for a best extent.
 */
#define MB_DEFAULT_MIN_TO_SCAN		10

/*
 * With 'ext4_mb_stats' the allocator collects statistics that are shown
 * at umount.  Collecting them has a cost, so the default is off.
 */
#define MB_DEFAULT_STATS		0

/*
 * Files smaller than MB_DEFAULT_STREAM_THRESHOLD are served by the
 * stream allocator, whose purpose is to pack requests as close to each
 * other as possible to produce smooth I/O traffic.  Stream requests use
 * locality group prealloc space.
 * The threshold can be tuned via /proc/fs/ext4/<partition>/stream_req.
 */
#define MB_DEFAULT_STREAM_THRESHOLD	16	/* 64K */

/*
 * Requests for which the 2^N search using buddies is used.
 */
#define MB_DEFAULT_ORDER2_REQS		2

/*
 * Default group prealloc size: 512 blocks.
 */
#define MB_DEFAULT_GROUP_PREALLOC	512

/*
 * Maximum length of the inode prealloc list.
 */
#define MB_DEFAULT_MAX_INODE_PREALLOC	512
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun struct ext4_free_data {
82*4882a593Smuzhiyun /* this links the free block information from sb_info */
83*4882a593Smuzhiyun struct list_head efd_list;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun /* this links the free block information from group_info */
86*4882a593Smuzhiyun struct rb_node efd_node;
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun /* group which free block extent belongs */
89*4882a593Smuzhiyun ext4_group_t efd_group;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun /* free block extent */
92*4882a593Smuzhiyun ext4_grpblk_t efd_start_cluster;
93*4882a593Smuzhiyun ext4_grpblk_t efd_count;
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun /* transaction which freed this extent */
96*4882a593Smuzhiyun tid_t efd_tid;
97*4882a593Smuzhiyun };
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun struct ext4_prealloc_space {
100*4882a593Smuzhiyun struct list_head pa_inode_list;
101*4882a593Smuzhiyun struct list_head pa_group_list;
102*4882a593Smuzhiyun union {
103*4882a593Smuzhiyun struct list_head pa_tmp_list;
104*4882a593Smuzhiyun struct rcu_head pa_rcu;
105*4882a593Smuzhiyun } u;
106*4882a593Smuzhiyun spinlock_t pa_lock;
107*4882a593Smuzhiyun atomic_t pa_count;
108*4882a593Smuzhiyun unsigned pa_deleted;
109*4882a593Smuzhiyun ext4_fsblk_t pa_pstart; /* phys. block */
110*4882a593Smuzhiyun ext4_lblk_t pa_lstart; /* log. block */
111*4882a593Smuzhiyun ext4_grpblk_t pa_len; /* len of preallocated chunk */
112*4882a593Smuzhiyun ext4_grpblk_t pa_free; /* how many blocks are free */
113*4882a593Smuzhiyun unsigned short pa_type; /* pa type. inode or group */
114*4882a593Smuzhiyun spinlock_t *pa_obj_lock;
115*4882a593Smuzhiyun struct inode *pa_inode; /* hack, for history only */
116*4882a593Smuzhiyun };
117*4882a593Smuzhiyun
/*
 * Preallocation kinds: per-inode or per-group.
 * NOTE(review): presumably the values stored in
 * ext4_prealloc_space::pa_type - confirm against mballoc.c.
 */
enum {
	MB_INODE_PA = 0,
	MB_GROUP_PA = 1,
};
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun struct ext4_free_extent {
124*4882a593Smuzhiyun ext4_lblk_t fe_logical;
125*4882a593Smuzhiyun ext4_grpblk_t fe_start; /* In cluster units */
126*4882a593Smuzhiyun ext4_group_t fe_group;
127*4882a593Smuzhiyun ext4_grpblk_t fe_len; /* In cluster units */
128*4882a593Smuzhiyun };
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun /*
131*4882a593Smuzhiyun * Locality group:
132*4882a593Smuzhiyun * we try to group all related changes together
133*4882a593Smuzhiyun * so that writeback can flush/allocate them together as well
134*4882a593Smuzhiyun * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
135*4882a593Smuzhiyun * (512). We store prealloc space into the hash based on the pa_free blocks
136*4882a593Smuzhiyun * order value.ie, fls(pa_free)-1;
137*4882a593Smuzhiyun */
138*4882a593Smuzhiyun #define PREALLOC_TB_SIZE 10
139*4882a593Smuzhiyun struct ext4_locality_group {
140*4882a593Smuzhiyun /* for allocator */
141*4882a593Smuzhiyun /* to serialize allocates */
142*4882a593Smuzhiyun struct mutex lg_mutex;
143*4882a593Smuzhiyun /* list of preallocations */
144*4882a593Smuzhiyun struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
145*4882a593Smuzhiyun spinlock_t lg_prealloc_lock;
146*4882a593Smuzhiyun };
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun struct ext4_allocation_context {
149*4882a593Smuzhiyun struct inode *ac_inode;
150*4882a593Smuzhiyun struct super_block *ac_sb;
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun /* original request */
153*4882a593Smuzhiyun struct ext4_free_extent ac_o_ex;
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun /* goal request (normalized ac_o_ex) */
156*4882a593Smuzhiyun struct ext4_free_extent ac_g_ex;
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /* the best found extent */
159*4882a593Smuzhiyun struct ext4_free_extent ac_b_ex;
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun /* copy of the best found extent taken before preallocation efforts */
162*4882a593Smuzhiyun struct ext4_free_extent ac_f_ex;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun __u16 ac_groups_scanned;
165*4882a593Smuzhiyun __u16 ac_found;
166*4882a593Smuzhiyun __u16 ac_tail;
167*4882a593Smuzhiyun __u16 ac_buddy;
168*4882a593Smuzhiyun __u16 ac_flags; /* allocation hints */
169*4882a593Smuzhiyun __u8 ac_status;
170*4882a593Smuzhiyun __u8 ac_criteria;
171*4882a593Smuzhiyun __u8 ac_2order; /* if request is to allocate 2^N blocks and
172*4882a593Smuzhiyun * N > 0, the field stores N, otherwise 0 */
173*4882a593Smuzhiyun __u8 ac_op; /* operation, for history only */
174*4882a593Smuzhiyun struct page *ac_bitmap_page;
175*4882a593Smuzhiyun struct page *ac_buddy_page;
176*4882a593Smuzhiyun struct ext4_prealloc_space *ac_pa;
177*4882a593Smuzhiyun struct ext4_locality_group *ac_lg;
178*4882a593Smuzhiyun };
179*4882a593Smuzhiyun
/*
 * Allocation status codes.
 * NOTE(review): presumably the values kept in
 * ext4_allocation_context::ac_status - confirm against mballoc.c.
 */
#define AC_STATUS_CONTINUE	1
#define AC_STATUS_FOUND		2
#define AC_STATUS_BREAK		3
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun struct ext4_buddy {
185*4882a593Smuzhiyun struct page *bd_buddy_page;
186*4882a593Smuzhiyun void *bd_buddy;
187*4882a593Smuzhiyun struct page *bd_bitmap_page;
188*4882a593Smuzhiyun void *bd_bitmap;
189*4882a593Smuzhiyun struct ext4_group_info *bd_info;
190*4882a593Smuzhiyun struct super_block *bd_sb;
191*4882a593Smuzhiyun __u16 bd_blkbits;
192*4882a593Smuzhiyun ext4_group_t bd_group;
193*4882a593Smuzhiyun };
194*4882a593Smuzhiyun
ext4_grp_offs_to_block(struct super_block * sb,struct ext4_free_extent * fex)195*4882a593Smuzhiyun static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
196*4882a593Smuzhiyun struct ext4_free_extent *fex)
197*4882a593Smuzhiyun {
198*4882a593Smuzhiyun return ext4_group_first_block_no(sb, fex->fe_group) +
199*4882a593Smuzhiyun (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun typedef int (*ext4_mballoc_query_range_fn)(
203*4882a593Smuzhiyun struct super_block *sb,
204*4882a593Smuzhiyun ext4_group_t agno,
205*4882a593Smuzhiyun ext4_grpblk_t start,
206*4882a593Smuzhiyun ext4_grpblk_t len,
207*4882a593Smuzhiyun void *priv);
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun int
210*4882a593Smuzhiyun ext4_mballoc_query_range(
211*4882a593Smuzhiyun struct super_block *sb,
212*4882a593Smuzhiyun ext4_group_t agno,
213*4882a593Smuzhiyun ext4_grpblk_t start,
214*4882a593Smuzhiyun ext4_grpblk_t end,
215*4882a593Smuzhiyun ext4_mballoc_query_range_fn formatter,
216*4882a593Smuzhiyun void *priv);
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun #endif
219