/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_BLOCK_GROUP_H
#define BTRFS_BLOCK_GROUP_H

#include "free-space-cache.h"

enum btrfs_disk_cache_state {
	BTRFS_DC_WRITTEN,
	BTRFS_DC_ERROR,
	BTRFS_DC_CLEAR,
	BTRFS_DC_SETUP,
};

/*
 * This describes the state of the block_group for async discard.  This is due
 * to the two pass nature of it where extent discarding is prioritized over
 * bitmap discarding.  BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
 * between lists to prevent contention for discard state variables
 * (e.g. discard_cursor).
 */
enum btrfs_discard_state {
	BTRFS_DISCARD_EXTENTS,
	BTRFS_DISCARD_BITMAPS,
	BTRFS_DISCARD_RESET_CURSOR,
};
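
/*
 * Illustrative sketch (an assumption drawn from the comment above, not a
 * verbatim trace of discard.c): a block group is first trimmed extent by
 * extent, then bitmap by bitmap, with the cursor reset when it moves
 * between the two passes:
 *
 *	BTRFS_DISCARD_EXTENTS -> BTRFS_DISCARD_RESET_CURSOR ->
 *	BTRFS_DISCARD_BITMAPS -> BTRFS_DISCARD_RESET_CURSOR
 */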

/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try to allocate one if we have very few
 * chunks already allocated.  This is used as part of the clustering code to
 * help make sure we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */
enum btrfs_chunk_alloc_enum {
	CHUNK_ALLOC_NO_FORCE,
	CHUNK_ALLOC_LIMITED,
	CHUNK_ALLOC_FORCE,
};
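
/*
 * Example (a sketch, not lifted from the allocator): a caller that only
 * wants a chunk when one is actually needed passes CHUNK_ALLOC_NO_FORCE,
 * while code that must get a new chunk uses CHUNK_ALLOC_FORCE, e.g. via
 * the btrfs_force_chunk_alloc() helper declared below.  Assuming a live
 * transaction handle @trans:
 *
 *	ret = btrfs_chunk_alloc(trans,
 *				btrfs_metadata_alloc_profile(fs_info),
 *				CHUNK_ALLOC_NO_FORCE);
 */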

struct btrfs_caching_control {
	struct list_head list;
	struct mutex mutex;
	wait_queue_head_t wait;
	struct btrfs_work work;
	struct btrfs_block_group *block_group;
	u64 progress;
	refcount_t count;
};

/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M
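
/*
 * Example (a sketch under assumed names): an allocator-side caller can kick
 * off caching and then block until at least @num_bytes of free space has
 * been found; caching_thread() wakes waiters after every
 * CACHING_CTL_WAKE_UP bytes of progress:
 *
 *	ret = btrfs_cache_block_group(cache, 0);
 *	if (ret == 0)
 *		btrfs_wait_block_group_cache_progress(cache, num_bytes);
 */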

struct btrfs_block_group {
	struct btrfs_fs_info *fs_info;
	struct inode *inode;
	spinlock_t lock;
	u64 start;
	u64 length;
	u64 pinned;
	u64 reserved;
	u64 used;
	u64 delalloc_bytes;
	u64 bytes_super;
	u64 flags;
	u64 cache_generation;

	/*
	 * If the free space extent count exceeds this number, convert the block
	 * group to bitmaps.
	 */
	u32 bitmap_high_thresh;

	/*
	 * If the free space extent count drops below this number, convert the
	 * block group back to extents.
	 */
	u32 bitmap_low_thresh;

	/*
	 * Used only for the delayed data space allocation, because only the
	 * data space allocation and the related metadata updates can span
	 * transactions.
	 */
	struct rw_semaphore data_rwsem;

	/* For raid56, this is a full stripe, without parity */
	unsigned long full_stripe_len;

	unsigned int ro;
	unsigned int iref:1;
	unsigned int has_caching_ctl:1;
	unsigned int removed:1;

	int disk_cache_state;

	/* Cache tracking stuff */
	int cached;
	struct btrfs_caching_control *caching_ctl;
	u64 last_byte_to_unpin;

	struct btrfs_space_info *space_info;

	/* Free space cache stuff */
	struct btrfs_free_space_ctl *free_space_ctl;

	/* Block group cache stuff */
	struct rb_node cache_node;

	/* For block groups in the same raid type */
	struct list_head list;

	refcount_t refs;

	/*
	 * List of struct btrfs_free_cluster for this block group. Today it
	 * will only have one thing on it, but that may change.
	 */
	struct list_head cluster_list;

	/* For delayed block group creation or deletion of empty block groups */
	struct list_head bg_list;

	/* For read-only block groups */
	struct list_head ro_list;

	/*
	 * When non-zero it means the block group's logical address and its
	 * device extents cannot be reused for future block group allocations
	 * until the counter goes down to 0. This is to prevent them from being
	 * reused while some task is still using the block group after it was
	 * deleted - we want to make sure they can only be reused for new block
	 * groups after that task is done with the deleted block group.
	 */
	atomic_t frozen;
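
	/*
	 * Illustrative pairing (a sketch): a task that must keep a deleted
	 * block group's logical range and device extents from being reused
	 * brackets its work with the helpers declared near the end of this
	 * header:
	 *
	 *	btrfs_freeze_block_group(bg);
	 *	... use the possibly deleted block group ...
	 *	btrfs_unfreeze_block_group(bg);
	 */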

	/* For discard operations */
	struct list_head discard_list;
	int discard_index;
	u64 discard_eligible_time;
	u64 discard_cursor;
	enum btrfs_discard_state discard_state;

	/* For dirty block groups */
	struct list_head dirty_list;
	struct list_head io_list;

	struct btrfs_io_ctl io_ctl;

	/*
	 * Incremented when doing extent allocations and holding a read lock
	 * on the space_info's groups_sem semaphore.
	 * Decremented when an ordered extent that represents an IO against this
	 * block group's range is created (after it's added to its inode's
	 * root's list of ordered extents) or immediately after the allocation
	 * if it's a metadata extent or fallocate extent (for these cases we
	 * don't create ordered extents).
	 */
	atomic_t reservations;
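
	/*
	 * Example (a sketch; relocation is one such user): a task that must
	 * not see new IO started against the block group first waits for
	 * pending reservations to drain:
	 *
	 *	btrfs_wait_block_group_reservations(bg);
	 *
	 * while the write side drops its reservation once its ordered extent
	 * exists, via btrfs_dec_block_group_reservations(fs_info, bg->start).
	 */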

	/*
	 * Incremented while holding the spinlock *lock* by a task checking if
	 * it can perform a nocow write (incremented if the value for the *ro*
	 * field is 0). Decremented by such tasks once they create an ordered
	 * extent or before that if some error happens before reaching that step.
	 * This is to prevent races between block group relocation and nocow
	 * writes through direct IO.
	 */
	atomic_t nocow_writers;
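
	/*
	 * Example (a sketch under assumed names): a nocow write path pins the
	 * block group before writing in place, assuming @bytenr falls inside
	 * this block group:
	 *
	 *	if (btrfs_inc_nocow_writers(fs_info, bytenr)) {
	 *		... do the nocow write, create the ordered extent ...
	 *		btrfs_dec_nocow_writers(fs_info, bytenr);
	 *	}
	 */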

	/* Lock for free space tree operations. */
	struct mutex free_space_lock;

	/*
	 * Does the block group need to be added to the free space tree?
	 * Protected by free_space_lock.
	 */
	int needs_free_space;

	/*
	 * Number of extents in this block group used for swap files.
	 * All accesses protected by the spinlock 'lock'.
	 */
	int swap_extents;

	/* Record locked full stripes for RAID5/6 block group */
	struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
};

static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
{
	return (block_group->start + block_group->length);
}

static inline bool btrfs_is_block_group_data_only(
					struct btrfs_block_group *block_group)
{
	/*
	 * In mixed mode the fragmentation is expected to be high, lowering the
	 * efficiency, so only proper data block groups are considered.
	 */
	return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
	       !(block_group->flags & BTRFS_BLOCK_GROUP_METADATA);
}

#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
		struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;

	return (btrfs_test_opt(fs_info, FRAGMENT_METADATA) &&
		block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
	       (btrfs_test_opt(fs_info, FRAGMENT_DATA) &&
		block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif

struct btrfs_block_group *btrfs_lookup_first_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_lookup_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_next_block_group(
		struct btrfs_block_group *cache);
void btrfs_get_block_group(struct btrfs_block_group *cache);
void btrfs_put_block_group(struct btrfs_block_group *cache);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
					const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
					   u64 num_bytes);
int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
int btrfs_cache_block_group(struct btrfs_block_group *cache,
			    int load_cache_only);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
		struct btrfs_block_group *cache);
u64 add_new_free_space(struct btrfs_block_group *block_group,
		       u64 start, u64 end);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				struct btrfs_fs_info *fs_info,
				const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
			   u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
			     bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, int alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
			     u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
			       u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
		      enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);

static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
}

static inline u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
}

static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}

static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
	/* Make sure we load an up-to-date ->cached value set by the caching thread. */
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
#endif

bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
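
/*
 * Example (a sketch of the intended pairing): swapfile activation pins the
 * extents it maps and deactivation drops them again; the inc helper is
 * expected to fail for a read-only block group:
 *
 *	if (!btrfs_inc_block_group_swap_extents(bg))
 *		return -EINVAL;
 *	... map the swap extents ...
 *	btrfs_dec_block_group_swap_extents(bg, 1);
 */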

#endif /* BTRFS_BLOCK_GROUP_H */