// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
#define FORCE_EXTENT_THRESHOLD	SZ_1M

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path);

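/*
 * Look up the free space cache inode for a block group: read the free space
 * header item keyed by (FREE_SPACE_OBJECTID, 0, offset) and iget the inode
 * that the header points to.
 */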
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

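/*
 * Block group aware wrapper: return the cached inode reference if the block
 * group already holds one, otherwise look the inode up on disk, convert
 * old-style inodes to NODATASUM/NODATACOW and cache the reference in the
 * block group.
 */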
struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
		struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->start);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

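/*
 * Create a new space cache inode: insert the inode item (NOCOMPRESS and
 * PREALLOC, plus NODATASUM/NODATACOW for block group caches since their crcs
 * are stored inline in the cache file), then insert the free space header
 * item keyed by the block group offset and point it at the inode.
 */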
static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

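/*
 * Check that @rsv holds enough reserved space to truncate the cache inode:
 * one metadata insertion worth of slack plus one item update for the inode.
 */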
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

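/*
 * Layout of the cache file: when crcs are used, the first page starts with
 * one u32 crc per page followed by a u64 generation; without crcs there is
 * just the generation. The rest of the pages hold the free space entries and
 * bitmaps. io_ctl_init sizes the page array and rejects a write if the crc
 * table and generation cannot fit in the first page.
 */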
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
	struct page *page;
	struct inode *inode = io_ctl->inode;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (page->mapping != inode->i_mapping) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "free space cache page truncated");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}

static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	put_unaligned_le64(generation, io_ctl->cur);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	u64 cache_gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	cache_gen = get_unaligned_le64(io_ctl->cur);
	if (cache_gen != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			     cache_gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

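/*
 * Compute the crc32c of the payload of the page at @index and store it in
 * the crc table at the front of the first page. The crc of page 0 skips the
 * crc table itself.
 */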
static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

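/*
 * Append one extent or bitmap entry header at the current cursor. When the
 * current page fills up, crc it and map the next page so the caller can keep
 * writing entries back to back.
 */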
static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	put_unaligned_le64(offset, &entry->offset);
	put_unaligned_le64(bytes, &entry->bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = get_unaligned_le64(&e->offset);
	entry->bytes = get_unaligned_le64(&e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	copy_page(entry->bitmap, io_ctl->cur);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

/*
 * Since we attach pinned extents after the fact we can have contiguous sections
 * of free space that are split up in entries. This poses a problem with the
 * tree logging stuff since it could have allocated across what appears to be 2
 * entries since we would have merged the entries when adding the pinned extents
 * back to the free space cache. So run through the space cache that we just
 * loaded and merge contiguous entries. This will make the log replay stuff not
 * blow up and it will make for nicer allocator behavior.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *e, *prev = NULL;
	struct rb_node *n;

again:
	spin_lock(&ctl->tree_lock);
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		e = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!prev)
			goto next;
		if (e->bitmap || prev->bitmap)
			goto next;
		if (prev->offset + prev->bytes == e->offset) {
			unlink_free_space(ctl, prev);
			unlink_free_space(ctl, e);
			prev->bytes += e->bytes;
			kmem_cache_free(btrfs_free_space_cachep, e);
			link_free_space(ctl, prev);
			prev = NULL;
			spin_unlock(&ctl->tree_lock);
			goto again;
		}
next:
		prev = e;
	}
	spin_unlock(&ctl->tree_lock);
}

static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	LIST_HEAD(bitmaps);
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (!BTRFS_I(inode)->generation) {
		btrfs_info(fs_info,
			   "the free space cache file (%llu) is invalid, skip it",
			   offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(fs_info,
			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
			  BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, 0);
	if (ret)
		return ret;

	readahead_cache(inode);

	ret = io_ctl_prepare_pages(&io_ctl, true);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e) {
			ret = -ENOMEM;
			goto free_cache;
		}

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		/*
		 * Sync discard ensures that the free space cache is always
		 * trimmed. So when reading this in, the state should reflect
		 * that. We also do this for async as a stop gap for lack of
		 * persistence.
		 */
		if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
		    btrfs_test_opt(fs_info, DISCARD_ASYNC))
			e->trim_state = BTRFS_TRIM_STATE_TRIMMED;

		if (!e->bytes) {
			ret = -1;
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kmem_cache_zalloc(
					btrfs_free_space_bitmap_cachep, GFP_NOFS);
			if (!e->bitmap) {
				ret = -ENOMEM;
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmap payloads are stored after all the entry headers, so only
	 * now that every entry has been added to the cache can we read the
	 * bitmaps themselves in.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
		e->bitmap_extents = count_bitmap_extents(ctl, e);
		if (!btrfs_free_space_trimmed(e)) {
			ctl->discardable_extents[BTRFS_STAT_CURR] +=
				e->bitmap_extents;
			ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
		}
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	btrfs_discard_update_discardable(ctl->private, ctl);
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
	 * a block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
	 *
	 * It's safe to read the inode item using the commit root because
	 * block groups, once loaded, stay in memory forever (until they are
	 * removed) as well as their space caches once loaded. New block groups
	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
	 * we will never try to read their inode item while the fs is mounted.
	 */
	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->start);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->length - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
			   block_group->start);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
			   block_group->start);
	}

	iput(inode);
	return ret;
}

static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space_ctl *ctl,
			      struct btrfs_block_group *block_group,
			      int *entries, int *bitmaps,
			      struct list_head *bitmap_list)
{
	int ret;
	struct btrfs_free_cluster *cluster = NULL;
	struct btrfs_free_cluster *cluster_locked = NULL;
	struct rb_node *node = rb_first(&ctl->free_space_offset);
	struct btrfs_trim_range *trim_entry;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
	}

	if (!node && cluster) {
		cluster_locked = cluster;
		spin_lock(&cluster_locked->lock);
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		*entries += 1;

		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto fail;

		if (e->bitmap) {
			list_add_tail(&e->list, bitmap_list);
			*bitmaps += 1;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster_locked = cluster;
			spin_lock(&cluster_locked->lock);
			cluster = NULL;
		}
	}
	if (cluster_locked) {
		spin_unlock(&cluster_locked->lock);
		cluster_locked = NULL;
	}

	/*
	 * Make sure we don't miss any range that was removed from our rbtree
	 * because trimming is running. Otherwise after a umount+mount (or crash
	 * after committing the transaction) we would leak free space and get
	 * an inconsistent free space cache report from fsck.
	 */
	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
				       trim_entry->bytes, NULL);
		if (ret)
			goto fail;
		*entries += 1;
	}

	return 0;
fail:
	if (cluster_locked)
		spin_unlock(&cluster_locked->lock);
	return -ENOSPC;
}

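/*
 * Update the free space header item after the cache file has been written:
 * record the number of entries and bitmaps and stamp the transaction's
 * generation so a later load can validate the file.
 */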
static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct inode *inode,
		  struct btrfs_path *path, u64 offset,
		  int entries, int bitmaps)
{
	struct btrfs_key key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);
		goto fail;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		ASSERT(path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1, EXTENT_DELALLOC, 0,
					 0, NULL);
			btrfs_release_path(path);
			goto fail;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;

fail:
	return -1;
}

static noinline_for_stack int write_pinned_extent_entries(
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
{
	u64 start, extent_start, extent_end, len;
	struct extent_io_tree *unpin = NULL;
	int ret;

	if (!block_group)
		return 0;

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 *
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = &trans->transaction->pinned_extents;

	start = block_group->start;

	while (start < block_group->start + block_group->length) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			return 0;

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->start + block_group->length)
			return 0;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->start + block_group->length,
				 extent_end + 1);
		len = extent_end - extent_start;

		*entries += 1;
		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
		if (ret)
			return -ENOSPC;

		start = extent_end;
	}

	return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;
	int ret;

	/* Write out the bitmaps */
	list_for_each_entry_safe(entry, next, bitmap_list, list) {
		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
		if (ret)
			return -ENOSPC;
		list_del_init(&entry->list);
	}

	return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
	int ret;

	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);

	return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;

	list_for_each_entry_safe(entry, next, bitmap_list, list)
		list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
			   struct btrfs_io_ctl *io_ctl,
			   struct extent_state **cached_state)
{
	io_ctl_drop_pages(io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, cached_state);
}

__btrfs_wait_cache_io(struct btrfs_root * root,struct btrfs_trans_handle * trans,struct btrfs_block_group * block_group,struct btrfs_io_ctl * io_ctl,struct btrfs_path * path,u64 offset)1169*4882a593Smuzhiyun static int __btrfs_wait_cache_io(struct btrfs_root *root,
1170*4882a593Smuzhiyun struct btrfs_trans_handle *trans,
1171*4882a593Smuzhiyun struct btrfs_block_group *block_group,
1172*4882a593Smuzhiyun struct btrfs_io_ctl *io_ctl,
1173*4882a593Smuzhiyun struct btrfs_path *path, u64 offset)
1174*4882a593Smuzhiyun {
1175*4882a593Smuzhiyun int ret;
1176*4882a593Smuzhiyun struct inode *inode = io_ctl->inode;
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun if (!inode)
1179*4882a593Smuzhiyun return 0;
1180*4882a593Smuzhiyun
1181*4882a593Smuzhiyun /* Flush the dirty pages in the cache file. */
1182*4882a593Smuzhiyun ret = flush_dirty_cache(inode);
1183*4882a593Smuzhiyun if (ret)
1184*4882a593Smuzhiyun goto out;
1185*4882a593Smuzhiyun
1186*4882a593Smuzhiyun /* Update the cache item to tell everyone this cache file is valid. */
1187*4882a593Smuzhiyun ret = update_cache_item(trans, root, inode, path, offset,
1188*4882a593Smuzhiyun io_ctl->entries, io_ctl->bitmaps);
1189*4882a593Smuzhiyun out:
1190*4882a593Smuzhiyun if (ret) {
1191*4882a593Smuzhiyun invalidate_inode_pages2(inode->i_mapping);
1192*4882a593Smuzhiyun BTRFS_I(inode)->generation = 0;
1193*4882a593Smuzhiyun if (block_group)
1194*4882a593Smuzhiyun btrfs_debug(root->fs_info,
1195*4882a593Smuzhiyun "failed to write free space cache for block group %llu error %d",
1196*4882a593Smuzhiyun block_group->start, ret);
1197*4882a593Smuzhiyun }
1198*4882a593Smuzhiyun btrfs_update_inode(trans, root, inode);
1199*4882a593Smuzhiyun
1200*4882a593Smuzhiyun if (block_group) {
1201*4882a593Smuzhiyun /* the dirty list is protected by the dirty_bgs_lock */
1202*4882a593Smuzhiyun spin_lock(&trans->transaction->dirty_bgs_lock);
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun /* the disk_cache_state is protected by the block group lock */
1205*4882a593Smuzhiyun spin_lock(&block_group->lock);
1206*4882a593Smuzhiyun
1207*4882a593Smuzhiyun /*
1208*4882a593Smuzhiyun * only mark this as written if we didn't get put back on
1209*4882a593Smuzhiyun * the dirty list while waiting for IO. Otherwise our
1210*4882a593Smuzhiyun * cache state won't be right, and we won't get written again
1211*4882a593Smuzhiyun */
1212*4882a593Smuzhiyun if (!ret && list_empty(&block_group->dirty_list))
1213*4882a593Smuzhiyun block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1214*4882a593Smuzhiyun else if (ret)
1215*4882a593Smuzhiyun block_group->disk_cache_state = BTRFS_DC_ERROR;
1216*4882a593Smuzhiyun
1217*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1218*4882a593Smuzhiyun spin_unlock(&trans->transaction->dirty_bgs_lock);
1219*4882a593Smuzhiyun io_ctl->inode = NULL;
1220*4882a593Smuzhiyun iput(inode);
1221*4882a593Smuzhiyun }
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun return ret;
1224*4882a593Smuzhiyun
1225*4882a593Smuzhiyun }
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun static int btrfs_wait_cache_io_root(struct btrfs_root *root,
1228*4882a593Smuzhiyun struct btrfs_trans_handle *trans,
1229*4882a593Smuzhiyun struct btrfs_io_ctl *io_ctl,
1230*4882a593Smuzhiyun struct btrfs_path *path)
1231*4882a593Smuzhiyun {
1232*4882a593Smuzhiyun return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
1233*4882a593Smuzhiyun }
1234*4882a593Smuzhiyun
1235*4882a593Smuzhiyun int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
1236*4882a593Smuzhiyun struct btrfs_block_group *block_group,
1237*4882a593Smuzhiyun struct btrfs_path *path)
1238*4882a593Smuzhiyun {
1239*4882a593Smuzhiyun return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1240*4882a593Smuzhiyun block_group, &block_group->io_ctl,
1241*4882a593Smuzhiyun path, block_group->start);
1242*4882a593Smuzhiyun }
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun /**
1245*4882a593Smuzhiyun * __btrfs_write_out_cache - write out cached info to an inode
1246*4882a593Smuzhiyun * @root: the root the inode belongs to
1247*4882a593Smuzhiyun * @ctl: the free space cache we are going to write out
1248*4882a593Smuzhiyun * @block_group: the block_group for this cache if it belongs to a block_group
1249*4882a593Smuzhiyun * @trans: the trans handle
1250*4882a593Smuzhiyun *
1251*4882a593Smuzhiyun * This function writes out a free space cache struct to disk for quick recovery
1252*4882a593Smuzhiyun * on mount. This will return 0 if it was successful in writing the cache out,
1253*4882a593Smuzhiyun * or an errno if it was not.
1254*4882a593Smuzhiyun */
1255*4882a593Smuzhiyun static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1256*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl,
1257*4882a593Smuzhiyun struct btrfs_block_group *block_group,
1258*4882a593Smuzhiyun struct btrfs_io_ctl *io_ctl,
1259*4882a593Smuzhiyun struct btrfs_trans_handle *trans)
1260*4882a593Smuzhiyun {
1261*4882a593Smuzhiyun struct extent_state *cached_state = NULL;
1262*4882a593Smuzhiyun LIST_HEAD(bitmap_list);
1263*4882a593Smuzhiyun int entries = 0;
1264*4882a593Smuzhiyun int bitmaps = 0;
1265*4882a593Smuzhiyun int ret;
1266*4882a593Smuzhiyun int must_iput = 0;
1267*4882a593Smuzhiyun
1268*4882a593Smuzhiyun if (!i_size_read(inode))
1269*4882a593Smuzhiyun return -EIO;
1270*4882a593Smuzhiyun
1271*4882a593Smuzhiyun WARN_ON(io_ctl->pages);
1272*4882a593Smuzhiyun ret = io_ctl_init(io_ctl, inode, 1);
1273*4882a593Smuzhiyun if (ret)
1274*4882a593Smuzhiyun return ret;
1275*4882a593Smuzhiyun
1276*4882a593Smuzhiyun if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1277*4882a593Smuzhiyun down_write(&block_group->data_rwsem);
1278*4882a593Smuzhiyun spin_lock(&block_group->lock);
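		/*
		 * The block group still has outstanding delalloc, so any
		 * cache we wrote out now would immediately be stale. Skip
		 * the write for this transaction; the inode's generation is
		 * set to 0 below so the on-disk cache is treated as invalid
		 * if it is ever loaded.
		 */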
1279*4882a593Smuzhiyun if (block_group->delalloc_bytes) {
1280*4882a593Smuzhiyun block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1281*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1282*4882a593Smuzhiyun up_write(&block_group->data_rwsem);
1283*4882a593Smuzhiyun BTRFS_I(inode)->generation = 0;
1284*4882a593Smuzhiyun ret = 0;
1285*4882a593Smuzhiyun must_iput = 1;
1286*4882a593Smuzhiyun goto out;
1287*4882a593Smuzhiyun }
1288*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1289*4882a593Smuzhiyun }
1290*4882a593Smuzhiyun
1291*4882a593Smuzhiyun /* Lock all pages first so we can lock the extent safely. */
1292*4882a593Smuzhiyun ret = io_ctl_prepare_pages(io_ctl, false);
1293*4882a593Smuzhiyun if (ret)
1294*4882a593Smuzhiyun goto out_unlock;
1295*4882a593Smuzhiyun
1296*4882a593Smuzhiyun lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1297*4882a593Smuzhiyun &cached_state);
1298*4882a593Smuzhiyun
1299*4882a593Smuzhiyun io_ctl_set_generation(io_ctl, trans->transid);
1300*4882a593Smuzhiyun
1301*4882a593Smuzhiyun mutex_lock(&ctl->cache_writeout_mutex);
1302*4882a593Smuzhiyun /* Write out the extent entries in the free space cache */
1303*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
1304*4882a593Smuzhiyun ret = write_cache_extent_entries(io_ctl, ctl,
1305*4882a593Smuzhiyun block_group, &entries, &bitmaps,
1306*4882a593Smuzhiyun &bitmap_list);
1307*4882a593Smuzhiyun if (ret)
1308*4882a593Smuzhiyun goto out_nospc_locked;
1309*4882a593Smuzhiyun
1310*4882a593Smuzhiyun /*
1311*4882a593Smuzhiyun * Some spaces that are freed in the current transaction are pinned,
1312*4882a593Smuzhiyun * they will be added into free space cache after the transaction is
1313*4882a593Smuzhiyun * committed, we shouldn't lose them.
1314*4882a593Smuzhiyun *
1315*4882a593Smuzhiyun * If this changes while we are working we'll get added back to
1316*4882a593Smuzhiyun * the dirty list and redo it. No locking needed
1317*4882a593Smuzhiyun */
1318*4882a593Smuzhiyun ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
1319*4882a593Smuzhiyun if (ret)
1320*4882a593Smuzhiyun goto out_nospc_locked;
1321*4882a593Smuzhiyun
1322*4882a593Smuzhiyun /*
1323*4882a593Smuzhiyun * At last, we write out all the bitmaps and keep cache_writeout_mutex
1324*4882a593Smuzhiyun * locked while doing it because a concurrent trim can be manipulating
1325*4882a593Smuzhiyun * or freeing the bitmap.
1326*4882a593Smuzhiyun */
1327*4882a593Smuzhiyun ret = write_bitmap_entries(io_ctl, &bitmap_list);
1328*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
1329*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
1330*4882a593Smuzhiyun if (ret)
1331*4882a593Smuzhiyun goto out_nospc;
1332*4882a593Smuzhiyun
1333*4882a593Smuzhiyun /* Zero out the rest of the pages just to make sure */
1334*4882a593Smuzhiyun io_ctl_zero_remaining_pages(io_ctl);
1335*4882a593Smuzhiyun
1336*4882a593Smuzhiyun /* Everything is written out, now we dirty the pages in the file. */
1337*4882a593Smuzhiyun ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1338*4882a593Smuzhiyun io_ctl->num_pages, 0, i_size_read(inode),
1339*4882a593Smuzhiyun &cached_state);
1340*4882a593Smuzhiyun if (ret)
1341*4882a593Smuzhiyun goto out_nospc;
1342*4882a593Smuzhiyun
1343*4882a593Smuzhiyun if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1344*4882a593Smuzhiyun up_write(&block_group->data_rwsem);
1345*4882a593Smuzhiyun /*
1346*4882a593Smuzhiyun * Release the pages and unlock the extent, we will flush
1347*4882a593Smuzhiyun * them out later
1348*4882a593Smuzhiyun */
1349*4882a593Smuzhiyun io_ctl_drop_pages(io_ctl);
1350*4882a593Smuzhiyun io_ctl_free(io_ctl);
1351*4882a593Smuzhiyun
1352*4882a593Smuzhiyun unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1353*4882a593Smuzhiyun i_size_read(inode) - 1, &cached_state);
1354*4882a593Smuzhiyun
1355*4882a593Smuzhiyun /*
1356*4882a593Smuzhiyun * At this point the pages are under IO and we're happy. The caller
1357*4882a593Smuzhiyun * is responsible for waiting on them and updating the cache and
1358*4882a593Smuzhiyun * the inode.
1359*4882a593Smuzhiyun */
1360*4882a593Smuzhiyun io_ctl->entries = entries;
1361*4882a593Smuzhiyun io_ctl->bitmaps = bitmaps;
1362*4882a593Smuzhiyun
1363*4882a593Smuzhiyun ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1364*4882a593Smuzhiyun if (ret)
1365*4882a593Smuzhiyun goto out;
1366*4882a593Smuzhiyun
1367*4882a593Smuzhiyun return 0;
1368*4882a593Smuzhiyun
1369*4882a593Smuzhiyun out_nospc_locked:
1370*4882a593Smuzhiyun cleanup_bitmap_list(&bitmap_list);
1371*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
1372*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
1373*4882a593Smuzhiyun
1374*4882a593Smuzhiyun out_nospc:
1375*4882a593Smuzhiyun cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
1376*4882a593Smuzhiyun
1377*4882a593Smuzhiyun out_unlock:
1378*4882a593Smuzhiyun if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1379*4882a593Smuzhiyun up_write(&block_group->data_rwsem);
1380*4882a593Smuzhiyun
1381*4882a593Smuzhiyun out:
1382*4882a593Smuzhiyun io_ctl->inode = NULL;
1383*4882a593Smuzhiyun io_ctl_free(io_ctl);
1384*4882a593Smuzhiyun if (ret) {
1385*4882a593Smuzhiyun invalidate_inode_pages2(inode->i_mapping);
1386*4882a593Smuzhiyun BTRFS_I(inode)->generation = 0;
1387*4882a593Smuzhiyun }
1388*4882a593Smuzhiyun btrfs_update_inode(trans, root, inode);
1389*4882a593Smuzhiyun if (must_iput)
1390*4882a593Smuzhiyun iput(inode);
1391*4882a593Smuzhiyun return ret;
1392*4882a593Smuzhiyun }
1393*4882a593Smuzhiyun
1394*4882a593Smuzhiyun int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
1395*4882a593Smuzhiyun struct btrfs_block_group *block_group,
1396*4882a593Smuzhiyun struct btrfs_path *path)
1397*4882a593Smuzhiyun {
1398*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = trans->fs_info;
1399*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1400*4882a593Smuzhiyun struct inode *inode;
1401*4882a593Smuzhiyun int ret = 0;
1402*4882a593Smuzhiyun
1403*4882a593Smuzhiyun spin_lock(&block_group->lock);
1404*4882a593Smuzhiyun if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1405*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1406*4882a593Smuzhiyun return 0;
1407*4882a593Smuzhiyun }
1408*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1409*4882a593Smuzhiyun
1410*4882a593Smuzhiyun inode = lookup_free_space_inode(block_group, path);
1411*4882a593Smuzhiyun if (IS_ERR(inode))
1412*4882a593Smuzhiyun return 0;
1413*4882a593Smuzhiyun
1414*4882a593Smuzhiyun ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1415*4882a593Smuzhiyun block_group, &block_group->io_ctl, trans);
1416*4882a593Smuzhiyun if (ret) {
1417*4882a593Smuzhiyun btrfs_debug(fs_info,
1418*4882a593Smuzhiyun "failed to write free space cache for block group %llu error %d",
1419*4882a593Smuzhiyun block_group->start, ret);
1420*4882a593Smuzhiyun spin_lock(&block_group->lock);
1421*4882a593Smuzhiyun block_group->disk_cache_state = BTRFS_DC_ERROR;
1422*4882a593Smuzhiyun spin_unlock(&block_group->lock);
1423*4882a593Smuzhiyun
1424*4882a593Smuzhiyun block_group->io_ctl.inode = NULL;
1425*4882a593Smuzhiyun iput(inode);
1426*4882a593Smuzhiyun }
1427*4882a593Smuzhiyun
1428*4882a593Smuzhiyun /*
1429*4882a593Smuzhiyun * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1430*4882a593Smuzhiyun * to wait for IO and put the inode
1431*4882a593Smuzhiyun */
1432*4882a593Smuzhiyun
1433*4882a593Smuzhiyun return ret;
1434*4882a593Smuzhiyun }
1435*4882a593Smuzhiyun
1436*4882a593Smuzhiyun static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1437*4882a593Smuzhiyun u64 offset)
1438*4882a593Smuzhiyun {
1439*4882a593Smuzhiyun ASSERT(offset >= bitmap_start);
1440*4882a593Smuzhiyun offset -= bitmap_start;
1441*4882a593Smuzhiyun return (unsigned long)(div_u64(offset, unit));
1442*4882a593Smuzhiyun }
1443*4882a593Smuzhiyun
1444*4882a593Smuzhiyun static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1445*4882a593Smuzhiyun {
1446*4882a593Smuzhiyun return (unsigned long)(div_u64(bytes, unit));
1447*4882a593Smuzhiyun }
1448*4882a593Smuzhiyun
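/*
 * Return the start offset of the bitmap entry that would cover 'offset',
 * i.e. 'offset' rounded down to a BITS_PER_BITMAP * ctl->unit boundary
 * relative to ctl->start. For example, with 4KiB pages and a 4KiB unit,
 * each bitmap covers 128MiB of the block group.
 */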
1449*4882a593Smuzhiyun static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1450*4882a593Smuzhiyun u64 offset)
1451*4882a593Smuzhiyun {
1452*4882a593Smuzhiyun u64 bitmap_start;
1453*4882a593Smuzhiyun u64 bytes_per_bitmap;
1454*4882a593Smuzhiyun
1455*4882a593Smuzhiyun bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1456*4882a593Smuzhiyun bitmap_start = offset - ctl->start;
1457*4882a593Smuzhiyun bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1458*4882a593Smuzhiyun bitmap_start *= bytes_per_bitmap;
1459*4882a593Smuzhiyun bitmap_start += ctl->start;
1460*4882a593Smuzhiyun
1461*4882a593Smuzhiyun return bitmap_start;
1462*4882a593Smuzhiyun }
1463*4882a593Smuzhiyun
1464*4882a593Smuzhiyun static int tree_insert_offset(struct rb_root *root, u64 offset,
1465*4882a593Smuzhiyun struct rb_node *node, int bitmap)
1466*4882a593Smuzhiyun {
1467*4882a593Smuzhiyun struct rb_node **p = &root->rb_node;
1468*4882a593Smuzhiyun struct rb_node *parent = NULL;
1469*4882a593Smuzhiyun struct btrfs_free_space *info;
1470*4882a593Smuzhiyun
1471*4882a593Smuzhiyun while (*p) {
1472*4882a593Smuzhiyun parent = *p;
1473*4882a593Smuzhiyun info = rb_entry(parent, struct btrfs_free_space, offset_index);
1474*4882a593Smuzhiyun
1475*4882a593Smuzhiyun if (offset < info->offset) {
1476*4882a593Smuzhiyun p = &(*p)->rb_left;
1477*4882a593Smuzhiyun } else if (offset > info->offset) {
1478*4882a593Smuzhiyun p = &(*p)->rb_right;
1479*4882a593Smuzhiyun } else {
1480*4882a593Smuzhiyun /*
1481*4882a593Smuzhiyun * we could have a bitmap entry and an extent entry
1482*4882a593Smuzhiyun * share the same offset. If this is the case, we want
1483*4882a593Smuzhiyun * the extent entry to always be found first if we do a
1484*4882a593Smuzhiyun * linear search through the tree, since we want to have
1485*4882a593Smuzhiyun * the quickest allocation time, and allocating from an
1486*4882a593Smuzhiyun * extent is faster than allocating from a bitmap. So
1487*4882a593Smuzhiyun * if we're inserting a bitmap and we find an entry at
1488*4882a593Smuzhiyun * this offset, we want to go right, or after this entry
1489*4882a593Smuzhiyun * logically. If we are inserting an extent and we've
1490*4882a593Smuzhiyun * found a bitmap, we want to go left, or before
1491*4882a593Smuzhiyun * logically.
1492*4882a593Smuzhiyun */
1493*4882a593Smuzhiyun if (bitmap) {
1494*4882a593Smuzhiyun if (info->bitmap) {
1495*4882a593Smuzhiyun WARN_ON_ONCE(1);
1496*4882a593Smuzhiyun return -EEXIST;
1497*4882a593Smuzhiyun }
1498*4882a593Smuzhiyun p = &(*p)->rb_right;
1499*4882a593Smuzhiyun } else {
1500*4882a593Smuzhiyun if (!info->bitmap) {
1501*4882a593Smuzhiyun WARN_ON_ONCE(1);
1502*4882a593Smuzhiyun return -EEXIST;
1503*4882a593Smuzhiyun }
1504*4882a593Smuzhiyun p = &(*p)->rb_left;
1505*4882a593Smuzhiyun }
1506*4882a593Smuzhiyun }
1507*4882a593Smuzhiyun }
1508*4882a593Smuzhiyun
1509*4882a593Smuzhiyun rb_link_node(node, parent, p);
1510*4882a593Smuzhiyun rb_insert_color(node, root);
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun return 0;
1513*4882a593Smuzhiyun }
1514*4882a593Smuzhiyun
1515*4882a593Smuzhiyun /*
1516*4882a593Smuzhiyun * searches the tree for the given offset.
1517*4882a593Smuzhiyun *
1518*4882a593Smuzhiyun * fuzzy - If this is set, then we are trying to make an allocation, and we just
1519*4882a593Smuzhiyun * want a section that has at least bytes size and comes at or after the given
1520*4882a593Smuzhiyun * offset.
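 *
 * bitmap_only - if this is set, only return a bitmap entry that starts
 * exactly at 'offset'; an extent entry sharing that offset is skipped.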
1521*4882a593Smuzhiyun */
1522*4882a593Smuzhiyun static struct btrfs_free_space *
1523*4882a593Smuzhiyun tree_search_offset(struct btrfs_free_space_ctl *ctl,
1524*4882a593Smuzhiyun u64 offset, int bitmap_only, int fuzzy)
1525*4882a593Smuzhiyun {
1526*4882a593Smuzhiyun struct rb_node *n = ctl->free_space_offset.rb_node;
1527*4882a593Smuzhiyun struct btrfs_free_space *entry, *prev = NULL;
1528*4882a593Smuzhiyun
1529*4882a593Smuzhiyun /* find entry that is closest to the 'offset' */
1530*4882a593Smuzhiyun while (1) {
1531*4882a593Smuzhiyun if (!n) {
1532*4882a593Smuzhiyun entry = NULL;
1533*4882a593Smuzhiyun break;
1534*4882a593Smuzhiyun }
1535*4882a593Smuzhiyun
1536*4882a593Smuzhiyun entry = rb_entry(n, struct btrfs_free_space, offset_index);
1537*4882a593Smuzhiyun prev = entry;
1538*4882a593Smuzhiyun
1539*4882a593Smuzhiyun if (offset < entry->offset)
1540*4882a593Smuzhiyun n = n->rb_left;
1541*4882a593Smuzhiyun else if (offset > entry->offset)
1542*4882a593Smuzhiyun n = n->rb_right;
1543*4882a593Smuzhiyun else
1544*4882a593Smuzhiyun break;
1545*4882a593Smuzhiyun }
1546*4882a593Smuzhiyun
1547*4882a593Smuzhiyun if (bitmap_only) {
1548*4882a593Smuzhiyun if (!entry)
1549*4882a593Smuzhiyun return NULL;
1550*4882a593Smuzhiyun if (entry->bitmap)
1551*4882a593Smuzhiyun return entry;
1552*4882a593Smuzhiyun
1553*4882a593Smuzhiyun /*
1554*4882a593Smuzhiyun * A bitmap entry and an extent entry may share the same offset;
1555*4882a593Smuzhiyun * in that case, the bitmap entry comes after the extent entry.
1556*4882a593Smuzhiyun */
1557*4882a593Smuzhiyun n = rb_next(n);
1558*4882a593Smuzhiyun if (!n)
1559*4882a593Smuzhiyun return NULL;
1560*4882a593Smuzhiyun entry = rb_entry(n, struct btrfs_free_space, offset_index);
1561*4882a593Smuzhiyun if (entry->offset != offset)
1562*4882a593Smuzhiyun return NULL;
1563*4882a593Smuzhiyun
1564*4882a593Smuzhiyun WARN_ON(!entry->bitmap);
1565*4882a593Smuzhiyun return entry;
1566*4882a593Smuzhiyun } else if (entry) {
1567*4882a593Smuzhiyun if (entry->bitmap) {
1568*4882a593Smuzhiyun /*
1569*4882a593Smuzhiyun * if previous extent entry covers the offset,
1570*4882a593Smuzhiyun * we should return it instead of the bitmap entry
1571*4882a593Smuzhiyun */
1572*4882a593Smuzhiyun n = rb_prev(&entry->offset_index);
1573*4882a593Smuzhiyun if (n) {
1574*4882a593Smuzhiyun prev = rb_entry(n, struct btrfs_free_space,
1575*4882a593Smuzhiyun offset_index);
1576*4882a593Smuzhiyun if (!prev->bitmap &&
1577*4882a593Smuzhiyun prev->offset + prev->bytes > offset)
1578*4882a593Smuzhiyun entry = prev;
1579*4882a593Smuzhiyun }
1580*4882a593Smuzhiyun }
1581*4882a593Smuzhiyun return entry;
1582*4882a593Smuzhiyun }
1583*4882a593Smuzhiyun
1584*4882a593Smuzhiyun if (!prev)
1585*4882a593Smuzhiyun return NULL;
1586*4882a593Smuzhiyun
1587*4882a593Smuzhiyun /* find last entry before the 'offset' */
1588*4882a593Smuzhiyun entry = prev;
1589*4882a593Smuzhiyun if (entry->offset > offset) {
1590*4882a593Smuzhiyun n = rb_prev(&entry->offset_index);
1591*4882a593Smuzhiyun if (n) {
1592*4882a593Smuzhiyun entry = rb_entry(n, struct btrfs_free_space,
1593*4882a593Smuzhiyun offset_index);
1594*4882a593Smuzhiyun ASSERT(entry->offset <= offset);
1595*4882a593Smuzhiyun } else {
1596*4882a593Smuzhiyun if (fuzzy)
1597*4882a593Smuzhiyun return entry;
1598*4882a593Smuzhiyun else
1599*4882a593Smuzhiyun return NULL;
1600*4882a593Smuzhiyun }
1601*4882a593Smuzhiyun }
1602*4882a593Smuzhiyun
1603*4882a593Smuzhiyun if (entry->bitmap) {
1604*4882a593Smuzhiyun n = rb_prev(&entry->offset_index);
1605*4882a593Smuzhiyun if (n) {
1606*4882a593Smuzhiyun prev = rb_entry(n, struct btrfs_free_space,
1607*4882a593Smuzhiyun offset_index);
1608*4882a593Smuzhiyun if (!prev->bitmap &&
1609*4882a593Smuzhiyun prev->offset + prev->bytes > offset)
1610*4882a593Smuzhiyun return prev;
1611*4882a593Smuzhiyun }
1612*4882a593Smuzhiyun if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1613*4882a593Smuzhiyun return entry;
1614*4882a593Smuzhiyun } else if (entry->offset + entry->bytes > offset)
1615*4882a593Smuzhiyun return entry;
1616*4882a593Smuzhiyun
1617*4882a593Smuzhiyun if (!fuzzy)
1618*4882a593Smuzhiyun return NULL;
1619*4882a593Smuzhiyun
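	/*
	 * Fuzzy search: walk forward to the first entry (extent or bitmap)
	 * that ends beyond 'offset'.
	 */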
1620*4882a593Smuzhiyun while (1) {
1621*4882a593Smuzhiyun if (entry->bitmap) {
1622*4882a593Smuzhiyun if (entry->offset + BITS_PER_BITMAP *
1623*4882a593Smuzhiyun ctl->unit > offset)
1624*4882a593Smuzhiyun break;
1625*4882a593Smuzhiyun } else {
1626*4882a593Smuzhiyun if (entry->offset + entry->bytes > offset)
1627*4882a593Smuzhiyun break;
1628*4882a593Smuzhiyun }
1629*4882a593Smuzhiyun
1630*4882a593Smuzhiyun n = rb_next(&entry->offset_index);
1631*4882a593Smuzhiyun if (!n)
1632*4882a593Smuzhiyun return NULL;
1633*4882a593Smuzhiyun entry = rb_entry(n, struct btrfs_free_space, offset_index);
1634*4882a593Smuzhiyun }
1635*4882a593Smuzhiyun return entry;
1636*4882a593Smuzhiyun }
1637*4882a593Smuzhiyun
1638*4882a593Smuzhiyun static inline void
1639*4882a593Smuzhiyun __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1640*4882a593Smuzhiyun struct btrfs_free_space *info)
1641*4882a593Smuzhiyun {
1642*4882a593Smuzhiyun rb_erase(&info->offset_index, &ctl->free_space_offset);
1643*4882a593Smuzhiyun ctl->free_extents--;
1644*4882a593Smuzhiyun
1645*4882a593Smuzhiyun if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1646*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR]--;
1647*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1648*4882a593Smuzhiyun }
1649*4882a593Smuzhiyun }
1650*4882a593Smuzhiyun
1651*4882a593Smuzhiyun static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1652*4882a593Smuzhiyun struct btrfs_free_space *info)
1653*4882a593Smuzhiyun {
1654*4882a593Smuzhiyun __unlink_free_space(ctl, info);
1655*4882a593Smuzhiyun ctl->free_space -= info->bytes;
1656*4882a593Smuzhiyun }
1657*4882a593Smuzhiyun
1658*4882a593Smuzhiyun static int link_free_space(struct btrfs_free_space_ctl *ctl,
1659*4882a593Smuzhiyun struct btrfs_free_space *info)
1660*4882a593Smuzhiyun {
1661*4882a593Smuzhiyun int ret = 0;
1662*4882a593Smuzhiyun
1663*4882a593Smuzhiyun ASSERT(info->bytes || info->bitmap);
1664*4882a593Smuzhiyun ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1665*4882a593Smuzhiyun &info->offset_index, (info->bitmap != NULL));
1666*4882a593Smuzhiyun if (ret)
1667*4882a593Smuzhiyun return ret;
1668*4882a593Smuzhiyun
1669*4882a593Smuzhiyun if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
1670*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR]++;
1671*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1672*4882a593Smuzhiyun }
1673*4882a593Smuzhiyun
1674*4882a593Smuzhiyun ctl->free_space += info->bytes;
1675*4882a593Smuzhiyun ctl->free_extents++;
1676*4882a593Smuzhiyun return ret;
1677*4882a593Smuzhiyun }
1678*4882a593Smuzhiyun
1679*4882a593Smuzhiyun static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1680*4882a593Smuzhiyun {
1681*4882a593Smuzhiyun struct btrfs_block_group *block_group = ctl->private;
1682*4882a593Smuzhiyun u64 max_bytes;
1683*4882a593Smuzhiyun u64 bitmap_bytes;
1684*4882a593Smuzhiyun u64 extent_bytes;
1685*4882a593Smuzhiyun u64 size = block_group->length;
1686*4882a593Smuzhiyun u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1687*4882a593Smuzhiyun u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1688*4882a593Smuzhiyun
1689*4882a593Smuzhiyun max_bitmaps = max_t(u64, max_bitmaps, 1);
1690*4882a593Smuzhiyun
1691*4882a593Smuzhiyun ASSERT(ctl->total_bitmaps <= max_bitmaps);
1692*4882a593Smuzhiyun
1693*4882a593Smuzhiyun /*
1694*4882a593Smuzhiyun * We are trying to keep the total amount of memory used per 1GiB of
1695*4882a593Smuzhiyun * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
1696*4882a593Smuzhiyun * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
1697*4882a593Smuzhiyun * bitmaps, we may end up using more memory than this.
1698*4882a593Smuzhiyun */
1699*4882a593Smuzhiyun if (size < SZ_1G)
1700*4882a593Smuzhiyun max_bytes = MAX_CACHE_BYTES_PER_GIG;
1701*4882a593Smuzhiyun else
1702*4882a593Smuzhiyun max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
1703*4882a593Smuzhiyun
1704*4882a593Smuzhiyun bitmap_bytes = ctl->total_bitmaps * ctl->unit;
1705*4882a593Smuzhiyun
1706*4882a593Smuzhiyun /*
1707*4882a593Smuzhiyun * we want the extent entry threshold to always be at most 1/2 the max
1708*4882a593Smuzhiyun * bytes we can have, or whatever is less than that.
1709*4882a593Smuzhiyun */
1710*4882a593Smuzhiyun extent_bytes = max_bytes - bitmap_bytes;
1711*4882a593Smuzhiyun extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1712*4882a593Smuzhiyun
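	/*
	 * extents_thresh is the number of extent entries we allow before
	 * switching new free space over to bitmaps (see use_bitmap()).
	 */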
1713*4882a593Smuzhiyun ctl->extents_thresh =
1714*4882a593Smuzhiyun div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1715*4882a593Smuzhiyun }
1716*4882a593Smuzhiyun
1717*4882a593Smuzhiyun static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1718*4882a593Smuzhiyun struct btrfs_free_space *info,
1719*4882a593Smuzhiyun u64 offset, u64 bytes)
1720*4882a593Smuzhiyun {
1721*4882a593Smuzhiyun unsigned long start, count, end;
1722*4882a593Smuzhiyun int extent_delta = -1;
1723*4882a593Smuzhiyun
1724*4882a593Smuzhiyun start = offset_to_bit(info->offset, ctl->unit, offset);
1725*4882a593Smuzhiyun count = bytes_to_bits(bytes, ctl->unit);
1726*4882a593Smuzhiyun end = start + count;
1727*4882a593Smuzhiyun ASSERT(end <= BITS_PER_BITMAP);
1728*4882a593Smuzhiyun
1729*4882a593Smuzhiyun bitmap_clear(info->bitmap, start, count);
1730*4882a593Smuzhiyun
1731*4882a593Smuzhiyun info->bytes -= bytes;
1732*4882a593Smuzhiyun if (info->max_extent_size > ctl->unit)
1733*4882a593Smuzhiyun info->max_extent_size = 0;
1734*4882a593Smuzhiyun
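	/*
	 * Clearing [start, end) may split one run of set bits into two,
	 * shrink it, or remove it entirely. Start from -1 (one run gone) and
	 * add one for each neighbouring bit that is still set, since each
	 * such neighbour means a run survives on that side.
	 */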
1735*4882a593Smuzhiyun if (start && test_bit(start - 1, info->bitmap))
1736*4882a593Smuzhiyun extent_delta++;
1737*4882a593Smuzhiyun
1738*4882a593Smuzhiyun if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1739*4882a593Smuzhiyun extent_delta++;
1740*4882a593Smuzhiyun
1741*4882a593Smuzhiyun info->bitmap_extents += extent_delta;
1742*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(info)) {
1743*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1744*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1745*4882a593Smuzhiyun }
1746*4882a593Smuzhiyun }
1747*4882a593Smuzhiyun
1748*4882a593Smuzhiyun static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1749*4882a593Smuzhiyun struct btrfs_free_space *info, u64 offset,
1750*4882a593Smuzhiyun u64 bytes)
1751*4882a593Smuzhiyun {
1752*4882a593Smuzhiyun __bitmap_clear_bits(ctl, info, offset, bytes);
1753*4882a593Smuzhiyun ctl->free_space -= bytes;
1754*4882a593Smuzhiyun }
1755*4882a593Smuzhiyun
1756*4882a593Smuzhiyun static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1757*4882a593Smuzhiyun struct btrfs_free_space *info, u64 offset,
1758*4882a593Smuzhiyun u64 bytes)
1759*4882a593Smuzhiyun {
1760*4882a593Smuzhiyun unsigned long start, count, end;
1761*4882a593Smuzhiyun int extent_delta = 1;
1762*4882a593Smuzhiyun
1763*4882a593Smuzhiyun start = offset_to_bit(info->offset, ctl->unit, offset);
1764*4882a593Smuzhiyun count = bytes_to_bits(bytes, ctl->unit);
1765*4882a593Smuzhiyun end = start + count;
1766*4882a593Smuzhiyun ASSERT(end <= BITS_PER_BITMAP);
1767*4882a593Smuzhiyun
1768*4882a593Smuzhiyun bitmap_set(info->bitmap, start, count);
1769*4882a593Smuzhiyun
1770*4882a593Smuzhiyun info->bytes += bytes;
1771*4882a593Smuzhiyun ctl->free_space += bytes;
1772*4882a593Smuzhiyun
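	/*
	 * Setting [start, end) creates one new run of set bits unless it
	 * touches an existing run on either side, in which case the runs
	 * merge. Start from +1 and subtract one for each neighbouring bit
	 * that was already set.
	 */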
1773*4882a593Smuzhiyun if (start && test_bit(start - 1, info->bitmap))
1774*4882a593Smuzhiyun extent_delta--;
1775*4882a593Smuzhiyun
1776*4882a593Smuzhiyun if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1777*4882a593Smuzhiyun extent_delta--;
1778*4882a593Smuzhiyun
1779*4882a593Smuzhiyun info->bitmap_extents += extent_delta;
1780*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(info)) {
1781*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
1782*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1783*4882a593Smuzhiyun }
1784*4882a593Smuzhiyun }
1785*4882a593Smuzhiyun
1786*4882a593Smuzhiyun /*
1787*4882a593Smuzhiyun * If we cannot find a suitable extent, use *bytes to record the size
1788*4882a593Smuzhiyun * of the largest free extent found and return -1.
1789*4882a593Smuzhiyun */
1790*4882a593Smuzhiyun static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1791*4882a593Smuzhiyun struct btrfs_free_space *bitmap_info, u64 *offset,
1792*4882a593Smuzhiyun u64 *bytes, bool for_alloc)
1793*4882a593Smuzhiyun {
1794*4882a593Smuzhiyun unsigned long found_bits = 0;
1795*4882a593Smuzhiyun unsigned long max_bits = 0;
1796*4882a593Smuzhiyun unsigned long bits, i;
1797*4882a593Smuzhiyun unsigned long next_zero;
1798*4882a593Smuzhiyun unsigned long extent_bits;
1799*4882a593Smuzhiyun
1800*4882a593Smuzhiyun /*
1801*4882a593Smuzhiyun * Skip searching the bitmap if we don't have a contiguous section that
1802*4882a593Smuzhiyun * is large enough for this allocation.
1803*4882a593Smuzhiyun */
1804*4882a593Smuzhiyun if (for_alloc &&
1805*4882a593Smuzhiyun bitmap_info->max_extent_size &&
1806*4882a593Smuzhiyun bitmap_info->max_extent_size < *bytes) {
1807*4882a593Smuzhiyun *bytes = bitmap_info->max_extent_size;
1808*4882a593Smuzhiyun return -1;
1809*4882a593Smuzhiyun }
1810*4882a593Smuzhiyun
1811*4882a593Smuzhiyun i = offset_to_bit(bitmap_info->offset, ctl->unit,
1812*4882a593Smuzhiyun max_t(u64, *offset, bitmap_info->offset));
1813*4882a593Smuzhiyun bits = bytes_to_bits(*bytes, ctl->unit);
1814*4882a593Smuzhiyun
1815*4882a593Smuzhiyun for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1816*4882a593Smuzhiyun if (for_alloc && bits == 1) {
1817*4882a593Smuzhiyun found_bits = 1;
1818*4882a593Smuzhiyun break;
1819*4882a593Smuzhiyun }
1820*4882a593Smuzhiyun next_zero = find_next_zero_bit(bitmap_info->bitmap,
1821*4882a593Smuzhiyun BITS_PER_BITMAP, i);
1822*4882a593Smuzhiyun extent_bits = next_zero - i;
1823*4882a593Smuzhiyun if (extent_bits >= bits) {
1824*4882a593Smuzhiyun found_bits = extent_bits;
1825*4882a593Smuzhiyun break;
1826*4882a593Smuzhiyun } else if (extent_bits > max_bits) {
1827*4882a593Smuzhiyun max_bits = extent_bits;
1828*4882a593Smuzhiyun }
1829*4882a593Smuzhiyun i = next_zero;
1830*4882a593Smuzhiyun }
1831*4882a593Smuzhiyun
1832*4882a593Smuzhiyun if (found_bits) {
1833*4882a593Smuzhiyun *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1834*4882a593Smuzhiyun *bytes = (u64)(found_bits) * ctl->unit;
1835*4882a593Smuzhiyun return 0;
1836*4882a593Smuzhiyun }
1837*4882a593Smuzhiyun
1838*4882a593Smuzhiyun *bytes = (u64)(max_bits) * ctl->unit;
1839*4882a593Smuzhiyun bitmap_info->max_extent_size = *bytes;
1840*4882a593Smuzhiyun return -1;
1841*4882a593Smuzhiyun }
1842*4882a593Smuzhiyun
1843*4882a593Smuzhiyun static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1844*4882a593Smuzhiyun {
1845*4882a593Smuzhiyun if (entry->bitmap)
1846*4882a593Smuzhiyun return entry->max_extent_size;
1847*4882a593Smuzhiyun return entry->bytes;
1848*4882a593Smuzhiyun }
1849*4882a593Smuzhiyun
1850*4882a593Smuzhiyun /* Cache the size of the largest free extent found, in bytes, in *max_extent_size */
1851*4882a593Smuzhiyun static struct btrfs_free_space *
1852*4882a593Smuzhiyun find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1853*4882a593Smuzhiyun unsigned long align, u64 *max_extent_size)
1854*4882a593Smuzhiyun {
1855*4882a593Smuzhiyun struct btrfs_free_space *entry;
1856*4882a593Smuzhiyun struct rb_node *node;
1857*4882a593Smuzhiyun u64 tmp;
1858*4882a593Smuzhiyun u64 align_off;
1859*4882a593Smuzhiyun int ret;
1860*4882a593Smuzhiyun
1861*4882a593Smuzhiyun if (!ctl->free_space_offset.rb_node)
1862*4882a593Smuzhiyun goto out;
1863*4882a593Smuzhiyun
1864*4882a593Smuzhiyun entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1865*4882a593Smuzhiyun if (!entry)
1866*4882a593Smuzhiyun goto out;
1867*4882a593Smuzhiyun
1868*4882a593Smuzhiyun for (node = &entry->offset_index; node; node = rb_next(node)) {
1869*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
1870*4882a593Smuzhiyun if (entry->bytes < *bytes) {
1871*4882a593Smuzhiyun *max_extent_size = max(get_max_extent_size(entry),
1872*4882a593Smuzhiyun *max_extent_size);
1873*4882a593Smuzhiyun continue;
1874*4882a593Smuzhiyun }
1875*4882a593Smuzhiyun
1876*4882a593Smuzhiyun /* make sure the space returned is big enough
1877*4882a593Smuzhiyun * to match our requested alignment
1878*4882a593Smuzhiyun */
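		/*
		 * Round entry->offset up to the next 'align' boundary relative
		 * to ctl->start; align_off is the gap we must skip at the
		 * front of the entry to reach that aligned offset.
		 */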
1879*4882a593Smuzhiyun if (*bytes >= align) {
1880*4882a593Smuzhiyun tmp = entry->offset - ctl->start + align - 1;
1881*4882a593Smuzhiyun tmp = div64_u64(tmp, align);
1882*4882a593Smuzhiyun tmp = tmp * align + ctl->start;
1883*4882a593Smuzhiyun align_off = tmp - entry->offset;
1884*4882a593Smuzhiyun } else {
1885*4882a593Smuzhiyun align_off = 0;
1886*4882a593Smuzhiyun tmp = entry->offset;
1887*4882a593Smuzhiyun }
1888*4882a593Smuzhiyun
1889*4882a593Smuzhiyun if (entry->bytes < *bytes + align_off) {
1890*4882a593Smuzhiyun *max_extent_size = max(get_max_extent_size(entry),
1891*4882a593Smuzhiyun *max_extent_size);
1892*4882a593Smuzhiyun continue;
1893*4882a593Smuzhiyun }
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun if (entry->bitmap) {
1896*4882a593Smuzhiyun u64 size = *bytes;
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun ret = search_bitmap(ctl, entry, &tmp, &size, true);
1899*4882a593Smuzhiyun if (!ret) {
1900*4882a593Smuzhiyun *offset = tmp;
1901*4882a593Smuzhiyun *bytes = size;
1902*4882a593Smuzhiyun return entry;
1903*4882a593Smuzhiyun } else {
1904*4882a593Smuzhiyun *max_extent_size =
1905*4882a593Smuzhiyun max(get_max_extent_size(entry),
1906*4882a593Smuzhiyun *max_extent_size);
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun continue;
1909*4882a593Smuzhiyun }
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun *offset = tmp;
1912*4882a593Smuzhiyun *bytes = entry->bytes - align_off;
1913*4882a593Smuzhiyun return entry;
1914*4882a593Smuzhiyun }
1915*4882a593Smuzhiyun out:
1916*4882a593Smuzhiyun return NULL;
1917*4882a593Smuzhiyun }
1918*4882a593Smuzhiyun
1919*4882a593Smuzhiyun static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
1920*4882a593Smuzhiyun struct btrfs_free_space *bitmap_info)
1921*4882a593Smuzhiyun {
1922*4882a593Smuzhiyun struct btrfs_block_group *block_group = ctl->private;
1923*4882a593Smuzhiyun u64 bytes = bitmap_info->bytes;
1924*4882a593Smuzhiyun unsigned int rs, re;
1925*4882a593Smuzhiyun int count = 0;
1926*4882a593Smuzhiyun
1927*4882a593Smuzhiyun if (!block_group || !bytes)
1928*4882a593Smuzhiyun return count;
1929*4882a593Smuzhiyun
1930*4882a593Smuzhiyun bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
1931*4882a593Smuzhiyun BITS_PER_BITMAP) {
1932*4882a593Smuzhiyun bytes -= (re - rs) * ctl->unit;
1933*4882a593Smuzhiyun count++;
1934*4882a593Smuzhiyun
1935*4882a593Smuzhiyun if (!bytes)
1936*4882a593Smuzhiyun break;
1937*4882a593Smuzhiyun }
1938*4882a593Smuzhiyun
1939*4882a593Smuzhiyun return count;
1940*4882a593Smuzhiyun }
1941*4882a593Smuzhiyun
1942*4882a593Smuzhiyun static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1943*4882a593Smuzhiyun struct btrfs_free_space *info, u64 offset)
1944*4882a593Smuzhiyun {
1945*4882a593Smuzhiyun info->offset = offset_to_bitmap(ctl, offset);
1946*4882a593Smuzhiyun info->bytes = 0;
1947*4882a593Smuzhiyun info->bitmap_extents = 0;
1948*4882a593Smuzhiyun INIT_LIST_HEAD(&info->list);
1949*4882a593Smuzhiyun link_free_space(ctl, info);
1950*4882a593Smuzhiyun ctl->total_bitmaps++;
1951*4882a593Smuzhiyun
1952*4882a593Smuzhiyun ctl->op->recalc_thresholds(ctl);
1953*4882a593Smuzhiyun }
1954*4882a593Smuzhiyun
1955*4882a593Smuzhiyun static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1956*4882a593Smuzhiyun struct btrfs_free_space *bitmap_info)
1957*4882a593Smuzhiyun {
1958*4882a593Smuzhiyun /*
1959*4882a593Smuzhiyun * Normally when this is called, the bitmap is completely empty. However,
1960*4882a593Smuzhiyun * if we are blowing up the free space cache for one reason or another
1961*4882a593Smuzhiyun * via __btrfs_remove_free_space_cache(), then it may not be freed and
1962*4882a593Smuzhiyun * we may leave stats on the table.
1963*4882a593Smuzhiyun */
1964*4882a593Smuzhiyun if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1965*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] -=
1966*4882a593Smuzhiyun bitmap_info->bitmap_extents;
1967*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1968*4882a593Smuzhiyun
1969*4882a593Smuzhiyun }
1970*4882a593Smuzhiyun unlink_free_space(ctl, bitmap_info);
1971*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
1972*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1973*4882a593Smuzhiyun ctl->total_bitmaps--;
1974*4882a593Smuzhiyun ctl->op->recalc_thresholds(ctl);
1975*4882a593Smuzhiyun }
1976*4882a593Smuzhiyun
1977*4882a593Smuzhiyun static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1978*4882a593Smuzhiyun struct btrfs_free_space *bitmap_info,
1979*4882a593Smuzhiyun u64 *offset, u64 *bytes)
1980*4882a593Smuzhiyun {
1981*4882a593Smuzhiyun u64 end;
1982*4882a593Smuzhiyun u64 search_start, search_bytes;
1983*4882a593Smuzhiyun int ret;
1984*4882a593Smuzhiyun
1985*4882a593Smuzhiyun again:
1986*4882a593Smuzhiyun end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1987*4882a593Smuzhiyun
1988*4882a593Smuzhiyun /*
1989*4882a593Smuzhiyun * We need to search for bits in this bitmap. Because of how we add
1990*4882a593Smuzhiyun * space, this bitmap may only cover part of the extent we are removing,
1991*4882a593Smuzhiyun * so search for as much of it as we can, clear that amount, and then
1992*4882a593Smuzhiyun * go searching for the next bitmap.
1993*4882a593Smuzhiyun */
1994*4882a593Smuzhiyun search_start = *offset;
1995*4882a593Smuzhiyun search_bytes = ctl->unit;
1996*4882a593Smuzhiyun search_bytes = min(search_bytes, end - search_start + 1);
1997*4882a593Smuzhiyun ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
1998*4882a593Smuzhiyun false);
1999*4882a593Smuzhiyun if (ret < 0 || search_start != *offset)
2000*4882a593Smuzhiyun return -EINVAL;
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun /* We may have found more bits than what we need */
2003*4882a593Smuzhiyun search_bytes = min(search_bytes, *bytes);
2004*4882a593Smuzhiyun
2005*4882a593Smuzhiyun /* Cannot clear past the end of the bitmap */
2006*4882a593Smuzhiyun search_bytes = min(search_bytes, end - search_start + 1);
2007*4882a593Smuzhiyun
2008*4882a593Smuzhiyun bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2009*4882a593Smuzhiyun *offset += search_bytes;
2010*4882a593Smuzhiyun *bytes -= search_bytes;
2011*4882a593Smuzhiyun
2012*4882a593Smuzhiyun if (*bytes) {
2013*4882a593Smuzhiyun struct rb_node *next = rb_next(&bitmap_info->offset_index);
2014*4882a593Smuzhiyun if (!bitmap_info->bytes)
2015*4882a593Smuzhiyun free_bitmap(ctl, bitmap_info);
2016*4882a593Smuzhiyun
2017*4882a593Smuzhiyun /*
2018*4882a593Smuzhiyun * no entry after this bitmap, but we still have bytes to
2019*4882a593Smuzhiyun * remove, so something has gone wrong.
2020*4882a593Smuzhiyun */
2021*4882a593Smuzhiyun if (!next)
2022*4882a593Smuzhiyun return -EINVAL;
2023*4882a593Smuzhiyun
2024*4882a593Smuzhiyun bitmap_info = rb_entry(next, struct btrfs_free_space,
2025*4882a593Smuzhiyun offset_index);
2026*4882a593Smuzhiyun
2027*4882a593Smuzhiyun /*
2028*4882a593Smuzhiyun * if the next entry isn't a bitmap we need to return to let the
2029*4882a593Smuzhiyun * extent stuff do its work.
2030*4882a593Smuzhiyun */
2031*4882a593Smuzhiyun if (!bitmap_info->bitmap)
2032*4882a593Smuzhiyun return -EAGAIN;
2033*4882a593Smuzhiyun
2034*4882a593Smuzhiyun /*
2035*4882a593Smuzhiyun * OK, the next item is a bitmap, but it may not actually cover the
2036*4882a593Smuzhiyun * rest of the space we need to remove, so look for our range in it,
2037*4882a593Smuzhiyun * and if we don't find it, return -EAGAIN so the caller can try
2038*4882a593Smuzhiyun * everything over again.
2039*4882a593Smuzhiyun */
2040*4882a593Smuzhiyun search_start = *offset;
2041*4882a593Smuzhiyun search_bytes = ctl->unit;
2042*4882a593Smuzhiyun ret = search_bitmap(ctl, bitmap_info, &search_start,
2043*4882a593Smuzhiyun &search_bytes, false);
2044*4882a593Smuzhiyun if (ret < 0 || search_start != *offset)
2045*4882a593Smuzhiyun return -EAGAIN;
2046*4882a593Smuzhiyun
2047*4882a593Smuzhiyun goto again;
2048*4882a593Smuzhiyun } else if (!bitmap_info->bytes)
2049*4882a593Smuzhiyun free_bitmap(ctl, bitmap_info);
2050*4882a593Smuzhiyun
2051*4882a593Smuzhiyun return 0;
2052*4882a593Smuzhiyun }
2053*4882a593Smuzhiyun
2054*4882a593Smuzhiyun static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2055*4882a593Smuzhiyun struct btrfs_free_space *info, u64 offset,
2056*4882a593Smuzhiyun u64 bytes, enum btrfs_trim_state trim_state)
2057*4882a593Smuzhiyun {
2058*4882a593Smuzhiyun u64 bytes_to_set = 0;
2059*4882a593Smuzhiyun u64 end;
2060*4882a593Smuzhiyun
2061*4882a593Smuzhiyun /*
2062*4882a593Smuzhiyun * This is a tradeoff to make bitmap trim state minimal. We mark the
2063*4882a593Smuzhiyun * whole bitmap untrimmed if at any point we add untrimmed regions.
2064*4882a593Smuzhiyun */
2065*4882a593Smuzhiyun if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
2066*4882a593Smuzhiyun if (btrfs_free_space_trimmed(info)) {
2067*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] +=
2068*4882a593Smuzhiyun info->bitmap_extents;
2069*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2070*4882a593Smuzhiyun }
2071*4882a593Smuzhiyun info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2072*4882a593Smuzhiyun }
2073*4882a593Smuzhiyun
2074*4882a593Smuzhiyun end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2075*4882a593Smuzhiyun
2076*4882a593Smuzhiyun bytes_to_set = min(end - offset, bytes);
2077*4882a593Smuzhiyun
2078*4882a593Smuzhiyun bitmap_set_bits(ctl, info, offset, bytes_to_set);
2079*4882a593Smuzhiyun
2080*4882a593Smuzhiyun /*
2081*4882a593Smuzhiyun * We set some bytes, we have no idea what the max extent size is
2082*4882a593Smuzhiyun * anymore.
2083*4882a593Smuzhiyun */
2084*4882a593Smuzhiyun info->max_extent_size = 0;
2085*4882a593Smuzhiyun
2086*4882a593Smuzhiyun return bytes_to_set;
2087*4882a593Smuzhiyun
2088*4882a593Smuzhiyun }
2089*4882a593Smuzhiyun
2090*4882a593Smuzhiyun static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2091*4882a593Smuzhiyun struct btrfs_free_space *info)
2092*4882a593Smuzhiyun {
2093*4882a593Smuzhiyun struct btrfs_block_group *block_group = ctl->private;
2094*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = block_group->fs_info;
2095*4882a593Smuzhiyun bool forced = false;
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun #ifdef CONFIG_BTRFS_DEBUG
2098*4882a593Smuzhiyun if (btrfs_should_fragment_free_space(block_group))
2099*4882a593Smuzhiyun forced = true;
2100*4882a593Smuzhiyun #endif
2101*4882a593Smuzhiyun
2102*4882a593Smuzhiyun /* This is a way to reclaim large regions from the bitmaps. */
2103*4882a593Smuzhiyun if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2104*4882a593Smuzhiyun return false;
2105*4882a593Smuzhiyun
2106*4882a593Smuzhiyun /*
2107*4882a593Smuzhiyun * If we are below the extents threshold then we can add this as an
2108*4882a593Smuzhiyun * extent, and don't have to deal with the bitmap
2109*4882a593Smuzhiyun */
2110*4882a593Smuzhiyun if (!forced && ctl->free_extents < ctl->extents_thresh) {
2111*4882a593Smuzhiyun /*
2112*4882a593Smuzhiyun * If this block group has some small extents we don't want to
2113*4882a593Smuzhiyun * use up all of our free slots in the cache with them, we want
2114*4882a593Smuzhiyun * to reserve those slots for larger extents. However, if we have
2115*4882a593Smuzhiyun * plenty of cache left then go ahead and add them; no sense in
2116*4882a593Smuzhiyun * adding the overhead of a bitmap if we don't have to.
2117*4882a593Smuzhiyun */
2118*4882a593Smuzhiyun if (info->bytes <= fs_info->sectorsize * 8) {
2119*4882a593Smuzhiyun if (ctl->free_extents * 3 <= ctl->extents_thresh)
2120*4882a593Smuzhiyun return false;
2121*4882a593Smuzhiyun } else {
2122*4882a593Smuzhiyun return false;
2123*4882a593Smuzhiyun }
2124*4882a593Smuzhiyun }
2125*4882a593Smuzhiyun
2126*4882a593Smuzhiyun /*
2127*4882a593Smuzhiyun * The original block groups from mkfs can be really small, like 8
2128*4882a593Smuzhiyun * megabytes, so don't bother with a bitmap for those entries. However
2129*4882a593Smuzhiyun * some block groups can be smaller than what a bitmap would cover but
2130*4882a593Smuzhiyun * are still large enough that they could overflow the 32k memory limit,
2131*4882a593Smuzhiyun * so still allow those block groups to have a bitmap entry.
2133*4882a593Smuzhiyun */
2134*4882a593Smuzhiyun if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
2135*4882a593Smuzhiyun return false;
2136*4882a593Smuzhiyun
2137*4882a593Smuzhiyun return true;
2138*4882a593Smuzhiyun }
2139*4882a593Smuzhiyun
2140*4882a593Smuzhiyun static const struct btrfs_free_space_op free_space_op = {
2141*4882a593Smuzhiyun .recalc_thresholds = recalculate_thresholds,
2142*4882a593Smuzhiyun .use_bitmap = use_bitmap,
2143*4882a593Smuzhiyun };
2144*4882a593Smuzhiyun
2145*4882a593Smuzhiyun static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2146*4882a593Smuzhiyun struct btrfs_free_space *info)
2147*4882a593Smuzhiyun {
2148*4882a593Smuzhiyun struct btrfs_free_space *bitmap_info;
2149*4882a593Smuzhiyun struct btrfs_block_group *block_group = NULL;
2150*4882a593Smuzhiyun int added = 0;
2151*4882a593Smuzhiyun u64 bytes, offset, bytes_added;
2152*4882a593Smuzhiyun enum btrfs_trim_state trim_state;
2153*4882a593Smuzhiyun int ret;
2154*4882a593Smuzhiyun
2155*4882a593Smuzhiyun bytes = info->bytes;
2156*4882a593Smuzhiyun offset = info->offset;
2157*4882a593Smuzhiyun trim_state = info->trim_state;
2158*4882a593Smuzhiyun
2159*4882a593Smuzhiyun if (!ctl->op->use_bitmap(ctl, info))
2160*4882a593Smuzhiyun return 0;
2161*4882a593Smuzhiyun
2162*4882a593Smuzhiyun if (ctl->op == &free_space_op)
2163*4882a593Smuzhiyun block_group = ctl->private;
2164*4882a593Smuzhiyun again:
2165*4882a593Smuzhiyun /*
2166*4882a593Smuzhiyun * Since we link bitmaps right into the cluster we need to see if we
2167*4882a593Smuzhiyun * have a cluster here, and if so and it has our bitmap we need to add
2168*4882a593Smuzhiyun * the free space to that bitmap.
2169*4882a593Smuzhiyun */
2170*4882a593Smuzhiyun if (block_group && !list_empty(&block_group->cluster_list)) {
2171*4882a593Smuzhiyun struct btrfs_free_cluster *cluster;
2172*4882a593Smuzhiyun struct rb_node *node;
2173*4882a593Smuzhiyun struct btrfs_free_space *entry;
2174*4882a593Smuzhiyun
2175*4882a593Smuzhiyun cluster = list_entry(block_group->cluster_list.next,
2176*4882a593Smuzhiyun struct btrfs_free_cluster,
2177*4882a593Smuzhiyun block_group_list);
2178*4882a593Smuzhiyun spin_lock(&cluster->lock);
2179*4882a593Smuzhiyun node = rb_first(&cluster->root);
2180*4882a593Smuzhiyun if (!node) {
2181*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2182*4882a593Smuzhiyun goto no_cluster_bitmap;
2183*4882a593Smuzhiyun }
2184*4882a593Smuzhiyun
2185*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
2186*4882a593Smuzhiyun if (!entry->bitmap) {
2187*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2188*4882a593Smuzhiyun goto no_cluster_bitmap;
2189*4882a593Smuzhiyun }
2190*4882a593Smuzhiyun
2191*4882a593Smuzhiyun if (entry->offset == offset_to_bitmap(ctl, offset)) {
2192*4882a593Smuzhiyun bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2193*4882a593Smuzhiyun bytes, trim_state);
2194*4882a593Smuzhiyun bytes -= bytes_added;
2195*4882a593Smuzhiyun offset += bytes_added;
2196*4882a593Smuzhiyun }
2197*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2198*4882a593Smuzhiyun if (!bytes) {
2199*4882a593Smuzhiyun ret = 1;
2200*4882a593Smuzhiyun goto out;
2201*4882a593Smuzhiyun }
2202*4882a593Smuzhiyun }
2203*4882a593Smuzhiyun
2204*4882a593Smuzhiyun no_cluster_bitmap:
2205*4882a593Smuzhiyun bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2206*4882a593Smuzhiyun 1, 0);
2207*4882a593Smuzhiyun if (!bitmap_info) {
2208*4882a593Smuzhiyun ASSERT(added == 0);
2209*4882a593Smuzhiyun goto new_bitmap;
2210*4882a593Smuzhiyun }
2211*4882a593Smuzhiyun
2212*4882a593Smuzhiyun bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2213*4882a593Smuzhiyun trim_state);
2214*4882a593Smuzhiyun bytes -= bytes_added;
2215*4882a593Smuzhiyun offset += bytes_added;
2216*4882a593Smuzhiyun added = 0;
2217*4882a593Smuzhiyun
2218*4882a593Smuzhiyun if (!bytes) {
2219*4882a593Smuzhiyun ret = 1;
2220*4882a593Smuzhiyun goto out;
2221*4882a593Smuzhiyun } else
2222*4882a593Smuzhiyun goto again;
2223*4882a593Smuzhiyun
2224*4882a593Smuzhiyun new_bitmap:
2225*4882a593Smuzhiyun if (info && info->bitmap) {
2226*4882a593Smuzhiyun add_new_bitmap(ctl, info, offset);
2227*4882a593Smuzhiyun added = 1;
2228*4882a593Smuzhiyun info = NULL;
2229*4882a593Smuzhiyun goto again;
2230*4882a593Smuzhiyun } else {
2231*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2232*4882a593Smuzhiyun
2233*4882a593Smuzhiyun /* no pre-allocated info, allocate a new one */
2234*4882a593Smuzhiyun if (!info) {
2235*4882a593Smuzhiyun info = kmem_cache_zalloc(btrfs_free_space_cachep,
2236*4882a593Smuzhiyun GFP_NOFS);
2237*4882a593Smuzhiyun if (!info) {
2238*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2239*4882a593Smuzhiyun ret = -ENOMEM;
2240*4882a593Smuzhiyun goto out;
2241*4882a593Smuzhiyun }
2242*4882a593Smuzhiyun }
2243*4882a593Smuzhiyun
2244*4882a593Smuzhiyun /* allocate the bitmap */
2245*4882a593Smuzhiyun info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2246*4882a593Smuzhiyun GFP_NOFS);
2247*4882a593Smuzhiyun info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
2248*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2249*4882a593Smuzhiyun if (!info->bitmap) {
2250*4882a593Smuzhiyun ret = -ENOMEM;
2251*4882a593Smuzhiyun goto out;
2252*4882a593Smuzhiyun }
2253*4882a593Smuzhiyun goto again;
2254*4882a593Smuzhiyun }
2255*4882a593Smuzhiyun
2256*4882a593Smuzhiyun out:
2257*4882a593Smuzhiyun if (info) {
2258*4882a593Smuzhiyun if (info->bitmap)
2259*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_bitmap_cachep,
2260*4882a593Smuzhiyun info->bitmap);
2261*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
2262*4882a593Smuzhiyun }
2263*4882a593Smuzhiyun
2264*4882a593Smuzhiyun return ret;
2265*4882a593Smuzhiyun }
2266*4882a593Smuzhiyun
2267*4882a593Smuzhiyun /*
2268*4882a593Smuzhiyun * Free space merging rules:
2269*4882a593Smuzhiyun * 1) Merge trimmed areas together
2270*4882a593Smuzhiyun * 2) Let untrimmed areas coalesce with trimmed areas
2271*4882a593Smuzhiyun * 3) Always pull neighboring regions from bitmaps
2272*4882a593Smuzhiyun *
2273*4882a593Smuzhiyun * The above rules are for when we merge free space based on btrfs_trim_state.
2274*4882a593Smuzhiyun * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2275*4882a593Smuzhiyun * same reason: to promote larger extent regions which makes life easier for
2276*4882a593Smuzhiyun * find_free_extent(). Rule 2 enables coalescing based on the common path
2277*4882a593Smuzhiyun * find_free_extent(). Rule 2 enables coalescing based on the common path
2278*4882a593Smuzhiyun * of returning free space from btrfs_finish_extent_commit(). So when free
2279*4882a593Smuzhiyun * space is trimmed, this prevents aggregating a newly trimmed region with
2280*4882a593Smuzhiyun * and provide find_free_extent() with the largest extents possible hoping for
2281*4882a593Smuzhiyun * the reuse path.
2282*4882a593Smuzhiyun */
2283*4882a593Smuzhiyun static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
2284*4882a593Smuzhiyun struct btrfs_free_space *info, bool update_stat)
2285*4882a593Smuzhiyun {
2286*4882a593Smuzhiyun struct btrfs_free_space *left_info = NULL;
2287*4882a593Smuzhiyun struct btrfs_free_space *right_info;
2288*4882a593Smuzhiyun bool merged = false;
2289*4882a593Smuzhiyun u64 offset = info->offset;
2290*4882a593Smuzhiyun u64 bytes = info->bytes;
2291*4882a593Smuzhiyun const bool is_trimmed = btrfs_free_space_trimmed(info);
2292*4882a593Smuzhiyun
2293*4882a593Smuzhiyun /*
2294*4882a593Smuzhiyun * First we want to see if there is free space adjacent to the range we
2295*4882a593Smuzhiyun * are adding; if there is, remove that entry and merge it into 'info'
2296*4882a593Smuzhiyun * so a single entry covers the entire range.
2297*4882a593Smuzhiyun */
2298*4882a593Smuzhiyun right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
2299*4882a593Smuzhiyun if (right_info && rb_prev(&right_info->offset_index))
2300*4882a593Smuzhiyun left_info = rb_entry(rb_prev(&right_info->offset_index),
2301*4882a593Smuzhiyun struct btrfs_free_space, offset_index);
2302*4882a593Smuzhiyun else if (!right_info)
2303*4882a593Smuzhiyun left_info = tree_search_offset(ctl, offset - 1, 0, 0);
2304*4882a593Smuzhiyun
2305*4882a593Smuzhiyun /* See try_merge_free_space() comment. */
2306*4882a593Smuzhiyun if (right_info && !right_info->bitmap &&
2307*4882a593Smuzhiyun (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
2308*4882a593Smuzhiyun if (update_stat)
2309*4882a593Smuzhiyun unlink_free_space(ctl, right_info);
2310*4882a593Smuzhiyun else
2311*4882a593Smuzhiyun __unlink_free_space(ctl, right_info);
2312*4882a593Smuzhiyun info->bytes += right_info->bytes;
2313*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, right_info);
2314*4882a593Smuzhiyun merged = true;
2315*4882a593Smuzhiyun }
2316*4882a593Smuzhiyun
2317*4882a593Smuzhiyun /* See try_merge_free_space() comment. */
2318*4882a593Smuzhiyun if (left_info && !left_info->bitmap &&
2319*4882a593Smuzhiyun left_info->offset + left_info->bytes == offset &&
2320*4882a593Smuzhiyun (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
2321*4882a593Smuzhiyun if (update_stat)
2322*4882a593Smuzhiyun unlink_free_space(ctl, left_info);
2323*4882a593Smuzhiyun else
2324*4882a593Smuzhiyun __unlink_free_space(ctl, left_info);
2325*4882a593Smuzhiyun info->offset = left_info->offset;
2326*4882a593Smuzhiyun info->bytes += left_info->bytes;
2327*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, left_info);
2328*4882a593Smuzhiyun merged = true;
2329*4882a593Smuzhiyun }
2330*4882a593Smuzhiyun
2331*4882a593Smuzhiyun return merged;
2332*4882a593Smuzhiyun }
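/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): how the trim-state rules above play out.  Assume an existing
 * extent entry [1M, 1M + 64K) and a new region [1M + 64K, 1M + 128K)
 * being added:
 *
 *   - new region UNTRIMMED (the common case when space is returned by
 *     btrfs_finish_extent_commit()): the two merge into a single 128K
 *     entry that keeps the new region's UNTRIMMED state (rule 2);
 *   - new region TRIMMED, neighbour TRIMMED: merged, stays TRIMMED
 *     (rule 1);
 *   - new region TRIMMED, neighbour UNTRIMMED: the merge is skipped, so
 *     already-trimmed space is not folded into a region that still needs
 *     discarding.
 */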
2333*4882a593Smuzhiyun
2334*4882a593Smuzhiyun static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2335*4882a593Smuzhiyun struct btrfs_free_space *info,
2336*4882a593Smuzhiyun bool update_stat)
2337*4882a593Smuzhiyun {
2338*4882a593Smuzhiyun struct btrfs_free_space *bitmap;
2339*4882a593Smuzhiyun unsigned long i;
2340*4882a593Smuzhiyun unsigned long j;
2341*4882a593Smuzhiyun const u64 end = info->offset + info->bytes;
2342*4882a593Smuzhiyun const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2343*4882a593Smuzhiyun u64 bytes;
2344*4882a593Smuzhiyun
2345*4882a593Smuzhiyun bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2346*4882a593Smuzhiyun if (!bitmap)
2347*4882a593Smuzhiyun return false;
2348*4882a593Smuzhiyun
2349*4882a593Smuzhiyun i = offset_to_bit(bitmap->offset, ctl->unit, end);
2350*4882a593Smuzhiyun j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2351*4882a593Smuzhiyun if (j == i)
2352*4882a593Smuzhiyun return false;
2353*4882a593Smuzhiyun bytes = (j - i) * ctl->unit;
2354*4882a593Smuzhiyun info->bytes += bytes;
2355*4882a593Smuzhiyun
2356*4882a593Smuzhiyun /* See try_merge_free_space() comment. */
2357*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(bitmap))
2358*4882a593Smuzhiyun info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2359*4882a593Smuzhiyun
2360*4882a593Smuzhiyun if (update_stat)
2361*4882a593Smuzhiyun bitmap_clear_bits(ctl, bitmap, end, bytes);
2362*4882a593Smuzhiyun else
2363*4882a593Smuzhiyun __bitmap_clear_bits(ctl, bitmap, end, bytes);
2364*4882a593Smuzhiyun
2365*4882a593Smuzhiyun if (!bitmap->bytes)
2366*4882a593Smuzhiyun free_bitmap(ctl, bitmap);
2367*4882a593Smuzhiyun
2368*4882a593Smuzhiyun return true;
2369*4882a593Smuzhiyun }
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2372*4882a593Smuzhiyun struct btrfs_free_space *info,
2373*4882a593Smuzhiyun bool update_stat)
2374*4882a593Smuzhiyun {
2375*4882a593Smuzhiyun struct btrfs_free_space *bitmap;
2376*4882a593Smuzhiyun u64 bitmap_offset;
2377*4882a593Smuzhiyun unsigned long i;
2378*4882a593Smuzhiyun unsigned long j;
2379*4882a593Smuzhiyun unsigned long prev_j;
2380*4882a593Smuzhiyun u64 bytes;
2381*4882a593Smuzhiyun
2382*4882a593Smuzhiyun bitmap_offset = offset_to_bitmap(ctl, info->offset);
2383*4882a593Smuzhiyun /* If we're on a boundary, try the previous logical bitmap. */
2384*4882a593Smuzhiyun if (bitmap_offset == info->offset) {
2385*4882a593Smuzhiyun if (info->offset == 0)
2386*4882a593Smuzhiyun return false;
2387*4882a593Smuzhiyun bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2388*4882a593Smuzhiyun }
2389*4882a593Smuzhiyun
2390*4882a593Smuzhiyun bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2391*4882a593Smuzhiyun if (!bitmap)
2392*4882a593Smuzhiyun return false;
2393*4882a593Smuzhiyun
2394*4882a593Smuzhiyun i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2395*4882a593Smuzhiyun j = 0;
2396*4882a593Smuzhiyun prev_j = (unsigned long)-1;
2397*4882a593Smuzhiyun for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2398*4882a593Smuzhiyun if (j > i)
2399*4882a593Smuzhiyun break;
2400*4882a593Smuzhiyun prev_j = j;
2401*4882a593Smuzhiyun }
2402*4882a593Smuzhiyun if (prev_j == i)
2403*4882a593Smuzhiyun return false;
2404*4882a593Smuzhiyun
2405*4882a593Smuzhiyun if (prev_j == (unsigned long)-1)
2406*4882a593Smuzhiyun bytes = (i + 1) * ctl->unit;
2407*4882a593Smuzhiyun else
2408*4882a593Smuzhiyun bytes = (i - prev_j) * ctl->unit;
2409*4882a593Smuzhiyun
2410*4882a593Smuzhiyun info->offset -= bytes;
2411*4882a593Smuzhiyun info->bytes += bytes;
2412*4882a593Smuzhiyun
2413*4882a593Smuzhiyun /* See try_merge_free_space() comment. */
2414*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(bitmap))
2415*4882a593Smuzhiyun info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2416*4882a593Smuzhiyun
2417*4882a593Smuzhiyun if (update_stat)
2418*4882a593Smuzhiyun bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2419*4882a593Smuzhiyun else
2420*4882a593Smuzhiyun __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2421*4882a593Smuzhiyun
2422*4882a593Smuzhiyun if (!bitmap->bytes)
2423*4882a593Smuzhiyun free_bitmap(ctl, bitmap);
2424*4882a593Smuzhiyun
2425*4882a593Smuzhiyun return true;
2426*4882a593Smuzhiyun }
2427*4882a593Smuzhiyun
2428*4882a593Smuzhiyun /*
2429*4882a593Smuzhiyun * We prefer always to allocate from extent entries, both for clustered and
2430*4882a593Smuzhiyun * non-clustered allocation requests. So when attempting to add a new extent
2431*4882a593Smuzhiyun * entry, try to see if there's adjacent free space in bitmap entries, and if
2432*4882a593Smuzhiyun * there is, migrate that space from the bitmaps to the extent.
2433*4882a593Smuzhiyun * Like this we get better chances of satisfying space allocation requests
2434*4882a593Smuzhiyun * because we attempt to satisfy them based on a single cache entry, and never
2435*4882a593Smuzhiyun * on 2 or more entries - even if the entries represent a contiguous free space
2436*4882a593Smuzhiyun * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2437*4882a593Smuzhiyun * ends).
2438*4882a593Smuzhiyun */
2439*4882a593Smuzhiyun static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2440*4882a593Smuzhiyun struct btrfs_free_space *info,
2441*4882a593Smuzhiyun bool update_stat)
2442*4882a593Smuzhiyun {
2443*4882a593Smuzhiyun /*
2444*4882a593Smuzhiyun * Only work with disconnected entries, as we can change their offset,
2445*4882a593Smuzhiyun * and they must be extent entries.
2446*4882a593Smuzhiyun */
2447*4882a593Smuzhiyun ASSERT(!info->bitmap);
2448*4882a593Smuzhiyun ASSERT(RB_EMPTY_NODE(&info->offset_index));
2449*4882a593Smuzhiyun
2450*4882a593Smuzhiyun if (ctl->total_bitmaps > 0) {
2451*4882a593Smuzhiyun bool stole_end;
2452*4882a593Smuzhiyun bool stole_front = false;
2453*4882a593Smuzhiyun
2454*4882a593Smuzhiyun stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2455*4882a593Smuzhiyun if (ctl->total_bitmaps > 0)
2456*4882a593Smuzhiyun stole_front = steal_from_bitmap_to_front(ctl, info,
2457*4882a593Smuzhiyun update_stat);
2458*4882a593Smuzhiyun
2459*4882a593Smuzhiyun if (stole_end || stole_front)
2460*4882a593Smuzhiyun try_merge_free_space(ctl, info, update_stat);
2461*4882a593Smuzhiyun }
2462*4882a593Smuzhiyun }
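/*
 * Illustrative sketch (editorial addition): with ctl->unit = 4K, suppose
 * a disconnected extent entry [1M, 1M + 32K) is being added while a
 * bitmap entry has bits set for [1M + 32K, 1M + 64K) and for
 * [1M - 8K, 1M).  steal_from_bitmap_to_end() clears the trailing run and
 * grows the entry to [1M, 1M + 64K); steal_from_bitmap_to_front() then
 * clears the leading run and grows it to [1M - 8K, 1M + 64K).  If either
 * run came from an untrimmed bitmap the entry is downgraded to
 * BTRFS_TRIM_STATE_UNTRIMMED, and a final try_merge_free_space() pass
 * merges the enlarged extent with any neighbours it now touches.
 */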
2463*4882a593Smuzhiyun
2464*4882a593Smuzhiyun int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2465*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl,
2466*4882a593Smuzhiyun u64 offset, u64 bytes,
2467*4882a593Smuzhiyun enum btrfs_trim_state trim_state)
2468*4882a593Smuzhiyun {
2469*4882a593Smuzhiyun struct btrfs_block_group *block_group = ctl->private;
2470*4882a593Smuzhiyun struct btrfs_free_space *info;
2471*4882a593Smuzhiyun int ret = 0;
2472*4882a593Smuzhiyun u64 filter_bytes = bytes;
2473*4882a593Smuzhiyun
2474*4882a593Smuzhiyun info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2475*4882a593Smuzhiyun if (!info)
2476*4882a593Smuzhiyun return -ENOMEM;
2477*4882a593Smuzhiyun
2478*4882a593Smuzhiyun info->offset = offset;
2479*4882a593Smuzhiyun info->bytes = bytes;
2480*4882a593Smuzhiyun info->trim_state = trim_state;
2481*4882a593Smuzhiyun RB_CLEAR_NODE(&info->offset_index);
2482*4882a593Smuzhiyun
2483*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2484*4882a593Smuzhiyun
2485*4882a593Smuzhiyun if (try_merge_free_space(ctl, info, true))
2486*4882a593Smuzhiyun goto link;
2487*4882a593Smuzhiyun
2488*4882a593Smuzhiyun /*
2489*4882a593Smuzhiyun * There was no extent directly to the left or right of this new
2490*4882a593Smuzhiyun * extent, so we know we're going to have to allocate a new extent;
2491*4882a593Smuzhiyun * before we do that, see if we need to drop this into a bitmap
2492*4882a593Smuzhiyun */
2493*4882a593Smuzhiyun ret = insert_into_bitmap(ctl, info);
2494*4882a593Smuzhiyun if (ret < 0) {
2495*4882a593Smuzhiyun goto out;
2496*4882a593Smuzhiyun } else if (ret) {
2497*4882a593Smuzhiyun ret = 0;
2498*4882a593Smuzhiyun goto out;
2499*4882a593Smuzhiyun }
2500*4882a593Smuzhiyun link:
2501*4882a593Smuzhiyun /*
2502*4882a593Smuzhiyun * Only steal free space from adjacent bitmaps if we're sure we're not
2503*4882a593Smuzhiyun * going to add the new free space to existing bitmap entries - because
2504*4882a593Smuzhiyun * that would mean unnecessary work that would be reverted. Therefore
2505*4882a593Smuzhiyun * attempt to steal space from bitmaps if we're adding an extent entry.
2506*4882a593Smuzhiyun */
2507*4882a593Smuzhiyun steal_from_bitmap(ctl, info, true);
2508*4882a593Smuzhiyun
2509*4882a593Smuzhiyun filter_bytes = max(filter_bytes, info->bytes);
2510*4882a593Smuzhiyun
2511*4882a593Smuzhiyun ret = link_free_space(ctl, info);
2512*4882a593Smuzhiyun if (ret)
2513*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
2514*4882a593Smuzhiyun out:
2515*4882a593Smuzhiyun btrfs_discard_update_discardable(block_group, ctl);
2516*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2517*4882a593Smuzhiyun
2518*4882a593Smuzhiyun if (ret) {
2519*4882a593Smuzhiyun btrfs_crit(fs_info, "unable to add free space: %d", ret);
2520*4882a593Smuzhiyun ASSERT(ret != -EEXIST);
2521*4882a593Smuzhiyun }
2522*4882a593Smuzhiyun
2523*4882a593Smuzhiyun if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2524*4882a593Smuzhiyun btrfs_discard_check_filter(block_group, filter_bytes);
2525*4882a593Smuzhiyun btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
2526*4882a593Smuzhiyun }
2527*4882a593Smuzhiyun
2528*4882a593Smuzhiyun return ret;
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun
2531*4882a593Smuzhiyun int btrfs_add_free_space(struct btrfs_block_group *block_group,
2532*4882a593Smuzhiyun u64 bytenr, u64 size)
2533*4882a593Smuzhiyun {
2534*4882a593Smuzhiyun enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2535*4882a593Smuzhiyun
2536*4882a593Smuzhiyun if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2537*4882a593Smuzhiyun trim_state = BTRFS_TRIM_STATE_TRIMMED;
2538*4882a593Smuzhiyun
2539*4882a593Smuzhiyun return __btrfs_add_free_space(block_group->fs_info,
2540*4882a593Smuzhiyun block_group->free_space_ctl,
2541*4882a593Smuzhiyun bytenr, size, trim_state);
2542*4882a593Smuzhiyun }
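/*
 * Usage sketch (editorial addition; the caller context is assumed, not
 * taken from this file): handing an 8K region at logical address 'start'
 * back to a block group's in-memory free space tracking:
 *
 *	ret = btrfs_add_free_space(block_group, start, SZ_8K);
 *	if (ret)	/* typically -ENOMEM */
 *		...;
 *
 * With the DISCARD_SYNC mount option (-o discard) the region is recorded
 * as already trimmed; otherwise it goes in untrimmed and
 * __btrfs_add_free_space() queues the block group for async discard via
 * btrfs_discard_queue_work().
 */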
2543*4882a593Smuzhiyun
2544*4882a593Smuzhiyun /*
2545*4882a593Smuzhiyun * This is a subtle distinction because when adding free space back in general,
2546*4882a593Smuzhiyun * we want it to be added as untrimmed for async discard. But in the case where we add
2547*4882a593Smuzhiyun * it on loading of a block group, we want to consider it trimmed.
2548*4882a593Smuzhiyun */
2549*4882a593Smuzhiyun int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2550*4882a593Smuzhiyun u64 bytenr, u64 size)
2551*4882a593Smuzhiyun {
2552*4882a593Smuzhiyun enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2553*4882a593Smuzhiyun
2554*4882a593Smuzhiyun if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2555*4882a593Smuzhiyun btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2556*4882a593Smuzhiyun trim_state = BTRFS_TRIM_STATE_TRIMMED;
2557*4882a593Smuzhiyun
2558*4882a593Smuzhiyun return __btrfs_add_free_space(block_group->fs_info,
2559*4882a593Smuzhiyun block_group->free_space_ctl,
2560*4882a593Smuzhiyun bytenr, size, trim_state);
2561*4882a593Smuzhiyun }
2562*4882a593Smuzhiyun
2563*4882a593Smuzhiyun int btrfs_remove_free_space(struct btrfs_block_group *block_group,
2564*4882a593Smuzhiyun u64 offset, u64 bytes)
2565*4882a593Smuzhiyun {
2566*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2567*4882a593Smuzhiyun struct btrfs_free_space *info;
2568*4882a593Smuzhiyun int ret;
2569*4882a593Smuzhiyun bool re_search = false;
2570*4882a593Smuzhiyun
2571*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2572*4882a593Smuzhiyun
2573*4882a593Smuzhiyun again:
2574*4882a593Smuzhiyun ret = 0;
2575*4882a593Smuzhiyun if (!bytes)
2576*4882a593Smuzhiyun goto out_lock;
2577*4882a593Smuzhiyun
2578*4882a593Smuzhiyun info = tree_search_offset(ctl, offset, 0, 0);
2579*4882a593Smuzhiyun if (!info) {
2580*4882a593Smuzhiyun /*
2581*4882a593Smuzhiyun * oops didn't find an extent that matched the space we wanted
2582*4882a593Smuzhiyun * to remove, look for a bitmap instead
2583*4882a593Smuzhiyun */
2584*4882a593Smuzhiyun info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
2585*4882a593Smuzhiyun 1, 0);
2586*4882a593Smuzhiyun if (!info) {
2587*4882a593Smuzhiyun /*
2588*4882a593Smuzhiyun * If we found a partial bit of our free space in a
2589*4882a593Smuzhiyun * bitmap but then couldn't find the other part, this may
2590*4882a593Smuzhiyun * be a problem, so WARN about it.
2591*4882a593Smuzhiyun */
2592*4882a593Smuzhiyun WARN_ON(re_search);
2593*4882a593Smuzhiyun goto out_lock;
2594*4882a593Smuzhiyun }
2595*4882a593Smuzhiyun }
2596*4882a593Smuzhiyun
2597*4882a593Smuzhiyun re_search = false;
2598*4882a593Smuzhiyun if (!info->bitmap) {
2599*4882a593Smuzhiyun unlink_free_space(ctl, info);
2600*4882a593Smuzhiyun if (offset == info->offset) {
2601*4882a593Smuzhiyun u64 to_free = min(bytes, info->bytes);
2602*4882a593Smuzhiyun
2603*4882a593Smuzhiyun info->bytes -= to_free;
2604*4882a593Smuzhiyun info->offset += to_free;
2605*4882a593Smuzhiyun if (info->bytes) {
2606*4882a593Smuzhiyun ret = link_free_space(ctl, info);
2607*4882a593Smuzhiyun WARN_ON(ret);
2608*4882a593Smuzhiyun } else {
2609*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
2610*4882a593Smuzhiyun }
2611*4882a593Smuzhiyun
2612*4882a593Smuzhiyun offset += to_free;
2613*4882a593Smuzhiyun bytes -= to_free;
2614*4882a593Smuzhiyun goto again;
2615*4882a593Smuzhiyun } else {
2616*4882a593Smuzhiyun u64 old_end = info->bytes + info->offset;
2617*4882a593Smuzhiyun
2618*4882a593Smuzhiyun info->bytes = offset - info->offset;
2619*4882a593Smuzhiyun ret = link_free_space(ctl, info);
2620*4882a593Smuzhiyun WARN_ON(ret);
2621*4882a593Smuzhiyun if (ret)
2622*4882a593Smuzhiyun goto out_lock;
2623*4882a593Smuzhiyun
2624*4882a593Smuzhiyun /* Not enough bytes in this entry to satisfy us */
2625*4882a593Smuzhiyun if (old_end < offset + bytes) {
2626*4882a593Smuzhiyun bytes -= old_end - offset;
2627*4882a593Smuzhiyun offset = old_end;
2628*4882a593Smuzhiyun goto again;
2629*4882a593Smuzhiyun } else if (old_end == offset + bytes) {
2630*4882a593Smuzhiyun /* all done */
2631*4882a593Smuzhiyun goto out_lock;
2632*4882a593Smuzhiyun }
2633*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2634*4882a593Smuzhiyun
2635*4882a593Smuzhiyun ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2636*4882a593Smuzhiyun offset + bytes,
2637*4882a593Smuzhiyun old_end - (offset + bytes),
2638*4882a593Smuzhiyun info->trim_state);
2639*4882a593Smuzhiyun WARN_ON(ret);
2640*4882a593Smuzhiyun goto out;
2641*4882a593Smuzhiyun }
2642*4882a593Smuzhiyun }
2643*4882a593Smuzhiyun
2644*4882a593Smuzhiyun ret = remove_from_bitmap(ctl, info, &offset, &bytes);
2645*4882a593Smuzhiyun if (ret == -EAGAIN) {
2646*4882a593Smuzhiyun re_search = true;
2647*4882a593Smuzhiyun goto again;
2648*4882a593Smuzhiyun }
2649*4882a593Smuzhiyun out_lock:
2650*4882a593Smuzhiyun btrfs_discard_update_discardable(block_group, ctl);
2651*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2652*4882a593Smuzhiyun out:
2653*4882a593Smuzhiyun return ret;
2654*4882a593Smuzhiyun }
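/*
 * Illustrative sketch (editorial addition): removing a range that sits in
 * the middle of an extent entry splits the entry.  Given an entry
 * [1M, 1M + 128K), btrfs_remove_free_space(bg, 1M + 32K, SZ_32K) shrinks
 * and re-links the front piece [1M, 1M + 32K) and re-adds the tail
 * [1M + 64K, 1M + 128K) through __btrfs_add_free_space() with the entry's
 * original trim state.  Requests that cross entry or bitmap boundaries
 * loop back to 'again' until every requested byte has been removed.
 */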
2655*4882a593Smuzhiyun
2656*4882a593Smuzhiyun void btrfs_dump_free_space(struct btrfs_block_group *block_group,
2657*4882a593Smuzhiyun u64 bytes)
2658*4882a593Smuzhiyun {
2659*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = block_group->fs_info;
2660*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2661*4882a593Smuzhiyun struct btrfs_free_space *info;
2662*4882a593Smuzhiyun struct rb_node *n;
2663*4882a593Smuzhiyun int count = 0;
2664*4882a593Smuzhiyun
2665*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2666*4882a593Smuzhiyun for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
2667*4882a593Smuzhiyun info = rb_entry(n, struct btrfs_free_space, offset_index);
2668*4882a593Smuzhiyun if (info->bytes >= bytes && !block_group->ro)
2669*4882a593Smuzhiyun count++;
2670*4882a593Smuzhiyun btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
2671*4882a593Smuzhiyun info->offset, info->bytes,
2672*4882a593Smuzhiyun (info->bitmap) ? "yes" : "no");
2673*4882a593Smuzhiyun }
2674*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2675*4882a593Smuzhiyun btrfs_info(fs_info, "block group has cluster?: %s",
2676*4882a593Smuzhiyun list_empty(&block_group->cluster_list) ? "no" : "yes");
2677*4882a593Smuzhiyun btrfs_info(fs_info,
2678*4882a593Smuzhiyun "%d blocks of free space at or bigger than %llu bytes", count, bytes);
2679*4882a593Smuzhiyun }
2680*4882a593Smuzhiyun
2681*4882a593Smuzhiyun void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
2682*4882a593Smuzhiyun {
2683*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = block_group->fs_info;
2684*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2685*4882a593Smuzhiyun
2686*4882a593Smuzhiyun spin_lock_init(&ctl->tree_lock);
2687*4882a593Smuzhiyun ctl->unit = fs_info->sectorsize;
2688*4882a593Smuzhiyun ctl->start = block_group->start;
2689*4882a593Smuzhiyun ctl->private = block_group;
2690*4882a593Smuzhiyun ctl->op = &free_space_op;
2691*4882a593Smuzhiyun INIT_LIST_HEAD(&ctl->trimming_ranges);
2692*4882a593Smuzhiyun mutex_init(&ctl->cache_writeout_mutex);
2693*4882a593Smuzhiyun
2694*4882a593Smuzhiyun /*
2695*4882a593Smuzhiyun * we only want to have 32k of ram per block group for keeping
2696*4882a593Smuzhiyun * track of free space, and if we pass 1/2 of that we want to
2697*4882a593Smuzhiyun * start converting things over to using bitmaps
2698*4882a593Smuzhiyun */
2699*4882a593Smuzhiyun ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
2700*4882a593Smuzhiyun }
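/*
 * Editorial note with a worked example (the struct size is an assumption,
 * it varies by architecture and config): SZ_32K / 2 is 16KiB of tracking
 * memory, and with sizeof(struct btrfs_free_space) somewhere around
 * 72-80 bytes on a 64-bit build, extents_thresh works out to roughly two
 * hundred extent entries per block group before free space starts being
 * folded into bitmaps instead of individual extent entries.
 */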
2701*4882a593Smuzhiyun
2702*4882a593Smuzhiyun /*
2703*4882a593Smuzhiyun * for a given cluster, put all of its extents back into the free
2704*4882a593Smuzhiyun * space cache. If the block group passed doesn't match the block group
2705*4882a593Smuzhiyun * pointed to by the cluster, someone else raced in and freed the
2706*4882a593Smuzhiyun * cluster already. In that case, we just return without changing anything
2707*4882a593Smuzhiyun */
2708*4882a593Smuzhiyun static void __btrfs_return_cluster_to_free_space(
2709*4882a593Smuzhiyun struct btrfs_block_group *block_group,
2710*4882a593Smuzhiyun struct btrfs_free_cluster *cluster)
2711*4882a593Smuzhiyun {
2712*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2713*4882a593Smuzhiyun struct btrfs_free_space *entry;
2714*4882a593Smuzhiyun struct rb_node *node;
2715*4882a593Smuzhiyun
2716*4882a593Smuzhiyun spin_lock(&cluster->lock);
2717*4882a593Smuzhiyun if (cluster->block_group != block_group) {
2718*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2719*4882a593Smuzhiyun return;
2720*4882a593Smuzhiyun }
2721*4882a593Smuzhiyun
2722*4882a593Smuzhiyun cluster->block_group = NULL;
2723*4882a593Smuzhiyun cluster->window_start = 0;
2724*4882a593Smuzhiyun list_del_init(&cluster->block_group_list);
2725*4882a593Smuzhiyun
2726*4882a593Smuzhiyun node = rb_first(&cluster->root);
2727*4882a593Smuzhiyun while (node) {
2728*4882a593Smuzhiyun bool bitmap;
2729*4882a593Smuzhiyun
2730*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
2731*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
2732*4882a593Smuzhiyun rb_erase(&entry->offset_index, &cluster->root);
2733*4882a593Smuzhiyun RB_CLEAR_NODE(&entry->offset_index);
2734*4882a593Smuzhiyun
2735*4882a593Smuzhiyun bitmap = (entry->bitmap != NULL);
2736*4882a593Smuzhiyun if (!bitmap) {
2737*4882a593Smuzhiyun /* Merging treats extents as if they were new */
2738*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(entry)) {
2739*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR]--;
2740*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2741*4882a593Smuzhiyun entry->bytes;
2742*4882a593Smuzhiyun }
2743*4882a593Smuzhiyun
2744*4882a593Smuzhiyun try_merge_free_space(ctl, entry, false);
2745*4882a593Smuzhiyun steal_from_bitmap(ctl, entry, false);
2746*4882a593Smuzhiyun
2747*4882a593Smuzhiyun /* As we insert directly, update these statistics */
2748*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(entry)) {
2749*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR]++;
2750*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2751*4882a593Smuzhiyun entry->bytes;
2752*4882a593Smuzhiyun }
2753*4882a593Smuzhiyun }
2754*4882a593Smuzhiyun tree_insert_offset(&ctl->free_space_offset,
2755*4882a593Smuzhiyun entry->offset, &entry->offset_index, bitmap);
2756*4882a593Smuzhiyun }
2757*4882a593Smuzhiyun cluster->root = RB_ROOT;
2758*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2759*4882a593Smuzhiyun btrfs_put_block_group(block_group);
2760*4882a593Smuzhiyun }
2761*4882a593Smuzhiyun
2762*4882a593Smuzhiyun static void __btrfs_remove_free_space_cache_locked(
2763*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl)
2764*4882a593Smuzhiyun {
2765*4882a593Smuzhiyun struct btrfs_free_space *info;
2766*4882a593Smuzhiyun struct rb_node *node;
2767*4882a593Smuzhiyun
2768*4882a593Smuzhiyun while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2769*4882a593Smuzhiyun info = rb_entry(node, struct btrfs_free_space, offset_index);
2770*4882a593Smuzhiyun if (!info->bitmap) {
2771*4882a593Smuzhiyun unlink_free_space(ctl, info);
2772*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
2773*4882a593Smuzhiyun } else {
2774*4882a593Smuzhiyun free_bitmap(ctl, info);
2775*4882a593Smuzhiyun }
2776*4882a593Smuzhiyun
2777*4882a593Smuzhiyun cond_resched_lock(&ctl->tree_lock);
2778*4882a593Smuzhiyun }
2779*4882a593Smuzhiyun }
2780*4882a593Smuzhiyun
2781*4882a593Smuzhiyun void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2782*4882a593Smuzhiyun {
2783*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2784*4882a593Smuzhiyun __btrfs_remove_free_space_cache_locked(ctl);
2785*4882a593Smuzhiyun if (ctl->private)
2786*4882a593Smuzhiyun btrfs_discard_update_discardable(ctl->private, ctl);
2787*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2788*4882a593Smuzhiyun }
2789*4882a593Smuzhiyun
2790*4882a593Smuzhiyun void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
2791*4882a593Smuzhiyun {
2792*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2793*4882a593Smuzhiyun struct btrfs_free_cluster *cluster;
2794*4882a593Smuzhiyun struct list_head *head;
2795*4882a593Smuzhiyun
2796*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2797*4882a593Smuzhiyun while ((head = block_group->cluster_list.next) !=
2798*4882a593Smuzhiyun &block_group->cluster_list) {
2799*4882a593Smuzhiyun cluster = list_entry(head, struct btrfs_free_cluster,
2800*4882a593Smuzhiyun block_group_list);
2801*4882a593Smuzhiyun
2802*4882a593Smuzhiyun WARN_ON(cluster->block_group != block_group);
2803*4882a593Smuzhiyun __btrfs_return_cluster_to_free_space(block_group, cluster);
2804*4882a593Smuzhiyun
2805*4882a593Smuzhiyun cond_resched_lock(&ctl->tree_lock);
2806*4882a593Smuzhiyun }
2807*4882a593Smuzhiyun __btrfs_remove_free_space_cache_locked(ctl);
2808*4882a593Smuzhiyun btrfs_discard_update_discardable(block_group, ctl);
2809*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2811*4882a593Smuzhiyun }
2812*4882a593Smuzhiyun
2813*4882a593Smuzhiyun /**
2814*4882a593Smuzhiyun * btrfs_is_free_space_trimmed - see if everything is trimmed
2815*4882a593Smuzhiyun * @block_group: block_group of interest
2816*4882a593Smuzhiyun *
2817*4882a593Smuzhiyun * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2818*4882a593Smuzhiyun */
2819*4882a593Smuzhiyun bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2820*4882a593Smuzhiyun {
2821*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2822*4882a593Smuzhiyun struct btrfs_free_space *info;
2823*4882a593Smuzhiyun struct rb_node *node;
2824*4882a593Smuzhiyun bool ret = true;
2825*4882a593Smuzhiyun
2826*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2827*4882a593Smuzhiyun node = rb_first(&ctl->free_space_offset);
2828*4882a593Smuzhiyun
2829*4882a593Smuzhiyun while (node) {
2830*4882a593Smuzhiyun info = rb_entry(node, struct btrfs_free_space, offset_index);
2831*4882a593Smuzhiyun
2832*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(info)) {
2833*4882a593Smuzhiyun ret = false;
2834*4882a593Smuzhiyun break;
2835*4882a593Smuzhiyun }
2836*4882a593Smuzhiyun
2837*4882a593Smuzhiyun node = rb_next(node);
2838*4882a593Smuzhiyun }
2839*4882a593Smuzhiyun
2840*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2841*4882a593Smuzhiyun return ret;
2842*4882a593Smuzhiyun }
2843*4882a593Smuzhiyun
2844*4882a593Smuzhiyun u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
2845*4882a593Smuzhiyun u64 offset, u64 bytes, u64 empty_size,
2846*4882a593Smuzhiyun u64 *max_extent_size)
2847*4882a593Smuzhiyun {
2848*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2849*4882a593Smuzhiyun struct btrfs_discard_ctl *discard_ctl =
2850*4882a593Smuzhiyun &block_group->fs_info->discard_ctl;
2851*4882a593Smuzhiyun struct btrfs_free_space *entry = NULL;
2852*4882a593Smuzhiyun u64 bytes_search = bytes + empty_size;
2853*4882a593Smuzhiyun u64 ret = 0;
2854*4882a593Smuzhiyun u64 align_gap = 0;
2855*4882a593Smuzhiyun u64 align_gap_len = 0;
2856*4882a593Smuzhiyun enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2857*4882a593Smuzhiyun
2858*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2859*4882a593Smuzhiyun entry = find_free_space(ctl, &offset, &bytes_search,
2860*4882a593Smuzhiyun block_group->full_stripe_len, max_extent_size);
2861*4882a593Smuzhiyun if (!entry)
2862*4882a593Smuzhiyun goto out;
2863*4882a593Smuzhiyun
2864*4882a593Smuzhiyun ret = offset;
2865*4882a593Smuzhiyun if (entry->bitmap) {
2866*4882a593Smuzhiyun bitmap_clear_bits(ctl, entry, offset, bytes);
2867*4882a593Smuzhiyun
2868*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(entry))
2869*4882a593Smuzhiyun atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2870*4882a593Smuzhiyun
2871*4882a593Smuzhiyun if (!entry->bytes)
2872*4882a593Smuzhiyun free_bitmap(ctl, entry);
2873*4882a593Smuzhiyun } else {
2874*4882a593Smuzhiyun unlink_free_space(ctl, entry);
2875*4882a593Smuzhiyun align_gap_len = offset - entry->offset;
2876*4882a593Smuzhiyun align_gap = entry->offset;
2877*4882a593Smuzhiyun align_gap_trim_state = entry->trim_state;
2878*4882a593Smuzhiyun
2879*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(entry))
2880*4882a593Smuzhiyun atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2881*4882a593Smuzhiyun
2882*4882a593Smuzhiyun entry->offset = offset + bytes;
2883*4882a593Smuzhiyun WARN_ON(entry->bytes < bytes + align_gap_len);
2884*4882a593Smuzhiyun
2885*4882a593Smuzhiyun entry->bytes -= bytes + align_gap_len;
2886*4882a593Smuzhiyun if (!entry->bytes)
2887*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, entry);
2888*4882a593Smuzhiyun else
2889*4882a593Smuzhiyun link_free_space(ctl, entry);
2890*4882a593Smuzhiyun }
2891*4882a593Smuzhiyun out:
2892*4882a593Smuzhiyun btrfs_discard_update_discardable(block_group, ctl);
2893*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2894*4882a593Smuzhiyun
2895*4882a593Smuzhiyun if (align_gap_len)
2896*4882a593Smuzhiyun __btrfs_add_free_space(block_group->fs_info, ctl,
2897*4882a593Smuzhiyun align_gap, align_gap_len,
2898*4882a593Smuzhiyun align_gap_trim_state);
2899*4882a593Smuzhiyun return ret;
2900*4882a593Smuzhiyun }
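/*
 * Illustrative sketch (editorial addition): allocating from an extent
 * entry can leave an alignment gap in front of the returned offset.
 * Suppose find_free_space() picks entry [1M, 1M + 128K) but, because of
 * full_stripe_len alignment, returns offset = 1M + 4K for a 64K request.
 * The entry is trimmed down to [1M + 68K, 1M + 128K) and, once the tree
 * lock is dropped, the 4K gap [1M, 1M + 4K) is re-added through
 * __btrfs_add_free_space() with the entry's original trim state.
 */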
2901*4882a593Smuzhiyun
2902*4882a593Smuzhiyun /*
2903*4882a593Smuzhiyun * given a cluster, put all of its extents back into the free space
2904*4882a593Smuzhiyun * cache. If a block group is passed, this function will only free
2905*4882a593Smuzhiyun * a cluster that belongs to the passed block group.
2906*4882a593Smuzhiyun *
2907*4882a593Smuzhiyun * Otherwise, it'll get a reference on the block group pointed to by the
2908*4882a593Smuzhiyun * cluster and remove the cluster from it.
2909*4882a593Smuzhiyun */
2910*4882a593Smuzhiyun void btrfs_return_cluster_to_free_space(
2911*4882a593Smuzhiyun struct btrfs_block_group *block_group,
2912*4882a593Smuzhiyun struct btrfs_free_cluster *cluster)
2913*4882a593Smuzhiyun {
2914*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl;
2915*4882a593Smuzhiyun
2916*4882a593Smuzhiyun /* first, get a safe pointer to the block group */
2917*4882a593Smuzhiyun spin_lock(&cluster->lock);
2918*4882a593Smuzhiyun if (!block_group) {
2919*4882a593Smuzhiyun block_group = cluster->block_group;
2920*4882a593Smuzhiyun if (!block_group) {
2921*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2922*4882a593Smuzhiyun return;
2923*4882a593Smuzhiyun }
2924*4882a593Smuzhiyun } else if (cluster->block_group != block_group) {
2925*4882a593Smuzhiyun /* someone else has already freed it, don't redo their work */
2926*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2927*4882a593Smuzhiyun return;
2928*4882a593Smuzhiyun }
2929*4882a593Smuzhiyun btrfs_get_block_group(block_group);
2930*4882a593Smuzhiyun spin_unlock(&cluster->lock);
2931*4882a593Smuzhiyun
2932*4882a593Smuzhiyun ctl = block_group->free_space_ctl;
2933*4882a593Smuzhiyun
2934*4882a593Smuzhiyun /* now return any extents the cluster had on it */
2935*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
2936*4882a593Smuzhiyun __btrfs_return_cluster_to_free_space(block_group, cluster);
2937*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
2938*4882a593Smuzhiyun
2939*4882a593Smuzhiyun btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
2940*4882a593Smuzhiyun
2941*4882a593Smuzhiyun /* finally drop our ref */
2942*4882a593Smuzhiyun btrfs_put_block_group(block_group);
2943*4882a593Smuzhiyun }
2944*4882a593Smuzhiyun
2945*4882a593Smuzhiyun static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
2946*4882a593Smuzhiyun struct btrfs_free_cluster *cluster,
2947*4882a593Smuzhiyun struct btrfs_free_space *entry,
2948*4882a593Smuzhiyun u64 bytes, u64 min_start,
2949*4882a593Smuzhiyun u64 *max_extent_size)
2950*4882a593Smuzhiyun {
2951*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2952*4882a593Smuzhiyun int err;
2953*4882a593Smuzhiyun u64 search_start = cluster->window_start;
2954*4882a593Smuzhiyun u64 search_bytes = bytes;
2955*4882a593Smuzhiyun u64 ret = 0;
2956*4882a593Smuzhiyun
2957*4882a593Smuzhiyun search_start = min_start;
2958*4882a593Smuzhiyun search_bytes = bytes;
2959*4882a593Smuzhiyun
2960*4882a593Smuzhiyun err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
2961*4882a593Smuzhiyun if (err) {
2962*4882a593Smuzhiyun *max_extent_size = max(get_max_extent_size(entry),
2963*4882a593Smuzhiyun *max_extent_size);
2964*4882a593Smuzhiyun return 0;
2965*4882a593Smuzhiyun }
2966*4882a593Smuzhiyun
2967*4882a593Smuzhiyun ret = search_start;
2968*4882a593Smuzhiyun __bitmap_clear_bits(ctl, entry, ret, bytes);
2969*4882a593Smuzhiyun
2970*4882a593Smuzhiyun return ret;
2971*4882a593Smuzhiyun }
2972*4882a593Smuzhiyun
2973*4882a593Smuzhiyun /*
2974*4882a593Smuzhiyun * given a cluster, try to allocate 'bytes' from it, returns 0
2975*4882a593Smuzhiyun * if it couldn't find anything suitably large, or a logical disk offset
2976*4882a593Smuzhiyun * if things worked out
2977*4882a593Smuzhiyun */
2978*4882a593Smuzhiyun u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
2979*4882a593Smuzhiyun struct btrfs_free_cluster *cluster, u64 bytes,
2980*4882a593Smuzhiyun u64 min_start, u64 *max_extent_size)
2981*4882a593Smuzhiyun {
2982*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2983*4882a593Smuzhiyun struct btrfs_discard_ctl *discard_ctl =
2984*4882a593Smuzhiyun &block_group->fs_info->discard_ctl;
2985*4882a593Smuzhiyun struct btrfs_free_space *entry = NULL;
2986*4882a593Smuzhiyun struct rb_node *node;
2987*4882a593Smuzhiyun u64 ret = 0;
2988*4882a593Smuzhiyun
2989*4882a593Smuzhiyun spin_lock(&cluster->lock);
2990*4882a593Smuzhiyun if (bytes > cluster->max_size)
2991*4882a593Smuzhiyun goto out;
2992*4882a593Smuzhiyun
2993*4882a593Smuzhiyun if (cluster->block_group != block_group)
2994*4882a593Smuzhiyun goto out;
2995*4882a593Smuzhiyun
2996*4882a593Smuzhiyun node = rb_first(&cluster->root);
2997*4882a593Smuzhiyun if (!node)
2998*4882a593Smuzhiyun goto out;
2999*4882a593Smuzhiyun
3000*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
3001*4882a593Smuzhiyun while (1) {
3002*4882a593Smuzhiyun if (entry->bytes < bytes)
3003*4882a593Smuzhiyun *max_extent_size = max(get_max_extent_size(entry),
3004*4882a593Smuzhiyun *max_extent_size);
3005*4882a593Smuzhiyun
3006*4882a593Smuzhiyun if (entry->bytes < bytes ||
3007*4882a593Smuzhiyun (!entry->bitmap && entry->offset < min_start)) {
3008*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
3009*4882a593Smuzhiyun if (!node)
3010*4882a593Smuzhiyun break;
3011*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space,
3012*4882a593Smuzhiyun offset_index);
3013*4882a593Smuzhiyun continue;
3014*4882a593Smuzhiyun }
3015*4882a593Smuzhiyun
3016*4882a593Smuzhiyun if (entry->bitmap) {
3017*4882a593Smuzhiyun ret = btrfs_alloc_from_bitmap(block_group,
3018*4882a593Smuzhiyun cluster, entry, bytes,
3019*4882a593Smuzhiyun cluster->window_start,
3020*4882a593Smuzhiyun max_extent_size);
3021*4882a593Smuzhiyun if (ret == 0) {
3022*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
3023*4882a593Smuzhiyun if (!node)
3024*4882a593Smuzhiyun break;
3025*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space,
3026*4882a593Smuzhiyun offset_index);
3027*4882a593Smuzhiyun continue;
3028*4882a593Smuzhiyun }
3029*4882a593Smuzhiyun cluster->window_start += bytes;
3030*4882a593Smuzhiyun } else {
3031*4882a593Smuzhiyun ret = entry->offset;
3032*4882a593Smuzhiyun
3033*4882a593Smuzhiyun entry->offset += bytes;
3034*4882a593Smuzhiyun entry->bytes -= bytes;
3035*4882a593Smuzhiyun }
3036*4882a593Smuzhiyun
3037*4882a593Smuzhiyun break;
3038*4882a593Smuzhiyun }
3039*4882a593Smuzhiyun out:
3040*4882a593Smuzhiyun spin_unlock(&cluster->lock);
3041*4882a593Smuzhiyun
3042*4882a593Smuzhiyun if (!ret)
3043*4882a593Smuzhiyun return 0;
3044*4882a593Smuzhiyun
3045*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3046*4882a593Smuzhiyun
3047*4882a593Smuzhiyun if (!btrfs_free_space_trimmed(entry))
3048*4882a593Smuzhiyun atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3049*4882a593Smuzhiyun
3050*4882a593Smuzhiyun ctl->free_space -= bytes;
3051*4882a593Smuzhiyun if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3052*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
3053*4882a593Smuzhiyun
3054*4882a593Smuzhiyun spin_lock(&cluster->lock);
3055*4882a593Smuzhiyun if (entry->bytes == 0) {
3056*4882a593Smuzhiyun rb_erase(&entry->offset_index, &cluster->root);
3057*4882a593Smuzhiyun ctl->free_extents--;
3058*4882a593Smuzhiyun if (entry->bitmap) {
3059*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_bitmap_cachep,
3060*4882a593Smuzhiyun entry->bitmap);
3061*4882a593Smuzhiyun ctl->total_bitmaps--;
3062*4882a593Smuzhiyun ctl->op->recalc_thresholds(ctl);
3063*4882a593Smuzhiyun } else if (!btrfs_free_space_trimmed(entry)) {
3064*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR]--;
3065*4882a593Smuzhiyun }
3066*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, entry);
3067*4882a593Smuzhiyun }
3068*4882a593Smuzhiyun
3069*4882a593Smuzhiyun spin_unlock(&cluster->lock);
3070*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3071*4882a593Smuzhiyun
3072*4882a593Smuzhiyun return ret;
3073*4882a593Smuzhiyun }
3074*4882a593Smuzhiyun
3075*4882a593Smuzhiyun static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
3076*4882a593Smuzhiyun struct btrfs_free_space *entry,
3077*4882a593Smuzhiyun struct btrfs_free_cluster *cluster,
3078*4882a593Smuzhiyun u64 offset, u64 bytes,
3079*4882a593Smuzhiyun u64 cont1_bytes, u64 min_bytes)
3080*4882a593Smuzhiyun {
3081*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3082*4882a593Smuzhiyun unsigned long next_zero;
3083*4882a593Smuzhiyun unsigned long i;
3084*4882a593Smuzhiyun unsigned long want_bits;
3085*4882a593Smuzhiyun unsigned long min_bits;
3086*4882a593Smuzhiyun unsigned long found_bits;
3087*4882a593Smuzhiyun unsigned long max_bits = 0;
3088*4882a593Smuzhiyun unsigned long start = 0;
3089*4882a593Smuzhiyun unsigned long total_found = 0;
3090*4882a593Smuzhiyun int ret;
3091*4882a593Smuzhiyun
3092*4882a593Smuzhiyun i = offset_to_bit(entry->offset, ctl->unit,
3093*4882a593Smuzhiyun max_t(u64, offset, entry->offset));
3094*4882a593Smuzhiyun want_bits = bytes_to_bits(bytes, ctl->unit);
3095*4882a593Smuzhiyun min_bits = bytes_to_bits(min_bytes, ctl->unit);
3096*4882a593Smuzhiyun
3097*4882a593Smuzhiyun /*
3098*4882a593Smuzhiyun * Don't bother looking for a cluster in this bitmap if it's heavily
3099*4882a593Smuzhiyun * fragmented.
3100*4882a593Smuzhiyun */
3101*4882a593Smuzhiyun if (entry->max_extent_size &&
3102*4882a593Smuzhiyun entry->max_extent_size < cont1_bytes)
3103*4882a593Smuzhiyun return -ENOSPC;
3104*4882a593Smuzhiyun again:
3105*4882a593Smuzhiyun found_bits = 0;
3106*4882a593Smuzhiyun for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
3107*4882a593Smuzhiyun next_zero = find_next_zero_bit(entry->bitmap,
3108*4882a593Smuzhiyun BITS_PER_BITMAP, i);
3109*4882a593Smuzhiyun if (next_zero - i >= min_bits) {
3110*4882a593Smuzhiyun found_bits = next_zero - i;
3111*4882a593Smuzhiyun if (found_bits > max_bits)
3112*4882a593Smuzhiyun max_bits = found_bits;
3113*4882a593Smuzhiyun break;
3114*4882a593Smuzhiyun }
3115*4882a593Smuzhiyun if (next_zero - i > max_bits)
3116*4882a593Smuzhiyun max_bits = next_zero - i;
3117*4882a593Smuzhiyun i = next_zero;
3118*4882a593Smuzhiyun }
3119*4882a593Smuzhiyun
3120*4882a593Smuzhiyun if (!found_bits) {
3121*4882a593Smuzhiyun entry->max_extent_size = (u64)max_bits * ctl->unit;
3122*4882a593Smuzhiyun return -ENOSPC;
3123*4882a593Smuzhiyun }
3124*4882a593Smuzhiyun
3125*4882a593Smuzhiyun if (!total_found) {
3126*4882a593Smuzhiyun start = i;
3127*4882a593Smuzhiyun cluster->max_size = 0;
3128*4882a593Smuzhiyun }
3129*4882a593Smuzhiyun
3130*4882a593Smuzhiyun total_found += found_bits;
3131*4882a593Smuzhiyun
3132*4882a593Smuzhiyun if (cluster->max_size < found_bits * ctl->unit)
3133*4882a593Smuzhiyun cluster->max_size = found_bits * ctl->unit;
3134*4882a593Smuzhiyun
3135*4882a593Smuzhiyun if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3136*4882a593Smuzhiyun i = next_zero + 1;
3137*4882a593Smuzhiyun goto again;
3138*4882a593Smuzhiyun }
3139*4882a593Smuzhiyun
3140*4882a593Smuzhiyun cluster->window_start = start * ctl->unit + entry->offset;
3141*4882a593Smuzhiyun rb_erase(&entry->offset_index, &ctl->free_space_offset);
3142*4882a593Smuzhiyun ret = tree_insert_offset(&cluster->root, entry->offset,
3143*4882a593Smuzhiyun &entry->offset_index, 1);
3144*4882a593Smuzhiyun ASSERT(!ret); /* -EEXIST; Logic error */
3145*4882a593Smuzhiyun
3146*4882a593Smuzhiyun trace_btrfs_setup_cluster(block_group, cluster,
3147*4882a593Smuzhiyun total_found * ctl->unit, 1);
3148*4882a593Smuzhiyun return 0;
3149*4882a593Smuzhiyun }
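/*
 * Illustrative sketch (editorial addition): with ctl->unit = 4K,
 * bytes = 64K and min_bytes = 4K, want_bits is 16 and min_bits is 1.
 * The scan above walks runs of set bits, skipping runs shorter than
 * min_bits, and keeps accumulating until total_found covers want_bits
 * and the largest single run (cluster->max_size) reaches cont1_bytes.
 * Only then is the bitmap entry moved from the free space tree into the
 * cluster's private rbtree, with window_start pointing at the first run
 * that was found.
 */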
3150*4882a593Smuzhiyun
3151*4882a593Smuzhiyun /*
3152*4882a593Smuzhiyun * This searches the block group for just extents to fill the cluster with.
3153*4882a593Smuzhiyun * Try to find a cluster with at least bytes total bytes, at least one
3154*4882a593Smuzhiyun * extent of cont1_bytes, and other clusters of at least min_bytes.
3155*4882a593Smuzhiyun */
3156*4882a593Smuzhiyun static noinline int
3157*4882a593Smuzhiyun setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
3158*4882a593Smuzhiyun struct btrfs_free_cluster *cluster,
3159*4882a593Smuzhiyun struct list_head *bitmaps, u64 offset, u64 bytes,
3160*4882a593Smuzhiyun u64 cont1_bytes, u64 min_bytes)
3161*4882a593Smuzhiyun {
3162*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3163*4882a593Smuzhiyun struct btrfs_free_space *first = NULL;
3164*4882a593Smuzhiyun struct btrfs_free_space *entry = NULL;
3165*4882a593Smuzhiyun struct btrfs_free_space *last;
3166*4882a593Smuzhiyun struct rb_node *node;
3167*4882a593Smuzhiyun u64 window_free;
3168*4882a593Smuzhiyun u64 max_extent;
3169*4882a593Smuzhiyun u64 total_size = 0;
3170*4882a593Smuzhiyun
3171*4882a593Smuzhiyun entry = tree_search_offset(ctl, offset, 0, 1);
3172*4882a593Smuzhiyun if (!entry)
3173*4882a593Smuzhiyun return -ENOSPC;
3174*4882a593Smuzhiyun
3175*4882a593Smuzhiyun /*
3176*4882a593Smuzhiyun * We don't want bitmaps, so just move along until we find a normal
3177*4882a593Smuzhiyun * extent entry.
3178*4882a593Smuzhiyun */
3179*4882a593Smuzhiyun while (entry->bitmap || entry->bytes < min_bytes) {
3180*4882a593Smuzhiyun if (entry->bitmap && list_empty(&entry->list))
3181*4882a593Smuzhiyun list_add_tail(&entry->list, bitmaps);
3182*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
3183*4882a593Smuzhiyun if (!node)
3184*4882a593Smuzhiyun return -ENOSPC;
3185*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
3186*4882a593Smuzhiyun }
3187*4882a593Smuzhiyun
3188*4882a593Smuzhiyun window_free = entry->bytes;
3189*4882a593Smuzhiyun max_extent = entry->bytes;
3190*4882a593Smuzhiyun first = entry;
3191*4882a593Smuzhiyun last = entry;
3192*4882a593Smuzhiyun
3193*4882a593Smuzhiyun for (node = rb_next(&entry->offset_index); node;
3194*4882a593Smuzhiyun node = rb_next(&entry->offset_index)) {
3195*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
3196*4882a593Smuzhiyun
3197*4882a593Smuzhiyun if (entry->bitmap) {
3198*4882a593Smuzhiyun if (list_empty(&entry->list))
3199*4882a593Smuzhiyun list_add_tail(&entry->list, bitmaps);
3200*4882a593Smuzhiyun continue;
3201*4882a593Smuzhiyun }
3202*4882a593Smuzhiyun
3203*4882a593Smuzhiyun if (entry->bytes < min_bytes)
3204*4882a593Smuzhiyun continue;
3205*4882a593Smuzhiyun
3206*4882a593Smuzhiyun last = entry;
3207*4882a593Smuzhiyun window_free += entry->bytes;
3208*4882a593Smuzhiyun if (entry->bytes > max_extent)
3209*4882a593Smuzhiyun max_extent = entry->bytes;
3210*4882a593Smuzhiyun }
3211*4882a593Smuzhiyun
3212*4882a593Smuzhiyun if (window_free < bytes || max_extent < cont1_bytes)
3213*4882a593Smuzhiyun return -ENOSPC;
3214*4882a593Smuzhiyun
3215*4882a593Smuzhiyun cluster->window_start = first->offset;
3216*4882a593Smuzhiyun
3217*4882a593Smuzhiyun node = &first->offset_index;
3218*4882a593Smuzhiyun
3219*4882a593Smuzhiyun /*
3220*4882a593Smuzhiyun * now we've found our entries, pull them out of the free space
3221*4882a593Smuzhiyun * cache and put them into the cluster rbtree
3222*4882a593Smuzhiyun */
3223*4882a593Smuzhiyun do {
3224*4882a593Smuzhiyun int ret;
3225*4882a593Smuzhiyun
3226*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space, offset_index);
3227*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
3228*4882a593Smuzhiyun if (entry->bitmap || entry->bytes < min_bytes)
3229*4882a593Smuzhiyun continue;
3230*4882a593Smuzhiyun
3231*4882a593Smuzhiyun rb_erase(&entry->offset_index, &ctl->free_space_offset);
3232*4882a593Smuzhiyun ret = tree_insert_offset(&cluster->root, entry->offset,
3233*4882a593Smuzhiyun &entry->offset_index, 0);
3234*4882a593Smuzhiyun total_size += entry->bytes;
3235*4882a593Smuzhiyun ASSERT(!ret); /* -EEXIST; Logic error */
3236*4882a593Smuzhiyun } while (node && entry != last);
3237*4882a593Smuzhiyun
3238*4882a593Smuzhiyun cluster->max_size = max_extent;
3239*4882a593Smuzhiyun trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
3240*4882a593Smuzhiyun return 0;
3241*4882a593Smuzhiyun }
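/*
 * Illustrative sketch (editorial addition): the window scan accepts a
 * cluster only if the extent entries at or after 'offset' add up to the
 * requested total and at least one of them is cont1_bytes long.  With
 * bytes = 256K, cont1_bytes = 128K and min_bytes = 4K, entries of 160K,
 * 8K and 96K qualify (window_free = 264K, max_extent = 160K), whereas
 * four 64K entries do not: the total is large enough but no single
 * extent reaches cont1_bytes.
 */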
3242*4882a593Smuzhiyun
3243*4882a593Smuzhiyun /*
3244*4882a593Smuzhiyun * This specifically looks for bitmaps that may work in the cluster, we assume
3245*4882a593Smuzhiyun * that we have already failed to find extents that will work.
3246*4882a593Smuzhiyun */
3247*4882a593Smuzhiyun static noinline int
3248*4882a593Smuzhiyun setup_cluster_bitmap(struct btrfs_block_group *block_group,
3249*4882a593Smuzhiyun struct btrfs_free_cluster *cluster,
3250*4882a593Smuzhiyun struct list_head *bitmaps, u64 offset, u64 bytes,
3251*4882a593Smuzhiyun u64 cont1_bytes, u64 min_bytes)
3252*4882a593Smuzhiyun {
3253*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3254*4882a593Smuzhiyun struct btrfs_free_space *entry = NULL;
3255*4882a593Smuzhiyun int ret = -ENOSPC;
3256*4882a593Smuzhiyun u64 bitmap_offset = offset_to_bitmap(ctl, offset);
3257*4882a593Smuzhiyun
3258*4882a593Smuzhiyun if (ctl->total_bitmaps == 0)
3259*4882a593Smuzhiyun return -ENOSPC;
3260*4882a593Smuzhiyun
3261*4882a593Smuzhiyun /*
3262*4882a593Smuzhiyun * The bitmap that covers offset won't be in the list unless offset
3263*4882a593Smuzhiyun * is just its start offset.
3264*4882a593Smuzhiyun */
3265*4882a593Smuzhiyun if (!list_empty(bitmaps))
3266*4882a593Smuzhiyun entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3267*4882a593Smuzhiyun
3268*4882a593Smuzhiyun if (!entry || entry->offset != bitmap_offset) {
3269*4882a593Smuzhiyun entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3270*4882a593Smuzhiyun if (entry && list_empty(&entry->list))
3271*4882a593Smuzhiyun list_add(&entry->list, bitmaps);
3272*4882a593Smuzhiyun }
3273*4882a593Smuzhiyun
3274*4882a593Smuzhiyun list_for_each_entry(entry, bitmaps, list) {
3275*4882a593Smuzhiyun if (entry->bytes < bytes)
3276*4882a593Smuzhiyun continue;
3277*4882a593Smuzhiyun ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
3278*4882a593Smuzhiyun bytes, cont1_bytes, min_bytes);
3279*4882a593Smuzhiyun if (!ret)
3280*4882a593Smuzhiyun return 0;
3281*4882a593Smuzhiyun }
3282*4882a593Smuzhiyun
3283*4882a593Smuzhiyun /*
3284*4882a593Smuzhiyun * The bitmaps list has all the bitmaps that record free space
3285*4882a593Smuzhiyun * starting after offset, so no more search is required.
3286*4882a593Smuzhiyun */
3287*4882a593Smuzhiyun return -ENOSPC;
3288*4882a593Smuzhiyun }
3289*4882a593Smuzhiyun
3290*4882a593Smuzhiyun /*
3291*4882a593Smuzhiyun * here we try to find a cluster of blocks in a block group. The goal
3292*4882a593Smuzhiyun * is to find at least bytes+empty_size.
3293*4882a593Smuzhiyun * We might not find them all in one contiguous area.
3294*4882a593Smuzhiyun *
3295*4882a593Smuzhiyun * returns zero and sets up cluster if things worked out, otherwise
3296*4882a593Smuzhiyun * it returns -ENOSPC
3297*4882a593Smuzhiyun */
3298*4882a593Smuzhiyun int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
3299*4882a593Smuzhiyun struct btrfs_free_cluster *cluster,
3300*4882a593Smuzhiyun u64 offset, u64 bytes, u64 empty_size)
3301*4882a593Smuzhiyun {
3302*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = block_group->fs_info;
3303*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3304*4882a593Smuzhiyun struct btrfs_free_space *entry, *tmp;
3305*4882a593Smuzhiyun LIST_HEAD(bitmaps);
3306*4882a593Smuzhiyun u64 min_bytes;
3307*4882a593Smuzhiyun u64 cont1_bytes;
3308*4882a593Smuzhiyun int ret;
3309*4882a593Smuzhiyun
3310*4882a593Smuzhiyun /*
3311*4882a593Smuzhiyun * Choose the minimum extent size we'll require for this
3312*4882a593Smuzhiyun * cluster. For SSD_SPREAD, don't allow any fragmentation.
3313*4882a593Smuzhiyun * For metadata, allow allocations with smaller extents. For
3314*4882a593Smuzhiyun * data, keep it dense.
3315*4882a593Smuzhiyun */
3316*4882a593Smuzhiyun if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
3317*4882a593Smuzhiyun cont1_bytes = min_bytes = bytes + empty_size;
3318*4882a593Smuzhiyun } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
3319*4882a593Smuzhiyun cont1_bytes = bytes;
3320*4882a593Smuzhiyun min_bytes = fs_info->sectorsize;
3321*4882a593Smuzhiyun } else {
3322*4882a593Smuzhiyun cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
3323*4882a593Smuzhiyun min_bytes = fs_info->sectorsize;
3324*4882a593Smuzhiyun }
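	/*
	 * Editorial example (values are illustrative): for a data block
	 * group with bytes = 256K and empty_size = 768K, cont1_bytes is
	 * max(256K, 1M >> 2) = 256K and min_bytes is the sectorsize, so the
	 * cluster may be stitched together from several extents as long as
	 * one of them is at least 256K.  With -o ssd_spread the same
	 * request would demand a single contiguous 1M run, since
	 * cont1_bytes == min_bytes == bytes + empty_size.
	 */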
3325*4882a593Smuzhiyun
3326*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3327*4882a593Smuzhiyun
3328*4882a593Smuzhiyun /*
3329*4882a593Smuzhiyun * If we know we don't have enough space to make a cluster don't even
3330*4882a593Smuzhiyun * bother doing all the work to try and find one.
3331*4882a593Smuzhiyun */
3332*4882a593Smuzhiyun if (ctl->free_space < bytes) {
3333*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3334*4882a593Smuzhiyun return -ENOSPC;
3335*4882a593Smuzhiyun }
3336*4882a593Smuzhiyun
3337*4882a593Smuzhiyun spin_lock(&cluster->lock);
3338*4882a593Smuzhiyun
3339*4882a593Smuzhiyun /* someone already found a cluster, hooray */
3340*4882a593Smuzhiyun if (cluster->block_group) {
3341*4882a593Smuzhiyun ret = 0;
3342*4882a593Smuzhiyun goto out;
3343*4882a593Smuzhiyun }
3344*4882a593Smuzhiyun
3345*4882a593Smuzhiyun trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3346*4882a593Smuzhiyun min_bytes);
3347*4882a593Smuzhiyun
3348*4882a593Smuzhiyun ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
3349*4882a593Smuzhiyun bytes + empty_size,
3350*4882a593Smuzhiyun cont1_bytes, min_bytes);
3351*4882a593Smuzhiyun if (ret)
3352*4882a593Smuzhiyun ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
3353*4882a593Smuzhiyun offset, bytes + empty_size,
3354*4882a593Smuzhiyun cont1_bytes, min_bytes);
3355*4882a593Smuzhiyun
3356*4882a593Smuzhiyun /* Clear our temporary list */
3357*4882a593Smuzhiyun list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3358*4882a593Smuzhiyun list_del_init(&entry->list);
3359*4882a593Smuzhiyun
3360*4882a593Smuzhiyun if (!ret) {
3361*4882a593Smuzhiyun btrfs_get_block_group(block_group);
3362*4882a593Smuzhiyun list_add_tail(&cluster->block_group_list,
3363*4882a593Smuzhiyun &block_group->cluster_list);
3364*4882a593Smuzhiyun cluster->block_group = block_group;
3365*4882a593Smuzhiyun } else {
3366*4882a593Smuzhiyun trace_btrfs_failed_cluster_setup(block_group);
3367*4882a593Smuzhiyun }
3368*4882a593Smuzhiyun out:
3369*4882a593Smuzhiyun spin_unlock(&cluster->lock);
3370*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3371*4882a593Smuzhiyun
3372*4882a593Smuzhiyun return ret;
3373*4882a593Smuzhiyun }
3374*4882a593Smuzhiyun
3375*4882a593Smuzhiyun /*
3376*4882a593Smuzhiyun * simple code to zero out a cluster
3377*4882a593Smuzhiyun */
3378*4882a593Smuzhiyun void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3379*4882a593Smuzhiyun {
3380*4882a593Smuzhiyun spin_lock_init(&cluster->lock);
3381*4882a593Smuzhiyun spin_lock_init(&cluster->refill_lock);
3382*4882a593Smuzhiyun cluster->root = RB_ROOT;
3383*4882a593Smuzhiyun cluster->max_size = 0;
3384*4882a593Smuzhiyun cluster->fragmented = false;
3385*4882a593Smuzhiyun INIT_LIST_HEAD(&cluster->block_group_list);
3386*4882a593Smuzhiyun cluster->block_group = NULL;
3387*4882a593Smuzhiyun }
3388*4882a593Smuzhiyun
3389*4882a593Smuzhiyun static int do_trimming(struct btrfs_block_group *block_group,
3390*4882a593Smuzhiyun u64 *total_trimmed, u64 start, u64 bytes,
3391*4882a593Smuzhiyun u64 reserved_start, u64 reserved_bytes,
3392*4882a593Smuzhiyun enum btrfs_trim_state reserved_trim_state,
3393*4882a593Smuzhiyun struct btrfs_trim_range *trim_entry)
3394*4882a593Smuzhiyun {
3395*4882a593Smuzhiyun struct btrfs_space_info *space_info = block_group->space_info;
3396*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = block_group->fs_info;
3397*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3398*4882a593Smuzhiyun int ret;
3399*4882a593Smuzhiyun int update = 0;
3400*4882a593Smuzhiyun const u64 end = start + bytes;
3401*4882a593Smuzhiyun const u64 reserved_end = reserved_start + reserved_bytes;
3402*4882a593Smuzhiyun enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3403*4882a593Smuzhiyun u64 trimmed = 0;
3404*4882a593Smuzhiyun
3405*4882a593Smuzhiyun spin_lock(&space_info->lock);
3406*4882a593Smuzhiyun spin_lock(&block_group->lock);
3407*4882a593Smuzhiyun if (!block_group->ro) {
3408*4882a593Smuzhiyun block_group->reserved += reserved_bytes;
3409*4882a593Smuzhiyun space_info->bytes_reserved += reserved_bytes;
3410*4882a593Smuzhiyun update = 1;
3411*4882a593Smuzhiyun }
3412*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3413*4882a593Smuzhiyun spin_unlock(&space_info->lock);
3414*4882a593Smuzhiyun
3415*4882a593Smuzhiyun ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
3416*4882a593Smuzhiyun if (!ret) {
3417*4882a593Smuzhiyun *total_trimmed += trimmed;
3418*4882a593Smuzhiyun trim_state = BTRFS_TRIM_STATE_TRIMMED;
3419*4882a593Smuzhiyun }
3420*4882a593Smuzhiyun
3421*4882a593Smuzhiyun mutex_lock(&ctl->cache_writeout_mutex);
3422*4882a593Smuzhiyun if (reserved_start < start)
3423*4882a593Smuzhiyun __btrfs_add_free_space(fs_info, ctl, reserved_start,
3424*4882a593Smuzhiyun start - reserved_start,
3425*4882a593Smuzhiyun reserved_trim_state);
3426*4882a593Smuzhiyun if (start + bytes < reserved_start + reserved_bytes)
3427*4882a593Smuzhiyun __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3428*4882a593Smuzhiyun reserved_trim_state);
3429*4882a593Smuzhiyun __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
3430*4882a593Smuzhiyun list_del(&trim_entry->list);
3431*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3432*4882a593Smuzhiyun
3433*4882a593Smuzhiyun if (update) {
3434*4882a593Smuzhiyun spin_lock(&space_info->lock);
3435*4882a593Smuzhiyun spin_lock(&block_group->lock);
3436*4882a593Smuzhiyun if (block_group->ro)
3437*4882a593Smuzhiyun space_info->bytes_readonly += reserved_bytes;
3438*4882a593Smuzhiyun block_group->reserved -= reserved_bytes;
3439*4882a593Smuzhiyun space_info->bytes_reserved -= reserved_bytes;
3440*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3441*4882a593Smuzhiyun spin_unlock(&space_info->lock);
3442*4882a593Smuzhiyun }
3443*4882a593Smuzhiyun
3444*4882a593Smuzhiyun return ret;
3445*4882a593Smuzhiyun }
3446*4882a593Smuzhiyun
3447*4882a593Smuzhiyun /*
3448*4882a593Smuzhiyun * If @async is set, then we will trim 1 region and return.
3449*4882a593Smuzhiyun */
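/*
 * Walk the extent (non-bitmap) entries between @start and @end and discard
 * them via do_trimming().  In async mode whole entries are consumed (split at
 * max_discard_size), already trimmed entries are skipped and we stop after the
 * first discarded region; in sync mode the entry is clamped to the requested
 * range instead.
 */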
3450*4882a593Smuzhiyun static int trim_no_bitmap(struct btrfs_block_group *block_group,
3451*4882a593Smuzhiyun u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3452*4882a593Smuzhiyun bool async)
3453*4882a593Smuzhiyun {
3454*4882a593Smuzhiyun struct btrfs_discard_ctl *discard_ctl =
3455*4882a593Smuzhiyun &block_group->fs_info->discard_ctl;
3456*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3457*4882a593Smuzhiyun struct btrfs_free_space *entry;
3458*4882a593Smuzhiyun struct rb_node *node;
3459*4882a593Smuzhiyun int ret = 0;
3460*4882a593Smuzhiyun u64 extent_start;
3461*4882a593Smuzhiyun u64 extent_bytes;
3462*4882a593Smuzhiyun enum btrfs_trim_state extent_trim_state;
3463*4882a593Smuzhiyun u64 bytes;
3464*4882a593Smuzhiyun const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3465*4882a593Smuzhiyun
3466*4882a593Smuzhiyun while (start < end) {
3467*4882a593Smuzhiyun struct btrfs_trim_range trim_entry;
3468*4882a593Smuzhiyun
3469*4882a593Smuzhiyun mutex_lock(&ctl->cache_writeout_mutex);
3470*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3471*4882a593Smuzhiyun
3472*4882a593Smuzhiyun if (ctl->free_space < minlen)
3473*4882a593Smuzhiyun goto out_unlock;
3474*4882a593Smuzhiyun
3475*4882a593Smuzhiyun entry = tree_search_offset(ctl, start, 0, 1);
3476*4882a593Smuzhiyun if (!entry)
3477*4882a593Smuzhiyun goto out_unlock;
3478*4882a593Smuzhiyun
3479*4882a593Smuzhiyun /* Skip bitmaps and if async, already trimmed entries */
3480*4882a593Smuzhiyun while (entry->bitmap ||
3481*4882a593Smuzhiyun (async && btrfs_free_space_trimmed(entry))) {
3482*4882a593Smuzhiyun node = rb_next(&entry->offset_index);
3483*4882a593Smuzhiyun if (!node)
3484*4882a593Smuzhiyun goto out_unlock;
3485*4882a593Smuzhiyun entry = rb_entry(node, struct btrfs_free_space,
3486*4882a593Smuzhiyun offset_index);
3487*4882a593Smuzhiyun }
3488*4882a593Smuzhiyun
3489*4882a593Smuzhiyun if (entry->offset >= end)
3490*4882a593Smuzhiyun goto out_unlock;
3491*4882a593Smuzhiyun
3492*4882a593Smuzhiyun extent_start = entry->offset;
3493*4882a593Smuzhiyun extent_bytes = entry->bytes;
3494*4882a593Smuzhiyun extent_trim_state = entry->trim_state;
3495*4882a593Smuzhiyun if (async) {
3496*4882a593Smuzhiyun start = entry->offset;
3497*4882a593Smuzhiyun bytes = entry->bytes;
3498*4882a593Smuzhiyun if (bytes < minlen) {
3499*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3500*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3501*4882a593Smuzhiyun goto next;
3502*4882a593Smuzhiyun }
3503*4882a593Smuzhiyun unlink_free_space(ctl, entry);
3504*4882a593Smuzhiyun /*
3505*4882a593Smuzhiyun * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3506*4882a593Smuzhiyun * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3507*4882a593Smuzhiyun * X when we come back around. So trim it now.
3508*4882a593Smuzhiyun */
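/*
 * Illustrative numbers: with a 64M max_discard_size, a 200M entry is
 * split so that 64M is discarded now and the remaining 136M is relinked
 * for later passes, while an entry of 64M plus a tail smaller than the
 * async minimum filter is discarded in one go rather than leaving a
 * tail that would never be trimmed.
 */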
3509*4882a593Smuzhiyun if (max_discard_size &&
3510*4882a593Smuzhiyun bytes >= (max_discard_size +
3511*4882a593Smuzhiyun BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
3512*4882a593Smuzhiyun bytes = max_discard_size;
3513*4882a593Smuzhiyun extent_bytes = max_discard_size;
3514*4882a593Smuzhiyun entry->offset += max_discard_size;
3515*4882a593Smuzhiyun entry->bytes -= max_discard_size;
3516*4882a593Smuzhiyun link_free_space(ctl, entry);
3517*4882a593Smuzhiyun } else {
3518*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, entry);
3519*4882a593Smuzhiyun }
3520*4882a593Smuzhiyun } else {
3521*4882a593Smuzhiyun start = max(start, extent_start);
3522*4882a593Smuzhiyun bytes = min(extent_start + extent_bytes, end) - start;
3523*4882a593Smuzhiyun if (bytes < minlen) {
3524*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3525*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3526*4882a593Smuzhiyun goto next;
3527*4882a593Smuzhiyun }
3528*4882a593Smuzhiyun
3529*4882a593Smuzhiyun unlink_free_space(ctl, entry);
3530*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, entry);
3531*4882a593Smuzhiyun }
3532*4882a593Smuzhiyun
3533*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3534*4882a593Smuzhiyun trim_entry.start = extent_start;
3535*4882a593Smuzhiyun trim_entry.bytes = extent_bytes;
3536*4882a593Smuzhiyun list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3537*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3538*4882a593Smuzhiyun
3539*4882a593Smuzhiyun ret = do_trimming(block_group, total_trimmed, start, bytes,
3540*4882a593Smuzhiyun extent_start, extent_bytes, extent_trim_state,
3541*4882a593Smuzhiyun &trim_entry);
3542*4882a593Smuzhiyun if (ret) {
3543*4882a593Smuzhiyun block_group->discard_cursor = start + bytes;
3544*4882a593Smuzhiyun break;
3545*4882a593Smuzhiyun }
3546*4882a593Smuzhiyun next:
3547*4882a593Smuzhiyun start += bytes;
3548*4882a593Smuzhiyun block_group->discard_cursor = start;
3549*4882a593Smuzhiyun if (async && *total_trimmed)
3550*4882a593Smuzhiyun break;
3551*4882a593Smuzhiyun
3552*4882a593Smuzhiyun if (fatal_signal_pending(current)) {
3553*4882a593Smuzhiyun ret = -ERESTARTSYS;
3554*4882a593Smuzhiyun break;
3555*4882a593Smuzhiyun }
3556*4882a593Smuzhiyun
3557*4882a593Smuzhiyun cond_resched();
3558*4882a593Smuzhiyun }
3559*4882a593Smuzhiyun
3560*4882a593Smuzhiyun return ret;
3561*4882a593Smuzhiyun
3562*4882a593Smuzhiyun out_unlock:
3563*4882a593Smuzhiyun block_group->discard_cursor = btrfs_block_group_end(block_group);
3564*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3565*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3566*4882a593Smuzhiyun
3567*4882a593Smuzhiyun return ret;
3568*4882a593Smuzhiyun }
3569*4882a593Smuzhiyun
3570*4882a593Smuzhiyun /*
3571*4882a593Smuzhiyun * If we break out of trimming a bitmap prematurely, we should reset the
3572*4882a593Smuzhiyun * trimming bit. In a rather contrived case, it's possible to race here so
3573*4882a593Smuzhiyun * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3574*4882a593Smuzhiyun *
3575*4882a593Smuzhiyun * start = start of bitmap
3576*4882a593Smuzhiyun * end = near end of bitmap
3577*4882a593Smuzhiyun *
3578*4882a593Smuzhiyun * Thread 1: Thread 2:
3579*4882a593Smuzhiyun * trim_bitmaps(start)
3580*4882a593Smuzhiyun * trim_bitmaps(end)
3581*4882a593Smuzhiyun * end_trimming_bitmap()
3582*4882a593Smuzhiyun * reset_trimming_bitmap()
3583*4882a593Smuzhiyun */
3584*4882a593Smuzhiyun static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3585*4882a593Smuzhiyun {
3586*4882a593Smuzhiyun struct btrfs_free_space *entry;
3587*4882a593Smuzhiyun
3588*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3589*4882a593Smuzhiyun entry = tree_search_offset(ctl, offset, 1, 0);
3590*4882a593Smuzhiyun if (entry) {
3591*4882a593Smuzhiyun if (btrfs_free_space_trimmed(entry)) {
3592*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] +=
3593*4882a593Smuzhiyun entry->bitmap_extents;
3594*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3595*4882a593Smuzhiyun }
3596*4882a593Smuzhiyun entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3597*4882a593Smuzhiyun }
3598*4882a593Smuzhiyun
3599*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3600*4882a593Smuzhiyun }
3601*4882a593Smuzhiyun
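/*
 * A bitmap we marked TRIMMING has been fully scanned: flip it to TRIMMED and
 * remove its extents and bytes from the discardable counters.
 */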
3602*4882a593Smuzhiyun static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3603*4882a593Smuzhiyun struct btrfs_free_space *entry)
3604*4882a593Smuzhiyun {
3605*4882a593Smuzhiyun if (btrfs_free_space_trimming_bitmap(entry)) {
3606*4882a593Smuzhiyun entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
3607*4882a593Smuzhiyun ctl->discardable_extents[BTRFS_STAT_CURR] -=
3608*4882a593Smuzhiyun entry->bitmap_extents;
3609*4882a593Smuzhiyun ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
3610*4882a593Smuzhiyun }
3611*4882a593Smuzhiyun }
3612*4882a593Smuzhiyun
3613*4882a593Smuzhiyun /*
3614*4882a593Smuzhiyun * If @async is set, then we will trim 1 region and return.
3615*4882a593Smuzhiyun */
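/*
 * Walk the bitmap entries covering [@start, @end).  When scanning a bitmap
 * from its very start we mark it TRIMMING, then search it for free ranges,
 * clamp them to @maxlen / max_discard_size and hand them to do_trimming().
 * A bitmap is lossily marked TRIMMED once only regions below the minimum
 * filter remain in it.
 */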
3616*4882a593Smuzhiyun static int trim_bitmaps(struct btrfs_block_group *block_group,
3617*4882a593Smuzhiyun u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3618*4882a593Smuzhiyun u64 maxlen, bool async)
3619*4882a593Smuzhiyun {
3620*4882a593Smuzhiyun struct btrfs_discard_ctl *discard_ctl =
3621*4882a593Smuzhiyun &block_group->fs_info->discard_ctl;
3622*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3623*4882a593Smuzhiyun struct btrfs_free_space *entry;
3624*4882a593Smuzhiyun int ret = 0;
3625*4882a593Smuzhiyun int ret2;
3626*4882a593Smuzhiyun u64 bytes;
3627*4882a593Smuzhiyun u64 offset = offset_to_bitmap(ctl, start);
3628*4882a593Smuzhiyun const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
3629*4882a593Smuzhiyun
3630*4882a593Smuzhiyun while (offset < end) {
3631*4882a593Smuzhiyun bool next_bitmap = false;
3632*4882a593Smuzhiyun struct btrfs_trim_range trim_entry;
3633*4882a593Smuzhiyun
3634*4882a593Smuzhiyun mutex_lock(&ctl->cache_writeout_mutex);
3635*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3636*4882a593Smuzhiyun
3637*4882a593Smuzhiyun if (ctl->free_space < minlen) {
3638*4882a593Smuzhiyun block_group->discard_cursor =
3639*4882a593Smuzhiyun btrfs_block_group_end(block_group);
3640*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3641*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3642*4882a593Smuzhiyun break;
3643*4882a593Smuzhiyun }
3644*4882a593Smuzhiyun
3645*4882a593Smuzhiyun entry = tree_search_offset(ctl, offset, 1, 0);
3646*4882a593Smuzhiyun /*
3647*4882a593Smuzhiyun * Bitmaps are marked trimmed lossily now to prevent constant
3648*4882a593Smuzhiyun * discarding of the same bitmap (the reason why we are bound
3649*4882a593Smuzhiyun * by the filters). So, retrim the block group bitmaps when we
3650*4882a593Smuzhiyun * are preparing to punt to the unused_bgs list. This uses
3651*4882a593Smuzhiyun * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3652*4882a593Smuzhiyun * which is the only discard index which sets minlen to 0.
3653*4882a593Smuzhiyun */
3654*4882a593Smuzhiyun if (!entry || (async && minlen && start == offset &&
3655*4882a593Smuzhiyun btrfs_free_space_trimmed(entry))) {
3656*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3657*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3658*4882a593Smuzhiyun next_bitmap = true;
3659*4882a593Smuzhiyun goto next;
3660*4882a593Smuzhiyun }
3661*4882a593Smuzhiyun
3662*4882a593Smuzhiyun /*
3663*4882a593Smuzhiyun * Async discard bitmap trimming begins by setting the start
3664*4882a593Smuzhiyun * to be key.objectid and the offset_to_bitmap() aligns to the
3665*4882a593Smuzhiyun * start of the bitmap. This lets us know we are fully
3666*4882a593Smuzhiyun * scanning the bitmap rather than only some portion of it.
3667*4882a593Smuzhiyun */
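/*
 * Each bitmap entry covers BITS_PER_BITMAP * ctl->unit bytes (128M with
 * 4K pages and a 4K sectorsize, as an illustration), so offset_to_bitmap()
 * rounds @start down to that granularity.
 */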
3668*4882a593Smuzhiyun if (start == offset)
3669*4882a593Smuzhiyun entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3670*4882a593Smuzhiyun
3671*4882a593Smuzhiyun bytes = minlen;
3672*4882a593Smuzhiyun ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
3673*4882a593Smuzhiyun if (ret2 || start >= end) {
3674*4882a593Smuzhiyun /*
3675*4882a593Smuzhiyun * We lossily consider a bitmap trimmed if we only skip
3676*4882a593Smuzhiyun * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
3677*4882a593Smuzhiyun */
3678*4882a593Smuzhiyun if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
3679*4882a593Smuzhiyun end_trimming_bitmap(ctl, entry);
3680*4882a593Smuzhiyun else
3681*4882a593Smuzhiyun entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
3682*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3683*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3684*4882a593Smuzhiyun next_bitmap = true;
3685*4882a593Smuzhiyun goto next;
3686*4882a593Smuzhiyun }
3687*4882a593Smuzhiyun
3688*4882a593Smuzhiyun /*
3689*4882a593Smuzhiyun * We already trimmed a region, but are using the locking above
3690*4882a593Smuzhiyun * to reset the trim_state.
3691*4882a593Smuzhiyun */
3692*4882a593Smuzhiyun if (async && *total_trimmed) {
3693*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3694*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3695*4882a593Smuzhiyun goto out;
3696*4882a593Smuzhiyun }
3697*4882a593Smuzhiyun
3698*4882a593Smuzhiyun bytes = min(bytes, end - start);
3699*4882a593Smuzhiyun if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
3700*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3701*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3702*4882a593Smuzhiyun goto next;
3703*4882a593Smuzhiyun }
3704*4882a593Smuzhiyun
3705*4882a593Smuzhiyun /*
3706*4882a593Smuzhiyun * Let bytes = BTRFS_MAX_DISCARD_SIZE + X.
3707*4882a593Smuzhiyun * If X < @minlen, we won't trim X when we come back around.
3708*4882a593Smuzhiyun * So trim it now. We differ here from trimming extents as we
3709*4882a593Smuzhiyun * don't keep individual state per bit.
3710*4882a593Smuzhiyun */
3711*4882a593Smuzhiyun if (async &&
3712*4882a593Smuzhiyun max_discard_size &&
3713*4882a593Smuzhiyun bytes > (max_discard_size + minlen))
3714*4882a593Smuzhiyun bytes = max_discard_size;
3715*4882a593Smuzhiyun
3716*4882a593Smuzhiyun bitmap_clear_bits(ctl, entry, start, bytes);
3717*4882a593Smuzhiyun if (entry->bytes == 0)
3718*4882a593Smuzhiyun free_bitmap(ctl, entry);
3719*4882a593Smuzhiyun
3720*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3721*4882a593Smuzhiyun trim_entry.start = start;
3722*4882a593Smuzhiyun trim_entry.bytes = bytes;
3723*4882a593Smuzhiyun list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3724*4882a593Smuzhiyun mutex_unlock(&ctl->cache_writeout_mutex);
3725*4882a593Smuzhiyun
3726*4882a593Smuzhiyun ret = do_trimming(block_group, total_trimmed, start, bytes,
3727*4882a593Smuzhiyun start, bytes, 0, &trim_entry);
3728*4882a593Smuzhiyun if (ret) {
3729*4882a593Smuzhiyun reset_trimming_bitmap(ctl, offset);
3730*4882a593Smuzhiyun block_group->discard_cursor =
3731*4882a593Smuzhiyun btrfs_block_group_end(block_group);
3732*4882a593Smuzhiyun break;
3733*4882a593Smuzhiyun }
3734*4882a593Smuzhiyun next:
3735*4882a593Smuzhiyun if (next_bitmap) {
3736*4882a593Smuzhiyun offset += BITS_PER_BITMAP * ctl->unit;
3737*4882a593Smuzhiyun start = offset;
3738*4882a593Smuzhiyun } else {
3739*4882a593Smuzhiyun start += bytes;
3740*4882a593Smuzhiyun }
3741*4882a593Smuzhiyun block_group->discard_cursor = start;
3742*4882a593Smuzhiyun
3743*4882a593Smuzhiyun if (fatal_signal_pending(current)) {
3744*4882a593Smuzhiyun if (start != offset)
3745*4882a593Smuzhiyun reset_trimming_bitmap(ctl, offset);
3746*4882a593Smuzhiyun ret = -ERESTARTSYS;
3747*4882a593Smuzhiyun break;
3748*4882a593Smuzhiyun }
3749*4882a593Smuzhiyun
3750*4882a593Smuzhiyun cond_resched();
3751*4882a593Smuzhiyun }
3752*4882a593Smuzhiyun
3753*4882a593Smuzhiyun if (offset >= end)
3754*4882a593Smuzhiyun block_group->discard_cursor = end;
3755*4882a593Smuzhiyun
3756*4882a593Smuzhiyun out:
3757*4882a593Smuzhiyun return ret;
3758*4882a593Smuzhiyun }
3759*4882a593Smuzhiyun
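/*
 * Entry point for a full (fstrim style) trim of one block group: freeze the
 * block group so it cannot be removed underneath us, trim the extent entries
 * and then the bitmaps synchronously, and reset the trimming bit if @end
 * landed in the middle of a bitmap.
 */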
3760*4882a593Smuzhiyun int btrfs_trim_block_group(struct btrfs_block_group *block_group,
3761*4882a593Smuzhiyun u64 *trimmed, u64 start, u64 end, u64 minlen)
3762*4882a593Smuzhiyun {
3763*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3764*4882a593Smuzhiyun int ret;
3765*4882a593Smuzhiyun u64 rem = 0;
3766*4882a593Smuzhiyun
3767*4882a593Smuzhiyun *trimmed = 0;
3768*4882a593Smuzhiyun
3769*4882a593Smuzhiyun spin_lock(&block_group->lock);
3770*4882a593Smuzhiyun if (block_group->removed) {
3771*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3772*4882a593Smuzhiyun return 0;
3773*4882a593Smuzhiyun }
3774*4882a593Smuzhiyun btrfs_freeze_block_group(block_group);
3775*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3776*4882a593Smuzhiyun
3777*4882a593Smuzhiyun ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
3778*4882a593Smuzhiyun if (ret)
3779*4882a593Smuzhiyun goto out;
3780*4882a593Smuzhiyun
3781*4882a593Smuzhiyun ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
3782*4882a593Smuzhiyun div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3783*4882a593Smuzhiyun /* If we ended in the middle of a bitmap, reset the trimming flag */
3784*4882a593Smuzhiyun if (rem)
3785*4882a593Smuzhiyun reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
3786*4882a593Smuzhiyun out:
3787*4882a593Smuzhiyun btrfs_unfreeze_block_group(block_group);
3788*4882a593Smuzhiyun return ret;
3789*4882a593Smuzhiyun }
3790*4882a593Smuzhiyun
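/*
 * Trim only the extent entries of a block group; with @async set this is the
 * flavour used by the async discard worker and it returns after one region.
 */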
3791*4882a593Smuzhiyun int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3792*4882a593Smuzhiyun u64 *trimmed, u64 start, u64 end, u64 minlen,
3793*4882a593Smuzhiyun bool async)
3794*4882a593Smuzhiyun {
3795*4882a593Smuzhiyun int ret;
3796*4882a593Smuzhiyun
3797*4882a593Smuzhiyun *trimmed = 0;
3798*4882a593Smuzhiyun
3799*4882a593Smuzhiyun spin_lock(&block_group->lock);
3800*4882a593Smuzhiyun if (block_group->removed) {
3801*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3802*4882a593Smuzhiyun return 0;
3803*4882a593Smuzhiyun }
3804*4882a593Smuzhiyun btrfs_freeze_block_group(block_group);
3805*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3806*4882a593Smuzhiyun
3807*4882a593Smuzhiyun ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
3808*4882a593Smuzhiyun btrfs_unfreeze_block_group(block_group);
3809*4882a593Smuzhiyun
3810*4882a593Smuzhiyun return ret;
3811*4882a593Smuzhiyun }
3812*4882a593Smuzhiyun
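/*
 * Same as above but for the bitmap entries only, additionally bounded by
 * @maxlen.
 */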
3813*4882a593Smuzhiyun int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3814*4882a593Smuzhiyun u64 *trimmed, u64 start, u64 end, u64 minlen,
3815*4882a593Smuzhiyun u64 maxlen, bool async)
3816*4882a593Smuzhiyun {
3817*4882a593Smuzhiyun int ret;
3818*4882a593Smuzhiyun
3819*4882a593Smuzhiyun *trimmed = 0;
3820*4882a593Smuzhiyun
3821*4882a593Smuzhiyun spin_lock(&block_group->lock);
3822*4882a593Smuzhiyun if (block_group->removed) {
3823*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3824*4882a593Smuzhiyun return 0;
3825*4882a593Smuzhiyun }
3826*4882a593Smuzhiyun btrfs_freeze_block_group(block_group);
3827*4882a593Smuzhiyun spin_unlock(&block_group->lock);
3828*4882a593Smuzhiyun
3829*4882a593Smuzhiyun ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3830*4882a593Smuzhiyun async);
3831*4882a593Smuzhiyun
3832*4882a593Smuzhiyun btrfs_unfreeze_block_group(block_group);
3833*4882a593Smuzhiyun
3834*4882a593Smuzhiyun return ret;
3835*4882a593Smuzhiyun }
3836*4882a593Smuzhiyun
3837*4882a593Smuzhiyun /*
3838*4882a593Smuzhiyun * Find the left-most item in the cache tree, and then return the
3839*4882a593Smuzhiyun * smallest inode number in the item.
3840*4882a593Smuzhiyun *
3841*4882a593Smuzhiyun * Note: the returned inode number may not be the smallest one in
3842*4882a593Smuzhiyun * the tree, if the left-most item is a bitmap.
3843*4882a593Smuzhiyun */
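/*
 * For an extent entry the lowest offset is handed out and the entry is shrunk
 * by one; for a bitmap the first set bit is found with search_bitmap() and
 * cleared.  Either way exactly one inode number is consumed per call.
 */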
3844*4882a593Smuzhiyun u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
3845*4882a593Smuzhiyun {
3846*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
3847*4882a593Smuzhiyun struct btrfs_free_space *entry = NULL;
3848*4882a593Smuzhiyun u64 ino = 0;
3849*4882a593Smuzhiyun
3850*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
3851*4882a593Smuzhiyun
3852*4882a593Smuzhiyun if (RB_EMPTY_ROOT(&ctl->free_space_offset))
3853*4882a593Smuzhiyun goto out;
3854*4882a593Smuzhiyun
3855*4882a593Smuzhiyun entry = rb_entry(rb_first(&ctl->free_space_offset),
3856*4882a593Smuzhiyun struct btrfs_free_space, offset_index);
3857*4882a593Smuzhiyun
3858*4882a593Smuzhiyun if (!entry->bitmap) {
3859*4882a593Smuzhiyun ino = entry->offset;
3860*4882a593Smuzhiyun
3861*4882a593Smuzhiyun unlink_free_space(ctl, entry);
3862*4882a593Smuzhiyun entry->offset++;
3863*4882a593Smuzhiyun entry->bytes--;
3864*4882a593Smuzhiyun if (!entry->bytes)
3865*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, entry);
3866*4882a593Smuzhiyun else
3867*4882a593Smuzhiyun link_free_space(ctl, entry);
3868*4882a593Smuzhiyun } else {
3869*4882a593Smuzhiyun u64 offset = 0;
3870*4882a593Smuzhiyun u64 count = 1;
3871*4882a593Smuzhiyun int ret;
3872*4882a593Smuzhiyun
3873*4882a593Smuzhiyun ret = search_bitmap(ctl, entry, &offset, &count, true);
3874*4882a593Smuzhiyun /* Logic error; should be empty if it can't find anything */
3875*4882a593Smuzhiyun ASSERT(!ret);
3876*4882a593Smuzhiyun
3877*4882a593Smuzhiyun ino = offset;
3878*4882a593Smuzhiyun bitmap_clear_bits(ctl, entry, offset, 1);
3879*4882a593Smuzhiyun if (entry->bytes == 0)
3880*4882a593Smuzhiyun free_bitmap(ctl, entry);
3881*4882a593Smuzhiyun }
3882*4882a593Smuzhiyun out:
3883*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
3884*4882a593Smuzhiyun
3885*4882a593Smuzhiyun return ino;
3886*4882a593Smuzhiyun }
3887*4882a593Smuzhiyun
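/*
 * Return the cached free-ino inode if we already have one, otherwise look it
 * up on disk at offset 0 and stash it in root->ino_cache_inode unless the
 * filesystem is shutting down.
 */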
3888*4882a593Smuzhiyun struct inode *lookup_free_ino_inode(struct btrfs_root *root,
3889*4882a593Smuzhiyun struct btrfs_path *path)
3890*4882a593Smuzhiyun {
3891*4882a593Smuzhiyun struct inode *inode = NULL;
3892*4882a593Smuzhiyun
3893*4882a593Smuzhiyun spin_lock(&root->ino_cache_lock);
3894*4882a593Smuzhiyun if (root->ino_cache_inode)
3895*4882a593Smuzhiyun inode = igrab(root->ino_cache_inode);
3896*4882a593Smuzhiyun spin_unlock(&root->ino_cache_lock);
3897*4882a593Smuzhiyun if (inode)
3898*4882a593Smuzhiyun return inode;
3899*4882a593Smuzhiyun
3900*4882a593Smuzhiyun inode = __lookup_free_space_inode(root, path, 0);
3901*4882a593Smuzhiyun if (IS_ERR(inode))
3902*4882a593Smuzhiyun return inode;
3903*4882a593Smuzhiyun
3904*4882a593Smuzhiyun spin_lock(&root->ino_cache_lock);
3905*4882a593Smuzhiyun if (!btrfs_fs_closing(root->fs_info))
3906*4882a593Smuzhiyun root->ino_cache_inode = igrab(inode);
3907*4882a593Smuzhiyun spin_unlock(&root->ino_cache_lock);
3908*4882a593Smuzhiyun
3909*4882a593Smuzhiyun return inode;
3910*4882a593Smuzhiyun }
3911*4882a593Smuzhiyun
3912*4882a593Smuzhiyun int create_free_ino_inode(struct btrfs_root *root,
3913*4882a593Smuzhiyun struct btrfs_trans_handle *trans,
3914*4882a593Smuzhiyun struct btrfs_path *path)
3915*4882a593Smuzhiyun {
3916*4882a593Smuzhiyun return __create_free_space_inode(root, trans, path,
3917*4882a593Smuzhiyun BTRFS_FREE_INO_OBJECTID, 0);
3918*4882a593Smuzhiyun }
3919*4882a593Smuzhiyun
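/*
 * Load the cached free inode numbers for @root, but only when the inode_cache
 * mount option is enabled, the filesystem is not shutting down and the cache
 * generation still matches the root generation.
 */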
3920*4882a593Smuzhiyun int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3921*4882a593Smuzhiyun {
3922*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3923*4882a593Smuzhiyun struct btrfs_path *path;
3924*4882a593Smuzhiyun struct inode *inode;
3925*4882a593Smuzhiyun int ret = 0;
3926*4882a593Smuzhiyun u64 root_gen = btrfs_root_generation(&root->root_item);
3927*4882a593Smuzhiyun
3928*4882a593Smuzhiyun if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3929*4882a593Smuzhiyun return 0;
3930*4882a593Smuzhiyun
3931*4882a593Smuzhiyun /*
3932*4882a593Smuzhiyun * If we're unmounting then just return, since this does a search on the
3933*4882a593Smuzhiyun * normal root and not the commit root and we could deadlock.
3934*4882a593Smuzhiyun */
3935*4882a593Smuzhiyun if (btrfs_fs_closing(fs_info))
3936*4882a593Smuzhiyun return 0;
3937*4882a593Smuzhiyun
3938*4882a593Smuzhiyun path = btrfs_alloc_path();
3939*4882a593Smuzhiyun if (!path)
3940*4882a593Smuzhiyun return 0;
3941*4882a593Smuzhiyun
3942*4882a593Smuzhiyun inode = lookup_free_ino_inode(root, path);
3943*4882a593Smuzhiyun if (IS_ERR(inode))
3944*4882a593Smuzhiyun goto out;
3945*4882a593Smuzhiyun
3946*4882a593Smuzhiyun if (root_gen != BTRFS_I(inode)->generation)
3947*4882a593Smuzhiyun goto out_put;
3948*4882a593Smuzhiyun
3949*4882a593Smuzhiyun ret = __load_free_space_cache(root, inode, ctl, path, 0);
3950*4882a593Smuzhiyun
3951*4882a593Smuzhiyun if (ret < 0)
3952*4882a593Smuzhiyun btrfs_err(fs_info,
3953*4882a593Smuzhiyun "failed to load free ino cache for root %llu",
3954*4882a593Smuzhiyun root->root_key.objectid);
3955*4882a593Smuzhiyun out_put:
3956*4882a593Smuzhiyun iput(inode);
3957*4882a593Smuzhiyun out:
3958*4882a593Smuzhiyun btrfs_free_path(path);
3959*4882a593Smuzhiyun return ret;
3960*4882a593Smuzhiyun }
3961*4882a593Smuzhiyun
3962*4882a593Smuzhiyun int btrfs_write_out_ino_cache(struct btrfs_root *root,
3963*4882a593Smuzhiyun struct btrfs_trans_handle *trans,
3964*4882a593Smuzhiyun struct btrfs_path *path,
3965*4882a593Smuzhiyun struct inode *inode)
3966*4882a593Smuzhiyun {
3967*4882a593Smuzhiyun struct btrfs_fs_info *fs_info = root->fs_info;
3968*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3969*4882a593Smuzhiyun int ret;
3970*4882a593Smuzhiyun struct btrfs_io_ctl io_ctl;
3971*4882a593Smuzhiyun bool release_metadata = true;
3972*4882a593Smuzhiyun
3973*4882a593Smuzhiyun if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
3974*4882a593Smuzhiyun return 0;
3975*4882a593Smuzhiyun
3976*4882a593Smuzhiyun memset(&io_ctl, 0, sizeof(io_ctl));
3977*4882a593Smuzhiyun ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
3978*4882a593Smuzhiyun if (!ret) {
3979*4882a593Smuzhiyun /*
3980*4882a593Smuzhiyun * At this point writepages() didn't error out, so our metadata
3981*4882a593Smuzhiyun * reservation is released when the writeback finishes, at
3982*4882a593Smuzhiyun * inode.c:btrfs_finish_ordered_io(), regardless of it finishing
3983*4882a593Smuzhiyun * with or without an error.
3984*4882a593Smuzhiyun */
3985*4882a593Smuzhiyun release_metadata = false;
3986*4882a593Smuzhiyun ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
3987*4882a593Smuzhiyun }
3988*4882a593Smuzhiyun
3989*4882a593Smuzhiyun if (ret) {
3990*4882a593Smuzhiyun if (release_metadata)
3991*4882a593Smuzhiyun btrfs_delalloc_release_metadata(BTRFS_I(inode),
3992*4882a593Smuzhiyun inode->i_size, true);
3993*4882a593Smuzhiyun btrfs_debug(fs_info,
3994*4882a593Smuzhiyun "failed to write free ino cache for root %llu error %d",
3995*4882a593Smuzhiyun root->root_key.objectid, ret);
3996*4882a593Smuzhiyun }
3997*4882a593Smuzhiyun
3998*4882a593Smuzhiyun return ret;
3999*4882a593Smuzhiyun }
4000*4882a593Smuzhiyun
4001*4882a593Smuzhiyun #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4002*4882a593Smuzhiyun /*
4003*4882a593Smuzhiyun * Use this if you need to make a bitmap or extent entry specifically, it
4004*4882a593Smuzhiyun * doesn't do any of the merging that add_free_space does; this acts a lot like
4005*4882a593Smuzhiyun * how the free space cache loading stuff works, so you can get really weird
4006*4882a593Smuzhiyun * configurations.
4007*4882a593Smuzhiyun */
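/*
 * Illustrative use from a sanity test (values are only an example): add a 4M
 * extent entry at the start of the group and a bitmap entry right after it,
 * then verify the range with test_check_exists():
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4M, 0);
 *	ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
 *	if (test_check_exists(cache, 0, SZ_4M))
 *		...
 */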
4008*4882a593Smuzhiyun int test_add_free_space_entry(struct btrfs_block_group *cache,
4009*4882a593Smuzhiyun u64 offset, u64 bytes, bool bitmap)
4010*4882a593Smuzhiyun {
4011*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4012*4882a593Smuzhiyun struct btrfs_free_space *info = NULL, *bitmap_info;
4013*4882a593Smuzhiyun void *map = NULL;
4014*4882a593Smuzhiyun enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
4015*4882a593Smuzhiyun u64 bytes_added;
4016*4882a593Smuzhiyun int ret;
4017*4882a593Smuzhiyun
4018*4882a593Smuzhiyun again:
4019*4882a593Smuzhiyun if (!info) {
4020*4882a593Smuzhiyun info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4021*4882a593Smuzhiyun if (!info)
4022*4882a593Smuzhiyun return -ENOMEM;
4023*4882a593Smuzhiyun }
4024*4882a593Smuzhiyun
4025*4882a593Smuzhiyun if (!bitmap) {
4026*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
4027*4882a593Smuzhiyun info->offset = offset;
4028*4882a593Smuzhiyun info->bytes = bytes;
4029*4882a593Smuzhiyun info->max_extent_size = 0;
4030*4882a593Smuzhiyun ret = link_free_space(ctl, info);
4031*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
4032*4882a593Smuzhiyun if (ret)
4033*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
4034*4882a593Smuzhiyun return ret;
4035*4882a593Smuzhiyun }
4036*4882a593Smuzhiyun
4037*4882a593Smuzhiyun if (!map) {
4038*4882a593Smuzhiyun map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
4039*4882a593Smuzhiyun if (!map) {
4040*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
4041*4882a593Smuzhiyun return -ENOMEM;
4042*4882a593Smuzhiyun }
4043*4882a593Smuzhiyun }
4044*4882a593Smuzhiyun
4045*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
4046*4882a593Smuzhiyun bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4047*4882a593Smuzhiyun 1, 0);
4048*4882a593Smuzhiyun if (!bitmap_info) {
4049*4882a593Smuzhiyun info->bitmap = map;
4050*4882a593Smuzhiyun map = NULL;
4051*4882a593Smuzhiyun add_new_bitmap(ctl, info, offset);
4052*4882a593Smuzhiyun bitmap_info = info;
4053*4882a593Smuzhiyun info = NULL;
4054*4882a593Smuzhiyun }
4055*4882a593Smuzhiyun
4056*4882a593Smuzhiyun bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4057*4882a593Smuzhiyun trim_state);
4058*4882a593Smuzhiyun
4059*4882a593Smuzhiyun bytes -= bytes_added;
4060*4882a593Smuzhiyun offset += bytes_added;
4061*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
4062*4882a593Smuzhiyun
4063*4882a593Smuzhiyun if (bytes)
4064*4882a593Smuzhiyun goto again;
4065*4882a593Smuzhiyun
4066*4882a593Smuzhiyun if (info)
4067*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_cachep, info);
4068*4882a593Smuzhiyun if (map)
4069*4882a593Smuzhiyun kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
4070*4882a593Smuzhiyun return 0;
4071*4882a593Smuzhiyun }
4072*4882a593Smuzhiyun
4073*4882a593Smuzhiyun /*
4074*4882a593Smuzhiyun * Checks to see if the given range is in the free space cache. This is really
4075*4882a593Smuzhiyun * just used to check the absence of space, so if there is free space in the
4076*4882a593Smuzhiyun * range at all we will return 1.
4077*4882a593Smuzhiyun */
4078*4882a593Smuzhiyun int test_check_exists(struct btrfs_block_group *cache,
4079*4882a593Smuzhiyun u64 offset, u64 bytes)
4080*4882a593Smuzhiyun {
4081*4882a593Smuzhiyun struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4082*4882a593Smuzhiyun struct btrfs_free_space *info;
4083*4882a593Smuzhiyun int ret = 0;
4084*4882a593Smuzhiyun
4085*4882a593Smuzhiyun spin_lock(&ctl->tree_lock);
4086*4882a593Smuzhiyun info = tree_search_offset(ctl, offset, 0, 0);
4087*4882a593Smuzhiyun if (!info) {
4088*4882a593Smuzhiyun info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4089*4882a593Smuzhiyun 1, 0);
4090*4882a593Smuzhiyun if (!info)
4091*4882a593Smuzhiyun goto out;
4092*4882a593Smuzhiyun }
4093*4882a593Smuzhiyun
4094*4882a593Smuzhiyun have_info:
4095*4882a593Smuzhiyun if (info->bitmap) {
4096*4882a593Smuzhiyun u64 bit_off, bit_bytes;
4097*4882a593Smuzhiyun struct rb_node *n;
4098*4882a593Smuzhiyun struct btrfs_free_space *tmp;
4099*4882a593Smuzhiyun
4100*4882a593Smuzhiyun bit_off = offset;
4101*4882a593Smuzhiyun bit_bytes = ctl->unit;
4102*4882a593Smuzhiyun ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
4103*4882a593Smuzhiyun if (!ret) {
4104*4882a593Smuzhiyun if (bit_off == offset) {
4105*4882a593Smuzhiyun ret = 1;
4106*4882a593Smuzhiyun goto out;
4107*4882a593Smuzhiyun } else if (bit_off > offset &&
4108*4882a593Smuzhiyun offset + bytes > bit_off) {
4109*4882a593Smuzhiyun ret = 1;
4110*4882a593Smuzhiyun goto out;
4111*4882a593Smuzhiyun }
4112*4882a593Smuzhiyun }
4113*4882a593Smuzhiyun
4114*4882a593Smuzhiyun n = rb_prev(&info->offset_index);
4115*4882a593Smuzhiyun while (n) {
4116*4882a593Smuzhiyun tmp = rb_entry(n, struct btrfs_free_space,
4117*4882a593Smuzhiyun offset_index);
4118*4882a593Smuzhiyun if (tmp->offset + tmp->bytes < offset)
4119*4882a593Smuzhiyun break;
4120*4882a593Smuzhiyun if (offset + bytes < tmp->offset) {
4121*4882a593Smuzhiyun n = rb_prev(&tmp->offset_index);
4122*4882a593Smuzhiyun continue;
4123*4882a593Smuzhiyun }
4124*4882a593Smuzhiyun info = tmp;
4125*4882a593Smuzhiyun goto have_info;
4126*4882a593Smuzhiyun }
4127*4882a593Smuzhiyun
4128*4882a593Smuzhiyun n = rb_next(&info->offset_index);
4129*4882a593Smuzhiyun while (n) {
4130*4882a593Smuzhiyun tmp = rb_entry(n, struct btrfs_free_space,
4131*4882a593Smuzhiyun offset_index);
4132*4882a593Smuzhiyun if (offset + bytes < tmp->offset)
4133*4882a593Smuzhiyun break;
4134*4882a593Smuzhiyun if (tmp->offset + tmp->bytes < offset) {
4135*4882a593Smuzhiyun n = rb_next(&tmp->offset_index);
4136*4882a593Smuzhiyun continue;
4137*4882a593Smuzhiyun }
4138*4882a593Smuzhiyun info = tmp;
4139*4882a593Smuzhiyun goto have_info;
4140*4882a593Smuzhiyun }
4141*4882a593Smuzhiyun
4142*4882a593Smuzhiyun ret = 0;
4143*4882a593Smuzhiyun goto out;
4144*4882a593Smuzhiyun }
4145*4882a593Smuzhiyun
4146*4882a593Smuzhiyun if (info->offset == offset) {
4147*4882a593Smuzhiyun ret = 1;
4148*4882a593Smuzhiyun goto out;
4149*4882a593Smuzhiyun }
4150*4882a593Smuzhiyun
4151*4882a593Smuzhiyun if (offset > info->offset && offset < info->offset + info->bytes)
4152*4882a593Smuzhiyun ret = 1;
4153*4882a593Smuzhiyun out:
4154*4882a593Smuzhiyun spin_unlock(&ctl->tree_lock);
4155*4882a593Smuzhiyun return ret;
4156*4882a593Smuzhiyun }
4157*4882a593Smuzhiyun #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */