xref: /OK3568_Linux_fs/kernel/drivers/md/dm-clone-metadata.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #include <linux/mm.h>
7*4882a593Smuzhiyun #include <linux/err.h>
8*4882a593Smuzhiyun #include <linux/slab.h>
9*4882a593Smuzhiyun #include <linux/rwsem.h>
10*4882a593Smuzhiyun #include <linux/bitops.h>
11*4882a593Smuzhiyun #include <linux/bitmap.h>
12*4882a593Smuzhiyun #include <linux/device-mapper.h>
13*4882a593Smuzhiyun 
14*4882a593Smuzhiyun #include "persistent-data/dm-bitset.h"
15*4882a593Smuzhiyun #include "persistent-data/dm-space-map.h"
16*4882a593Smuzhiyun #include "persistent-data/dm-block-manager.h"
17*4882a593Smuzhiyun #include "persistent-data/dm-transaction-manager.h"
18*4882a593Smuzhiyun 
19*4882a593Smuzhiyun #include "dm-clone-metadata.h"
20*4882a593Smuzhiyun 
21*4882a593Smuzhiyun #define DM_MSG_PREFIX "clone metadata"
22*4882a593Smuzhiyun 
23*4882a593Smuzhiyun #define SUPERBLOCK_LOCATION 0
24*4882a593Smuzhiyun #define SUPERBLOCK_MAGIC 0x8af27f64
25*4882a593Smuzhiyun #define SUPERBLOCK_CSUM_XOR 257649492
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun #define DM_CLONE_MAX_CONCURRENT_LOCKS 5
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun #define UUID_LEN 16
30*4882a593Smuzhiyun 
31*4882a593Smuzhiyun /* Min and max dm-clone metadata versions supported */
32*4882a593Smuzhiyun #define DM_CLONE_MIN_METADATA_VERSION 1
33*4882a593Smuzhiyun #define DM_CLONE_MAX_METADATA_VERSION 1
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun /*
36*4882a593Smuzhiyun  * On-disk metadata layout
37*4882a593Smuzhiyun  */
38*4882a593Smuzhiyun struct superblock_disk {
39*4882a593Smuzhiyun 	__le32 csum;
40*4882a593Smuzhiyun 	__le32 flags;
41*4882a593Smuzhiyun 	__le64 blocknr;
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun 	__u8 uuid[UUID_LEN];
44*4882a593Smuzhiyun 	__le64 magic;
45*4882a593Smuzhiyun 	__le32 version;
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun 	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun 	__le64 region_size;
50*4882a593Smuzhiyun 	__le64 target_size;
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 	__le64 bitset_root;
53*4882a593Smuzhiyun } __packed;
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun /*
56*4882a593Smuzhiyun  * Region and Dirty bitmaps.
57*4882a593Smuzhiyun  *
58*4882a593Smuzhiyun  * dm-clone logically splits the source and destination devices in regions of
59*4882a593Smuzhiyun  * fixed size. The destination device's regions are gradually hydrated, i.e.,
60*4882a593Smuzhiyun  * we copy (clone) the source's regions to the destination device. Eventually,
61*4882a593Smuzhiyun  * all regions will get hydrated and all I/O will be served from the
62*4882a593Smuzhiyun  * destination device.
63*4882a593Smuzhiyun  *
64*4882a593Smuzhiyun  * We maintain an on-disk bitmap which tracks the state of each of the
65*4882a593Smuzhiyun  * destination device's regions, i.e., whether they are hydrated or not.
66*4882a593Smuzhiyun  *
67*4882a593Smuzhiyun  * To save constantly doing look ups on disk we keep an in core copy of the
68*4882a593Smuzhiyun  * on-disk bitmap, the region_map.
69*4882a593Smuzhiyun  *
70*4882a593Smuzhiyun  * In order to track which regions are hydrated during a metadata transaction,
71*4882a593Smuzhiyun  * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
72*4882a593Smuzhiyun  * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
73*4882a593Smuzhiyun  * tracks the regions that got hydrated during the current metadata
74*4882a593Smuzhiyun  * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
75*4882a593Smuzhiyun  * the dirty_regions bitmap.
76*4882a593Smuzhiyun  *
77*4882a593Smuzhiyun  * This allows us to precisely track the regions that were hydrated during the
78*4882a593Smuzhiyun  * current metadata transaction and update the metadata accordingly, when we
79*4882a593Smuzhiyun  * commit the current transaction. This is important because dm-clone should
80*4882a593Smuzhiyun  * only commit the metadata of regions that were properly flushed to the
81*4882a593Smuzhiyun  * destination device beforehand. Otherwise, in case of a crash, we could end
82*4882a593Smuzhiyun  * up with a corrupted dm-clone device.
83*4882a593Smuzhiyun  *
84*4882a593Smuzhiyun  * When a region finishes hydrating dm-clone calls
85*4882a593Smuzhiyun  * dm_clone_set_region_hydrated(), or for discard requests
86*4882a593Smuzhiyun  * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
87*4882a593Smuzhiyun  * and dmap.
88*4882a593Smuzhiyun  *
89*4882a593Smuzhiyun  * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
90*4882a593Smuzhiyun  * and update the on-disk metadata accordingly. Thus, we don't have to flush to
91*4882a593Smuzhiyun  * disk the whole region_map. We can just flush the dirty region_map bits.
92*4882a593Smuzhiyun  *
93*4882a593Smuzhiyun  * We use the helper dmap->dirty_words bitmap, which is smaller than the
94*4882a593Smuzhiyun  * original region_map, to reduce the amount of memory accesses during a
95*4882a593Smuzhiyun  * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
96*4882a593Smuzhiyun  * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
97*4882a593Smuzhiyun  * accesses.
98*4882a593Smuzhiyun  *
99*4882a593Smuzhiyun  * We could update directly the on-disk bitmap, when dm-clone calls either
100*4882a593Smuzhiyun  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), buts this
101*4882a593Smuzhiyun  * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
102*4882a593Smuzhiyun  * these two functions don't block, we can call them in interrupt context,
103*4882a593Smuzhiyun  * e.g., in a hooked overwrite bio's completion routine, and further reduce the
104*4882a593Smuzhiyun  * I/O completion latency.
105*4882a593Smuzhiyun  *
106*4882a593Smuzhiyun  * We maintain two dirty bitmap sets. During a metadata commit we atomically
107*4882a593Smuzhiyun  * swap the currently used dmap with the unused one. This allows the metadata
108*4882a593Smuzhiyun  * update functions to run concurrently with an ongoing commit.
109*4882a593Smuzhiyun  */
110*4882a593Smuzhiyun struct dirty_map {
111*4882a593Smuzhiyun 	unsigned long *dirty_words;
112*4882a593Smuzhiyun 	unsigned long *dirty_regions;
113*4882a593Smuzhiyun 	unsigned int changed;
114*4882a593Smuzhiyun };
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun struct dm_clone_metadata {
117*4882a593Smuzhiyun 	/* The metadata block device */
118*4882a593Smuzhiyun 	struct block_device *bdev;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	sector_t target_size;
121*4882a593Smuzhiyun 	sector_t region_size;
122*4882a593Smuzhiyun 	unsigned long nr_regions;
123*4882a593Smuzhiyun 	unsigned long nr_words;
124*4882a593Smuzhiyun 
125*4882a593Smuzhiyun 	/* Spinlock protecting the region and dirty bitmaps. */
126*4882a593Smuzhiyun 	spinlock_t bitmap_lock;
127*4882a593Smuzhiyun 	struct dirty_map dmap[2];
128*4882a593Smuzhiyun 	struct dirty_map *current_dmap;
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	/* Protected by lock */
131*4882a593Smuzhiyun 	struct dirty_map *committing_dmap;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 	/*
134*4882a593Smuzhiyun 	 * In core copy of the on-disk bitmap to save constantly doing look ups
135*4882a593Smuzhiyun 	 * on disk.
136*4882a593Smuzhiyun 	 */
137*4882a593Smuzhiyun 	unsigned long *region_map;
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	/* Protected by bitmap_lock */
140*4882a593Smuzhiyun 	unsigned int read_only;
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun 	struct dm_block_manager *bm;
143*4882a593Smuzhiyun 	struct dm_space_map *sm;
144*4882a593Smuzhiyun 	struct dm_transaction_manager *tm;
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	struct rw_semaphore lock;
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 	struct dm_disk_bitset bitset_info;
149*4882a593Smuzhiyun 	dm_block_t bitset_root;
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 	/*
152*4882a593Smuzhiyun 	 * Reading the space map root can fail, so we read it into this
153*4882a593Smuzhiyun 	 * buffer before the superblock is locked and updated.
154*4882a593Smuzhiyun 	 */
155*4882a593Smuzhiyun 	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	bool hydration_done:1;
158*4882a593Smuzhiyun 	bool fail_io:1;
159*4882a593Smuzhiyun };
160*4882a593Smuzhiyun 
161*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun /*
164*4882a593Smuzhiyun  * Superblock validation.
165*4882a593Smuzhiyun  */
sb_prepare_for_write(struct dm_block_validator * v,struct dm_block * b,size_t sb_block_size)166*4882a593Smuzhiyun static void sb_prepare_for_write(struct dm_block_validator *v,
167*4882a593Smuzhiyun 				 struct dm_block *b, size_t sb_block_size)
168*4882a593Smuzhiyun {
169*4882a593Smuzhiyun 	struct superblock_disk *sb;
170*4882a593Smuzhiyun 	u32 csum;
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun 	sb = dm_block_data(b);
173*4882a593Smuzhiyun 	sb->blocknr = cpu_to_le64(dm_block_location(b));
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
176*4882a593Smuzhiyun 			      SUPERBLOCK_CSUM_XOR);
177*4882a593Smuzhiyun 	sb->csum = cpu_to_le32(csum);
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun 
sb_check(struct dm_block_validator * v,struct dm_block * b,size_t sb_block_size)180*4882a593Smuzhiyun static int sb_check(struct dm_block_validator *v, struct dm_block *b,
181*4882a593Smuzhiyun 		    size_t sb_block_size)
182*4882a593Smuzhiyun {
183*4882a593Smuzhiyun 	struct superblock_disk *sb;
184*4882a593Smuzhiyun 	u32 csum, metadata_version;
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun 	sb = dm_block_data(b);
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun 	if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
189*4882a593Smuzhiyun 		DMERR("Superblock check failed: blocknr %llu, expected %llu",
190*4882a593Smuzhiyun 		      le64_to_cpu(sb->blocknr),
191*4882a593Smuzhiyun 		      (unsigned long long)dm_block_location(b));
192*4882a593Smuzhiyun 		return -ENOTBLK;
193*4882a593Smuzhiyun 	}
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
196*4882a593Smuzhiyun 		DMERR("Superblock check failed: magic %llu, expected %llu",
197*4882a593Smuzhiyun 		      le64_to_cpu(sb->magic),
198*4882a593Smuzhiyun 		      (unsigned long long)SUPERBLOCK_MAGIC);
199*4882a593Smuzhiyun 		return -EILSEQ;
200*4882a593Smuzhiyun 	}
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun 	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
203*4882a593Smuzhiyun 			      SUPERBLOCK_CSUM_XOR);
204*4882a593Smuzhiyun 	if (sb->csum != cpu_to_le32(csum)) {
205*4882a593Smuzhiyun 		DMERR("Superblock check failed: checksum %u, expected %u",
206*4882a593Smuzhiyun 		      csum, le32_to_cpu(sb->csum));
207*4882a593Smuzhiyun 		return -EILSEQ;
208*4882a593Smuzhiyun 	}
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun 	/* Check metadata version */
211*4882a593Smuzhiyun 	metadata_version = le32_to_cpu(sb->version);
212*4882a593Smuzhiyun 	if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
213*4882a593Smuzhiyun 	    metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
214*4882a593Smuzhiyun 		DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
215*4882a593Smuzhiyun 		      metadata_version, DM_CLONE_MIN_METADATA_VERSION,
216*4882a593Smuzhiyun 		      DM_CLONE_MAX_METADATA_VERSION);
217*4882a593Smuzhiyun 		return -EINVAL;
218*4882a593Smuzhiyun 	}
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	return 0;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun static struct dm_block_validator sb_validator = {
224*4882a593Smuzhiyun 	.name = "superblock",
225*4882a593Smuzhiyun 	.prepare_for_write = sb_prepare_for_write,
226*4882a593Smuzhiyun 	.check = sb_check
227*4882a593Smuzhiyun };
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun /*
230*4882a593Smuzhiyun  * Check if the superblock is formatted or not. We consider the superblock to
231*4882a593Smuzhiyun  * be formatted in case we find non-zero bytes in it.
232*4882a593Smuzhiyun  */
__superblock_all_zeroes(struct dm_block_manager * bm,bool * formatted)233*4882a593Smuzhiyun static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
234*4882a593Smuzhiyun {
235*4882a593Smuzhiyun 	int r;
236*4882a593Smuzhiyun 	unsigned int i, nr_words;
237*4882a593Smuzhiyun 	struct dm_block *sblock;
238*4882a593Smuzhiyun 	__le64 *data_le, zero = cpu_to_le64(0);
239*4882a593Smuzhiyun 
240*4882a593Smuzhiyun 	/*
241*4882a593Smuzhiyun 	 * We don't use a validator here because the superblock could be all
242*4882a593Smuzhiyun 	 * zeroes.
243*4882a593Smuzhiyun 	 */
244*4882a593Smuzhiyun 	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
245*4882a593Smuzhiyun 	if (r) {
246*4882a593Smuzhiyun 		DMERR("Failed to read_lock superblock");
247*4882a593Smuzhiyun 		return r;
248*4882a593Smuzhiyun 	}
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	data_le = dm_block_data(sblock);
251*4882a593Smuzhiyun 	*formatted = false;
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 	/* This assumes that the block size is a multiple of 8 bytes */
254*4882a593Smuzhiyun 	BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
255*4882a593Smuzhiyun 	nr_words = dm_bm_block_size(bm) / sizeof(__le64);
256*4882a593Smuzhiyun 	for (i = 0; i < nr_words; i++) {
257*4882a593Smuzhiyun 		if (data_le[i] != zero) {
258*4882a593Smuzhiyun 			*formatted = true;
259*4882a593Smuzhiyun 			break;
260*4882a593Smuzhiyun 		}
261*4882a593Smuzhiyun 	}
262*4882a593Smuzhiyun 
263*4882a593Smuzhiyun 	dm_bm_unlock(sblock);
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	return 0;
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun 
268*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
269*4882a593Smuzhiyun 
270*4882a593Smuzhiyun /*
271*4882a593Smuzhiyun  * Low-level metadata handling.
272*4882a593Smuzhiyun  */
superblock_read_lock(struct dm_clone_metadata * cmd,struct dm_block ** sblock)273*4882a593Smuzhiyun static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
274*4882a593Smuzhiyun 				       struct dm_block **sblock)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun 	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun 
superblock_write_lock(struct dm_clone_metadata * cmd,struct dm_block ** sblock)279*4882a593Smuzhiyun static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
280*4882a593Smuzhiyun 					struct dm_block **sblock)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun 	return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun 
superblock_write_lock_zero(struct dm_clone_metadata * cmd,struct dm_block ** sblock)285*4882a593Smuzhiyun static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
286*4882a593Smuzhiyun 					     struct dm_block **sblock)
287*4882a593Smuzhiyun {
288*4882a593Smuzhiyun 	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
289*4882a593Smuzhiyun }
290*4882a593Smuzhiyun 
__copy_sm_root(struct dm_clone_metadata * cmd)291*4882a593Smuzhiyun static int __copy_sm_root(struct dm_clone_metadata *cmd)
292*4882a593Smuzhiyun {
293*4882a593Smuzhiyun 	int r;
294*4882a593Smuzhiyun 	size_t root_size;
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 	r = dm_sm_root_size(cmd->sm, &root_size);
297*4882a593Smuzhiyun 	if (r)
298*4882a593Smuzhiyun 		return r;
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun 	return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
301*4882a593Smuzhiyun }
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun /* Save dm-clone metadata in superblock */
__prepare_superblock(struct dm_clone_metadata * cmd,struct superblock_disk * sb)304*4882a593Smuzhiyun static void __prepare_superblock(struct dm_clone_metadata *cmd,
305*4882a593Smuzhiyun 				 struct superblock_disk *sb)
306*4882a593Smuzhiyun {
307*4882a593Smuzhiyun 	sb->flags = cpu_to_le32(0UL);
308*4882a593Smuzhiyun 
309*4882a593Smuzhiyun 	/* FIXME: UUID is currently unused */
310*4882a593Smuzhiyun 	memset(sb->uuid, 0, sizeof(sb->uuid));
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 	sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
313*4882a593Smuzhiyun 	sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	/* Save the metadata space_map root */
316*4882a593Smuzhiyun 	memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
317*4882a593Smuzhiyun 	       sizeof(cmd->metadata_space_map_root));
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 	sb->region_size = cpu_to_le64(cmd->region_size);
320*4882a593Smuzhiyun 	sb->target_size = cpu_to_le64(cmd->target_size);
321*4882a593Smuzhiyun 	sb->bitset_root = cpu_to_le64(cmd->bitset_root);
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun 
__open_metadata(struct dm_clone_metadata * cmd)324*4882a593Smuzhiyun static int __open_metadata(struct dm_clone_metadata *cmd)
325*4882a593Smuzhiyun {
326*4882a593Smuzhiyun 	int r;
327*4882a593Smuzhiyun 	struct dm_block *sblock;
328*4882a593Smuzhiyun 	struct superblock_disk *sb;
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun 	r = superblock_read_lock(cmd, &sblock);
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun 	if (r) {
333*4882a593Smuzhiyun 		DMERR("Failed to read_lock superblock");
334*4882a593Smuzhiyun 		return r;
335*4882a593Smuzhiyun 	}
336*4882a593Smuzhiyun 
337*4882a593Smuzhiyun 	sb = dm_block_data(sblock);
338*4882a593Smuzhiyun 
339*4882a593Smuzhiyun 	/* Verify that target_size and region_size haven't changed. */
340*4882a593Smuzhiyun 	if (cmd->region_size != le64_to_cpu(sb->region_size) ||
341*4882a593Smuzhiyun 	    cmd->target_size != le64_to_cpu(sb->target_size)) {
342*4882a593Smuzhiyun 		DMERR("Region and/or target size don't match the ones in metadata");
343*4882a593Smuzhiyun 		r = -EINVAL;
344*4882a593Smuzhiyun 		goto out_with_lock;
345*4882a593Smuzhiyun 	}
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 	r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
348*4882a593Smuzhiyun 			       sb->metadata_space_map_root,
349*4882a593Smuzhiyun 			       sizeof(sb->metadata_space_map_root),
350*4882a593Smuzhiyun 			       &cmd->tm, &cmd->sm);
351*4882a593Smuzhiyun 
352*4882a593Smuzhiyun 	if (r) {
353*4882a593Smuzhiyun 		DMERR("dm_tm_open_with_sm failed");
354*4882a593Smuzhiyun 		goto out_with_lock;
355*4882a593Smuzhiyun 	}
356*4882a593Smuzhiyun 
357*4882a593Smuzhiyun 	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
358*4882a593Smuzhiyun 	cmd->bitset_root = le64_to_cpu(sb->bitset_root);
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun out_with_lock:
361*4882a593Smuzhiyun 	dm_bm_unlock(sblock);
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 	return r;
364*4882a593Smuzhiyun }
365*4882a593Smuzhiyun 
__format_metadata(struct dm_clone_metadata * cmd)366*4882a593Smuzhiyun static int __format_metadata(struct dm_clone_metadata *cmd)
367*4882a593Smuzhiyun {
368*4882a593Smuzhiyun 	int r;
369*4882a593Smuzhiyun 	struct dm_block *sblock;
370*4882a593Smuzhiyun 	struct superblock_disk *sb;
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
373*4882a593Smuzhiyun 	if (r) {
374*4882a593Smuzhiyun 		DMERR("Failed to create transaction manager");
375*4882a593Smuzhiyun 		return r;
376*4882a593Smuzhiyun 	}
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun 	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 	r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
381*4882a593Smuzhiyun 	if (r) {
382*4882a593Smuzhiyun 		DMERR("Failed to create empty on-disk bitset");
383*4882a593Smuzhiyun 		goto err_with_tm;
384*4882a593Smuzhiyun 	}
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun 	r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
387*4882a593Smuzhiyun 			     cmd->nr_regions, false, &cmd->bitset_root);
388*4882a593Smuzhiyun 	if (r) {
389*4882a593Smuzhiyun 		DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
390*4882a593Smuzhiyun 		goto err_with_tm;
391*4882a593Smuzhiyun 	}
392*4882a593Smuzhiyun 
393*4882a593Smuzhiyun 	/* Flush to disk all blocks, except the superblock */
394*4882a593Smuzhiyun 	r = dm_tm_pre_commit(cmd->tm);
395*4882a593Smuzhiyun 	if (r) {
396*4882a593Smuzhiyun 		DMERR("dm_tm_pre_commit failed");
397*4882a593Smuzhiyun 		goto err_with_tm;
398*4882a593Smuzhiyun 	}
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 	r = __copy_sm_root(cmd);
401*4882a593Smuzhiyun 	if (r) {
402*4882a593Smuzhiyun 		DMERR("__copy_sm_root failed");
403*4882a593Smuzhiyun 		goto err_with_tm;
404*4882a593Smuzhiyun 	}
405*4882a593Smuzhiyun 
406*4882a593Smuzhiyun 	r = superblock_write_lock_zero(cmd, &sblock);
407*4882a593Smuzhiyun 	if (r) {
408*4882a593Smuzhiyun 		DMERR("Failed to write_lock superblock");
409*4882a593Smuzhiyun 		goto err_with_tm;
410*4882a593Smuzhiyun 	}
411*4882a593Smuzhiyun 
412*4882a593Smuzhiyun 	sb = dm_block_data(sblock);
413*4882a593Smuzhiyun 	__prepare_superblock(cmd, sb);
414*4882a593Smuzhiyun 	r = dm_tm_commit(cmd->tm, sblock);
415*4882a593Smuzhiyun 	if (r) {
416*4882a593Smuzhiyun 		DMERR("Failed to commit superblock");
417*4882a593Smuzhiyun 		goto err_with_tm;
418*4882a593Smuzhiyun 	}
419*4882a593Smuzhiyun 
420*4882a593Smuzhiyun 	return 0;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun err_with_tm:
423*4882a593Smuzhiyun 	dm_sm_destroy(cmd->sm);
424*4882a593Smuzhiyun 	dm_tm_destroy(cmd->tm);
425*4882a593Smuzhiyun 
426*4882a593Smuzhiyun 	return r;
427*4882a593Smuzhiyun }
428*4882a593Smuzhiyun 
__open_or_format_metadata(struct dm_clone_metadata * cmd,bool may_format_device)429*4882a593Smuzhiyun static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
430*4882a593Smuzhiyun {
431*4882a593Smuzhiyun 	int r;
432*4882a593Smuzhiyun 	bool formatted = false;
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	r = __superblock_all_zeroes(cmd->bm, &formatted);
435*4882a593Smuzhiyun 	if (r)
436*4882a593Smuzhiyun 		return r;
437*4882a593Smuzhiyun 
438*4882a593Smuzhiyun 	if (!formatted)
439*4882a593Smuzhiyun 		return may_format_device ? __format_metadata(cmd) : -EPERM;
440*4882a593Smuzhiyun 
441*4882a593Smuzhiyun 	return __open_metadata(cmd);
442*4882a593Smuzhiyun }
443*4882a593Smuzhiyun 
__create_persistent_data_structures(struct dm_clone_metadata * cmd,bool may_format_device)444*4882a593Smuzhiyun static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
445*4882a593Smuzhiyun 					       bool may_format_device)
446*4882a593Smuzhiyun {
447*4882a593Smuzhiyun 	int r;
448*4882a593Smuzhiyun 
449*4882a593Smuzhiyun 	/* Create block manager */
450*4882a593Smuzhiyun 	cmd->bm = dm_block_manager_create(cmd->bdev,
451*4882a593Smuzhiyun 					 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
452*4882a593Smuzhiyun 					 DM_CLONE_MAX_CONCURRENT_LOCKS);
453*4882a593Smuzhiyun 	if (IS_ERR(cmd->bm)) {
454*4882a593Smuzhiyun 		DMERR("Failed to create block manager");
455*4882a593Smuzhiyun 		return PTR_ERR(cmd->bm);
456*4882a593Smuzhiyun 	}
457*4882a593Smuzhiyun 
458*4882a593Smuzhiyun 	r = __open_or_format_metadata(cmd, may_format_device);
459*4882a593Smuzhiyun 	if (r)
460*4882a593Smuzhiyun 		dm_block_manager_destroy(cmd->bm);
461*4882a593Smuzhiyun 
462*4882a593Smuzhiyun 	return r;
463*4882a593Smuzhiyun }
464*4882a593Smuzhiyun 
__destroy_persistent_data_structures(struct dm_clone_metadata * cmd)465*4882a593Smuzhiyun static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
466*4882a593Smuzhiyun {
467*4882a593Smuzhiyun 	dm_sm_destroy(cmd->sm);
468*4882a593Smuzhiyun 	dm_tm_destroy(cmd->tm);
469*4882a593Smuzhiyun 	dm_block_manager_destroy(cmd->bm);
470*4882a593Smuzhiyun }
471*4882a593Smuzhiyun 
472*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
473*4882a593Smuzhiyun 
bitmap_size(unsigned long nr_bits)474*4882a593Smuzhiyun static size_t bitmap_size(unsigned long nr_bits)
475*4882a593Smuzhiyun {
476*4882a593Smuzhiyun 	return BITS_TO_LONGS(nr_bits) * sizeof(long);
477*4882a593Smuzhiyun }
478*4882a593Smuzhiyun 
__dirty_map_init(struct dirty_map * dmap,unsigned long nr_words,unsigned long nr_regions)479*4882a593Smuzhiyun static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
480*4882a593Smuzhiyun 			    unsigned long nr_regions)
481*4882a593Smuzhiyun {
482*4882a593Smuzhiyun 	dmap->changed = 0;
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
485*4882a593Smuzhiyun 	if (!dmap->dirty_words)
486*4882a593Smuzhiyun 		return -ENOMEM;
487*4882a593Smuzhiyun 
488*4882a593Smuzhiyun 	dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
489*4882a593Smuzhiyun 	if (!dmap->dirty_regions) {
490*4882a593Smuzhiyun 		kvfree(dmap->dirty_words);
491*4882a593Smuzhiyun 		return -ENOMEM;
492*4882a593Smuzhiyun 	}
493*4882a593Smuzhiyun 
494*4882a593Smuzhiyun 	return 0;
495*4882a593Smuzhiyun }
496*4882a593Smuzhiyun 
__dirty_map_exit(struct dirty_map * dmap)497*4882a593Smuzhiyun static void __dirty_map_exit(struct dirty_map *dmap)
498*4882a593Smuzhiyun {
499*4882a593Smuzhiyun 	kvfree(dmap->dirty_words);
500*4882a593Smuzhiyun 	kvfree(dmap->dirty_regions);
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun 
dirty_map_init(struct dm_clone_metadata * cmd)503*4882a593Smuzhiyun static int dirty_map_init(struct dm_clone_metadata *cmd)
504*4882a593Smuzhiyun {
505*4882a593Smuzhiyun 	if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
506*4882a593Smuzhiyun 		DMERR("Failed to allocate dirty bitmap");
507*4882a593Smuzhiyun 		return -ENOMEM;
508*4882a593Smuzhiyun 	}
509*4882a593Smuzhiyun 
510*4882a593Smuzhiyun 	if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
511*4882a593Smuzhiyun 		DMERR("Failed to allocate dirty bitmap");
512*4882a593Smuzhiyun 		__dirty_map_exit(&cmd->dmap[0]);
513*4882a593Smuzhiyun 		return -ENOMEM;
514*4882a593Smuzhiyun 	}
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	cmd->current_dmap = &cmd->dmap[0];
517*4882a593Smuzhiyun 	cmd->committing_dmap = NULL;
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun 	return 0;
520*4882a593Smuzhiyun }
521*4882a593Smuzhiyun 
dirty_map_exit(struct dm_clone_metadata * cmd)522*4882a593Smuzhiyun static void dirty_map_exit(struct dm_clone_metadata *cmd)
523*4882a593Smuzhiyun {
524*4882a593Smuzhiyun 	__dirty_map_exit(&cmd->dmap[0]);
525*4882a593Smuzhiyun 	__dirty_map_exit(&cmd->dmap[1]);
526*4882a593Smuzhiyun }
527*4882a593Smuzhiyun 
__load_bitset_in_core(struct dm_clone_metadata * cmd)528*4882a593Smuzhiyun static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
529*4882a593Smuzhiyun {
530*4882a593Smuzhiyun 	int r;
531*4882a593Smuzhiyun 	unsigned long i;
532*4882a593Smuzhiyun 	struct dm_bitset_cursor c;
533*4882a593Smuzhiyun 
534*4882a593Smuzhiyun 	/* Flush bitset cache */
535*4882a593Smuzhiyun 	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
536*4882a593Smuzhiyun 	if (r)
537*4882a593Smuzhiyun 		return r;
538*4882a593Smuzhiyun 
539*4882a593Smuzhiyun 	r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
540*4882a593Smuzhiyun 	if (r)
541*4882a593Smuzhiyun 		return r;
542*4882a593Smuzhiyun 
543*4882a593Smuzhiyun 	for (i = 0; ; i++) {
544*4882a593Smuzhiyun 		if (dm_bitset_cursor_get_value(&c))
545*4882a593Smuzhiyun 			__set_bit(i, cmd->region_map);
546*4882a593Smuzhiyun 		else
547*4882a593Smuzhiyun 			__clear_bit(i, cmd->region_map);
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 		if (i >= (cmd->nr_regions - 1))
550*4882a593Smuzhiyun 			break;
551*4882a593Smuzhiyun 
552*4882a593Smuzhiyun 		r = dm_bitset_cursor_next(&c);
553*4882a593Smuzhiyun 
554*4882a593Smuzhiyun 		if (r)
555*4882a593Smuzhiyun 			break;
556*4882a593Smuzhiyun 	}
557*4882a593Smuzhiyun 
558*4882a593Smuzhiyun 	dm_bitset_cursor_end(&c);
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	return r;
561*4882a593Smuzhiyun }
562*4882a593Smuzhiyun 
dm_clone_metadata_open(struct block_device * bdev,sector_t target_size,sector_t region_size)563*4882a593Smuzhiyun struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
564*4882a593Smuzhiyun 						 sector_t target_size,
565*4882a593Smuzhiyun 						 sector_t region_size)
566*4882a593Smuzhiyun {
567*4882a593Smuzhiyun 	int r;
568*4882a593Smuzhiyun 	struct dm_clone_metadata *cmd;
569*4882a593Smuzhiyun 
570*4882a593Smuzhiyun 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
571*4882a593Smuzhiyun 	if (!cmd) {
572*4882a593Smuzhiyun 		DMERR("Failed to allocate memory for dm-clone metadata");
573*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
574*4882a593Smuzhiyun 	}
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun 	cmd->bdev = bdev;
577*4882a593Smuzhiyun 	cmd->target_size = target_size;
578*4882a593Smuzhiyun 	cmd->region_size = region_size;
579*4882a593Smuzhiyun 	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
580*4882a593Smuzhiyun 	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
581*4882a593Smuzhiyun 
582*4882a593Smuzhiyun 	init_rwsem(&cmd->lock);
583*4882a593Smuzhiyun 	spin_lock_init(&cmd->bitmap_lock);
584*4882a593Smuzhiyun 	cmd->read_only = 0;
585*4882a593Smuzhiyun 	cmd->fail_io = false;
586*4882a593Smuzhiyun 	cmd->hydration_done = false;
587*4882a593Smuzhiyun 
588*4882a593Smuzhiyun 	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
589*4882a593Smuzhiyun 	if (!cmd->region_map) {
590*4882a593Smuzhiyun 		DMERR("Failed to allocate memory for region bitmap");
591*4882a593Smuzhiyun 		r = -ENOMEM;
592*4882a593Smuzhiyun 		goto out_with_md;
593*4882a593Smuzhiyun 	}
594*4882a593Smuzhiyun 
595*4882a593Smuzhiyun 	r = __create_persistent_data_structures(cmd, true);
596*4882a593Smuzhiyun 	if (r)
597*4882a593Smuzhiyun 		goto out_with_region_map;
598*4882a593Smuzhiyun 
599*4882a593Smuzhiyun 	r = __load_bitset_in_core(cmd);
600*4882a593Smuzhiyun 	if (r) {
601*4882a593Smuzhiyun 		DMERR("Failed to load on-disk region map");
602*4882a593Smuzhiyun 		goto out_with_pds;
603*4882a593Smuzhiyun 	}
604*4882a593Smuzhiyun 
605*4882a593Smuzhiyun 	r = dirty_map_init(cmd);
606*4882a593Smuzhiyun 	if (r)
607*4882a593Smuzhiyun 		goto out_with_pds;
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun 	if (bitmap_full(cmd->region_map, cmd->nr_regions))
610*4882a593Smuzhiyun 		cmd->hydration_done = true;
611*4882a593Smuzhiyun 
612*4882a593Smuzhiyun 	return cmd;
613*4882a593Smuzhiyun 
614*4882a593Smuzhiyun out_with_pds:
615*4882a593Smuzhiyun 	__destroy_persistent_data_structures(cmd);
616*4882a593Smuzhiyun 
617*4882a593Smuzhiyun out_with_region_map:
618*4882a593Smuzhiyun 	kvfree(cmd->region_map);
619*4882a593Smuzhiyun 
620*4882a593Smuzhiyun out_with_md:
621*4882a593Smuzhiyun 	kfree(cmd);
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	return ERR_PTR(r);
624*4882a593Smuzhiyun }
625*4882a593Smuzhiyun 
dm_clone_metadata_close(struct dm_clone_metadata * cmd)626*4882a593Smuzhiyun void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
627*4882a593Smuzhiyun {
628*4882a593Smuzhiyun 	if (!cmd->fail_io)
629*4882a593Smuzhiyun 		__destroy_persistent_data_structures(cmd);
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 	dirty_map_exit(cmd);
632*4882a593Smuzhiyun 	kvfree(cmd->region_map);
633*4882a593Smuzhiyun 	kfree(cmd);
634*4882a593Smuzhiyun }
635*4882a593Smuzhiyun 
dm_clone_is_hydration_done(struct dm_clone_metadata * cmd)636*4882a593Smuzhiyun bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
637*4882a593Smuzhiyun {
638*4882a593Smuzhiyun 	return cmd->hydration_done;
639*4882a593Smuzhiyun }
640*4882a593Smuzhiyun 
dm_clone_is_region_hydrated(struct dm_clone_metadata * cmd,unsigned long region_nr)641*4882a593Smuzhiyun bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
642*4882a593Smuzhiyun {
643*4882a593Smuzhiyun 	return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
644*4882a593Smuzhiyun }
645*4882a593Smuzhiyun 
dm_clone_is_range_hydrated(struct dm_clone_metadata * cmd,unsigned long start,unsigned long nr_regions)646*4882a593Smuzhiyun bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
647*4882a593Smuzhiyun 				unsigned long start, unsigned long nr_regions)
648*4882a593Smuzhiyun {
649*4882a593Smuzhiyun 	unsigned long bit;
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 	if (dm_clone_is_hydration_done(cmd))
652*4882a593Smuzhiyun 		return true;
653*4882a593Smuzhiyun 
654*4882a593Smuzhiyun 	bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
655*4882a593Smuzhiyun 
656*4882a593Smuzhiyun 	return (bit >= (start + nr_regions));
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun 
dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata * cmd)659*4882a593Smuzhiyun unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
660*4882a593Smuzhiyun {
661*4882a593Smuzhiyun 	return bitmap_weight(cmd->region_map, cmd->nr_regions);
662*4882a593Smuzhiyun }
663*4882a593Smuzhiyun 
dm_clone_find_next_unhydrated_region(struct dm_clone_metadata * cmd,unsigned long start)664*4882a593Smuzhiyun unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
665*4882a593Smuzhiyun 						   unsigned long start)
666*4882a593Smuzhiyun {
667*4882a593Smuzhiyun 	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
668*4882a593Smuzhiyun }
669*4882a593Smuzhiyun 
__update_metadata_word(struct dm_clone_metadata * cmd,unsigned long * dirty_regions,unsigned long word)670*4882a593Smuzhiyun static int __update_metadata_word(struct dm_clone_metadata *cmd,
671*4882a593Smuzhiyun 				  unsigned long *dirty_regions,
672*4882a593Smuzhiyun 				  unsigned long word)
673*4882a593Smuzhiyun {
674*4882a593Smuzhiyun 	int r;
675*4882a593Smuzhiyun 	unsigned long index = word * BITS_PER_LONG;
676*4882a593Smuzhiyun 	unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
677*4882a593Smuzhiyun 
678*4882a593Smuzhiyun 	while (index < max_index) {
679*4882a593Smuzhiyun 		if (test_bit(index, dirty_regions)) {
680*4882a593Smuzhiyun 			r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
681*4882a593Smuzhiyun 					      index, &cmd->bitset_root);
682*4882a593Smuzhiyun 			if (r) {
683*4882a593Smuzhiyun 				DMERR("dm_bitset_set_bit failed");
684*4882a593Smuzhiyun 				return r;
685*4882a593Smuzhiyun 			}
686*4882a593Smuzhiyun 			__clear_bit(index, dirty_regions);
687*4882a593Smuzhiyun 		}
688*4882a593Smuzhiyun 		index++;
689*4882a593Smuzhiyun 	}
690*4882a593Smuzhiyun 
691*4882a593Smuzhiyun 	return 0;
692*4882a593Smuzhiyun }
693*4882a593Smuzhiyun 
__metadata_commit(struct dm_clone_metadata * cmd)694*4882a593Smuzhiyun static int __metadata_commit(struct dm_clone_metadata *cmd)
695*4882a593Smuzhiyun {
696*4882a593Smuzhiyun 	int r;
697*4882a593Smuzhiyun 	struct dm_block *sblock;
698*4882a593Smuzhiyun 	struct superblock_disk *sb;
699*4882a593Smuzhiyun 
700*4882a593Smuzhiyun 	/* Flush bitset cache */
701*4882a593Smuzhiyun 	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
702*4882a593Smuzhiyun 	if (r) {
703*4882a593Smuzhiyun 		DMERR("dm_bitset_flush failed");
704*4882a593Smuzhiyun 		return r;
705*4882a593Smuzhiyun 	}
706*4882a593Smuzhiyun 
707*4882a593Smuzhiyun 	/* Flush to disk all blocks, except the superblock */
708*4882a593Smuzhiyun 	r = dm_tm_pre_commit(cmd->tm);
709*4882a593Smuzhiyun 	if (r) {
710*4882a593Smuzhiyun 		DMERR("dm_tm_pre_commit failed");
711*4882a593Smuzhiyun 		return r;
712*4882a593Smuzhiyun 	}
713*4882a593Smuzhiyun 
714*4882a593Smuzhiyun 	/* Save the space map root in cmd->metadata_space_map_root */
715*4882a593Smuzhiyun 	r = __copy_sm_root(cmd);
716*4882a593Smuzhiyun 	if (r) {
717*4882a593Smuzhiyun 		DMERR("__copy_sm_root failed");
718*4882a593Smuzhiyun 		return r;
719*4882a593Smuzhiyun 	}
720*4882a593Smuzhiyun 
721*4882a593Smuzhiyun 	/* Lock the superblock */
722*4882a593Smuzhiyun 	r = superblock_write_lock_zero(cmd, &sblock);
723*4882a593Smuzhiyun 	if (r) {
724*4882a593Smuzhiyun 		DMERR("Failed to write_lock superblock");
725*4882a593Smuzhiyun 		return r;
726*4882a593Smuzhiyun 	}
727*4882a593Smuzhiyun 
728*4882a593Smuzhiyun 	/* Save the metadata in superblock */
729*4882a593Smuzhiyun 	sb = dm_block_data(sblock);
730*4882a593Smuzhiyun 	__prepare_superblock(cmd, sb);
731*4882a593Smuzhiyun 
732*4882a593Smuzhiyun 	/* Unlock superblock and commit it to disk */
733*4882a593Smuzhiyun 	r = dm_tm_commit(cmd->tm, sblock);
734*4882a593Smuzhiyun 	if (r) {
735*4882a593Smuzhiyun 		DMERR("Failed to commit superblock");
736*4882a593Smuzhiyun 		return r;
737*4882a593Smuzhiyun 	}
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun 	/*
740*4882a593Smuzhiyun 	 * FIXME: Find a more efficient way to check if the hydration is done.
741*4882a593Smuzhiyun 	 */
742*4882a593Smuzhiyun 	if (bitmap_full(cmd->region_map, cmd->nr_regions))
743*4882a593Smuzhiyun 		cmd->hydration_done = true;
744*4882a593Smuzhiyun 
745*4882a593Smuzhiyun 	return 0;
746*4882a593Smuzhiyun }
747*4882a593Smuzhiyun 
__flush_dmap(struct dm_clone_metadata * cmd,struct dirty_map * dmap)748*4882a593Smuzhiyun static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
749*4882a593Smuzhiyun {
750*4882a593Smuzhiyun 	int r;
751*4882a593Smuzhiyun 	unsigned long word;
752*4882a593Smuzhiyun 
753*4882a593Smuzhiyun 	word = 0;
754*4882a593Smuzhiyun 	do {
755*4882a593Smuzhiyun 		word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun 		if (word == cmd->nr_words)
758*4882a593Smuzhiyun 			break;
759*4882a593Smuzhiyun 
760*4882a593Smuzhiyun 		r = __update_metadata_word(cmd, dmap->dirty_regions, word);
761*4882a593Smuzhiyun 
762*4882a593Smuzhiyun 		if (r)
763*4882a593Smuzhiyun 			return r;
764*4882a593Smuzhiyun 
765*4882a593Smuzhiyun 		__clear_bit(word, dmap->dirty_words);
766*4882a593Smuzhiyun 		word++;
767*4882a593Smuzhiyun 	} while (word < cmd->nr_words);
768*4882a593Smuzhiyun 
769*4882a593Smuzhiyun 	r = __metadata_commit(cmd);
770*4882a593Smuzhiyun 
771*4882a593Smuzhiyun 	if (r)
772*4882a593Smuzhiyun 		return r;
773*4882a593Smuzhiyun 
774*4882a593Smuzhiyun 	/* Update the changed flag */
775*4882a593Smuzhiyun 	spin_lock_irq(&cmd->bitmap_lock);
776*4882a593Smuzhiyun 	dmap->changed = 0;
777*4882a593Smuzhiyun 	spin_unlock_irq(&cmd->bitmap_lock);
778*4882a593Smuzhiyun 
779*4882a593Smuzhiyun 	return 0;
780*4882a593Smuzhiyun }
781*4882a593Smuzhiyun 
dm_clone_metadata_pre_commit(struct dm_clone_metadata * cmd)782*4882a593Smuzhiyun int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
783*4882a593Smuzhiyun {
784*4882a593Smuzhiyun 	int r = 0;
785*4882a593Smuzhiyun 	struct dirty_map *dmap, *next_dmap;
786*4882a593Smuzhiyun 
787*4882a593Smuzhiyun 	down_write(&cmd->lock);
788*4882a593Smuzhiyun 
789*4882a593Smuzhiyun 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
790*4882a593Smuzhiyun 		r = -EPERM;
791*4882a593Smuzhiyun 		goto out;
792*4882a593Smuzhiyun 	}
793*4882a593Smuzhiyun 
794*4882a593Smuzhiyun 	/* Get current dirty bitmap */
795*4882a593Smuzhiyun 	dmap = cmd->current_dmap;
796*4882a593Smuzhiyun 
797*4882a593Smuzhiyun 	/* Get next dirty bitmap */
798*4882a593Smuzhiyun 	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
799*4882a593Smuzhiyun 
800*4882a593Smuzhiyun 	/*
801*4882a593Smuzhiyun 	 * The last commit failed, so we don't have a clean dirty-bitmap to
802*4882a593Smuzhiyun 	 * use.
803*4882a593Smuzhiyun 	 */
804*4882a593Smuzhiyun 	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
805*4882a593Smuzhiyun 		r = -EINVAL;
806*4882a593Smuzhiyun 		goto out;
807*4882a593Smuzhiyun 	}
808*4882a593Smuzhiyun 
809*4882a593Smuzhiyun 	/* Swap dirty bitmaps */
810*4882a593Smuzhiyun 	spin_lock_irq(&cmd->bitmap_lock);
811*4882a593Smuzhiyun 	cmd->current_dmap = next_dmap;
812*4882a593Smuzhiyun 	spin_unlock_irq(&cmd->bitmap_lock);
813*4882a593Smuzhiyun 
814*4882a593Smuzhiyun 	/* Set old dirty bitmap as currently committing */
815*4882a593Smuzhiyun 	cmd->committing_dmap = dmap;
816*4882a593Smuzhiyun out:
817*4882a593Smuzhiyun 	up_write(&cmd->lock);
818*4882a593Smuzhiyun 
819*4882a593Smuzhiyun 	return r;
820*4882a593Smuzhiyun }
821*4882a593Smuzhiyun 
dm_clone_metadata_commit(struct dm_clone_metadata * cmd)822*4882a593Smuzhiyun int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
823*4882a593Smuzhiyun {
824*4882a593Smuzhiyun 	int r = -EPERM;
825*4882a593Smuzhiyun 
826*4882a593Smuzhiyun 	down_write(&cmd->lock);
827*4882a593Smuzhiyun 
828*4882a593Smuzhiyun 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
829*4882a593Smuzhiyun 		goto out;
830*4882a593Smuzhiyun 
831*4882a593Smuzhiyun 	if (WARN_ON(!cmd->committing_dmap)) {
832*4882a593Smuzhiyun 		r = -EINVAL;
833*4882a593Smuzhiyun 		goto out;
834*4882a593Smuzhiyun 	}
835*4882a593Smuzhiyun 
836*4882a593Smuzhiyun 	r = __flush_dmap(cmd, cmd->committing_dmap);
837*4882a593Smuzhiyun 	if (!r) {
838*4882a593Smuzhiyun 		/* Clear committing dmap */
839*4882a593Smuzhiyun 		cmd->committing_dmap = NULL;
840*4882a593Smuzhiyun 	}
841*4882a593Smuzhiyun out:
842*4882a593Smuzhiyun 	up_write(&cmd->lock);
843*4882a593Smuzhiyun 
844*4882a593Smuzhiyun 	return r;
845*4882a593Smuzhiyun }
846*4882a593Smuzhiyun 
dm_clone_set_region_hydrated(struct dm_clone_metadata * cmd,unsigned long region_nr)847*4882a593Smuzhiyun int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
848*4882a593Smuzhiyun {
849*4882a593Smuzhiyun 	int r = 0;
850*4882a593Smuzhiyun 	struct dirty_map *dmap;
851*4882a593Smuzhiyun 	unsigned long word, flags;
852*4882a593Smuzhiyun 
853*4882a593Smuzhiyun 	if (unlikely(region_nr >= cmd->nr_regions)) {
854*4882a593Smuzhiyun 		DMERR("Region %lu out of range (total number of regions %lu)",
855*4882a593Smuzhiyun 		      region_nr, cmd->nr_regions);
856*4882a593Smuzhiyun 		return -ERANGE;
857*4882a593Smuzhiyun 	}
858*4882a593Smuzhiyun 
859*4882a593Smuzhiyun 	word = region_nr / BITS_PER_LONG;
860*4882a593Smuzhiyun 
861*4882a593Smuzhiyun 	spin_lock_irqsave(&cmd->bitmap_lock, flags);
862*4882a593Smuzhiyun 
863*4882a593Smuzhiyun 	if (cmd->read_only) {
864*4882a593Smuzhiyun 		r = -EPERM;
865*4882a593Smuzhiyun 		goto out;
866*4882a593Smuzhiyun 	}
867*4882a593Smuzhiyun 
868*4882a593Smuzhiyun 	dmap = cmd->current_dmap;
869*4882a593Smuzhiyun 
870*4882a593Smuzhiyun 	__set_bit(word, dmap->dirty_words);
871*4882a593Smuzhiyun 	__set_bit(region_nr, dmap->dirty_regions);
872*4882a593Smuzhiyun 	__set_bit(region_nr, cmd->region_map);
873*4882a593Smuzhiyun 	dmap->changed = 1;
874*4882a593Smuzhiyun 
875*4882a593Smuzhiyun out:
876*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
877*4882a593Smuzhiyun 
878*4882a593Smuzhiyun 	return r;
879*4882a593Smuzhiyun }
880*4882a593Smuzhiyun 
dm_clone_cond_set_range(struct dm_clone_metadata * cmd,unsigned long start,unsigned long nr_regions)881*4882a593Smuzhiyun int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
882*4882a593Smuzhiyun 			    unsigned long nr_regions)
883*4882a593Smuzhiyun {
884*4882a593Smuzhiyun 	int r = 0;
885*4882a593Smuzhiyun 	struct dirty_map *dmap;
886*4882a593Smuzhiyun 	unsigned long word, region_nr;
887*4882a593Smuzhiyun 
888*4882a593Smuzhiyun 	if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
889*4882a593Smuzhiyun 		     (start + nr_regions) > cmd->nr_regions)) {
890*4882a593Smuzhiyun 		DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
891*4882a593Smuzhiyun 		      start, nr_regions, cmd->nr_regions);
892*4882a593Smuzhiyun 		return -ERANGE;
893*4882a593Smuzhiyun 	}
894*4882a593Smuzhiyun 
895*4882a593Smuzhiyun 	spin_lock_irq(&cmd->bitmap_lock);
896*4882a593Smuzhiyun 
897*4882a593Smuzhiyun 	if (cmd->read_only) {
898*4882a593Smuzhiyun 		r = -EPERM;
899*4882a593Smuzhiyun 		goto out;
900*4882a593Smuzhiyun 	}
901*4882a593Smuzhiyun 
902*4882a593Smuzhiyun 	dmap = cmd->current_dmap;
903*4882a593Smuzhiyun 	for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
904*4882a593Smuzhiyun 		if (!test_bit(region_nr, cmd->region_map)) {
905*4882a593Smuzhiyun 			word = region_nr / BITS_PER_LONG;
906*4882a593Smuzhiyun 			__set_bit(word, dmap->dirty_words);
907*4882a593Smuzhiyun 			__set_bit(region_nr, dmap->dirty_regions);
908*4882a593Smuzhiyun 			__set_bit(region_nr, cmd->region_map);
909*4882a593Smuzhiyun 			dmap->changed = 1;
910*4882a593Smuzhiyun 		}
911*4882a593Smuzhiyun 	}
912*4882a593Smuzhiyun out:
913*4882a593Smuzhiyun 	spin_unlock_irq(&cmd->bitmap_lock);
914*4882a593Smuzhiyun 
915*4882a593Smuzhiyun 	return r;
916*4882a593Smuzhiyun }
917*4882a593Smuzhiyun 
918*4882a593Smuzhiyun /*
919*4882a593Smuzhiyun  * WARNING: This must not be called concurrently with either
920*4882a593Smuzhiyun  * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
921*4882a593Smuzhiyun  * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
922*4882a593Smuzhiyun  * exception is after setting the metadata to read-only mode, using
923*4882a593Smuzhiyun  * dm_clone_metadata_set_read_only().
924*4882a593Smuzhiyun  *
925*4882a593Smuzhiyun  * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
926*4882a593Smuzhiyun  * may block.
927*4882a593Smuzhiyun  */
dm_clone_reload_in_core_bitset(struct dm_clone_metadata * cmd)928*4882a593Smuzhiyun int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
929*4882a593Smuzhiyun {
930*4882a593Smuzhiyun 	int r = -EINVAL;
931*4882a593Smuzhiyun 
932*4882a593Smuzhiyun 	down_write(&cmd->lock);
933*4882a593Smuzhiyun 
934*4882a593Smuzhiyun 	if (cmd->fail_io)
935*4882a593Smuzhiyun 		goto out;
936*4882a593Smuzhiyun 
937*4882a593Smuzhiyun 	r = __load_bitset_in_core(cmd);
938*4882a593Smuzhiyun out:
939*4882a593Smuzhiyun 	up_write(&cmd->lock);
940*4882a593Smuzhiyun 
941*4882a593Smuzhiyun 	return r;
942*4882a593Smuzhiyun }
943*4882a593Smuzhiyun 
dm_clone_changed_this_transaction(struct dm_clone_metadata * cmd)944*4882a593Smuzhiyun bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
945*4882a593Smuzhiyun {
946*4882a593Smuzhiyun 	bool r;
947*4882a593Smuzhiyun 	unsigned long flags;
948*4882a593Smuzhiyun 
949*4882a593Smuzhiyun 	spin_lock_irqsave(&cmd->bitmap_lock, flags);
950*4882a593Smuzhiyun 	r = cmd->dmap[0].changed || cmd->dmap[1].changed;
951*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
952*4882a593Smuzhiyun 
953*4882a593Smuzhiyun 	return r;
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun 
dm_clone_metadata_abort(struct dm_clone_metadata * cmd)956*4882a593Smuzhiyun int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
957*4882a593Smuzhiyun {
958*4882a593Smuzhiyun 	int r = -EPERM;
959*4882a593Smuzhiyun 
960*4882a593Smuzhiyun 	down_write(&cmd->lock);
961*4882a593Smuzhiyun 
962*4882a593Smuzhiyun 	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
963*4882a593Smuzhiyun 		goto out;
964*4882a593Smuzhiyun 
965*4882a593Smuzhiyun 	__destroy_persistent_data_structures(cmd);
966*4882a593Smuzhiyun 
967*4882a593Smuzhiyun 	r = __create_persistent_data_structures(cmd, false);
968*4882a593Smuzhiyun 	if (r) {
969*4882a593Smuzhiyun 		/* If something went wrong we can neither write nor read the metadata */
970*4882a593Smuzhiyun 		cmd->fail_io = true;
971*4882a593Smuzhiyun 	}
972*4882a593Smuzhiyun out:
973*4882a593Smuzhiyun 	up_write(&cmd->lock);
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun 	return r;
976*4882a593Smuzhiyun }
977*4882a593Smuzhiyun 
dm_clone_metadata_set_read_only(struct dm_clone_metadata * cmd)978*4882a593Smuzhiyun void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
979*4882a593Smuzhiyun {
980*4882a593Smuzhiyun 	down_write(&cmd->lock);
981*4882a593Smuzhiyun 
982*4882a593Smuzhiyun 	spin_lock_irq(&cmd->bitmap_lock);
983*4882a593Smuzhiyun 	cmd->read_only = 1;
984*4882a593Smuzhiyun 	spin_unlock_irq(&cmd->bitmap_lock);
985*4882a593Smuzhiyun 
986*4882a593Smuzhiyun 	if (!cmd->fail_io)
987*4882a593Smuzhiyun 		dm_bm_set_read_only(cmd->bm);
988*4882a593Smuzhiyun 
989*4882a593Smuzhiyun 	up_write(&cmd->lock);
990*4882a593Smuzhiyun }
991*4882a593Smuzhiyun 
dm_clone_metadata_set_read_write(struct dm_clone_metadata * cmd)992*4882a593Smuzhiyun void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
993*4882a593Smuzhiyun {
994*4882a593Smuzhiyun 	down_write(&cmd->lock);
995*4882a593Smuzhiyun 
996*4882a593Smuzhiyun 	spin_lock_irq(&cmd->bitmap_lock);
997*4882a593Smuzhiyun 	cmd->read_only = 0;
998*4882a593Smuzhiyun 	spin_unlock_irq(&cmd->bitmap_lock);
999*4882a593Smuzhiyun 
1000*4882a593Smuzhiyun 	if (!cmd->fail_io)
1001*4882a593Smuzhiyun 		dm_bm_set_read_write(cmd->bm);
1002*4882a593Smuzhiyun 
1003*4882a593Smuzhiyun 	up_write(&cmd->lock);
1004*4882a593Smuzhiyun }
1005*4882a593Smuzhiyun 
dm_clone_get_free_metadata_block_count(struct dm_clone_metadata * cmd,dm_block_t * result)1006*4882a593Smuzhiyun int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
1007*4882a593Smuzhiyun 					   dm_block_t *result)
1008*4882a593Smuzhiyun {
1009*4882a593Smuzhiyun 	int r = -EINVAL;
1010*4882a593Smuzhiyun 
1011*4882a593Smuzhiyun 	down_read(&cmd->lock);
1012*4882a593Smuzhiyun 
1013*4882a593Smuzhiyun 	if (!cmd->fail_io)
1014*4882a593Smuzhiyun 		r = dm_sm_get_nr_free(cmd->sm, result);
1015*4882a593Smuzhiyun 
1016*4882a593Smuzhiyun 	up_read(&cmd->lock);
1017*4882a593Smuzhiyun 
1018*4882a593Smuzhiyun 	return r;
1019*4882a593Smuzhiyun }
1020*4882a593Smuzhiyun 
dm_clone_get_metadata_dev_size(struct dm_clone_metadata * cmd,dm_block_t * result)1021*4882a593Smuzhiyun int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
1022*4882a593Smuzhiyun 				   dm_block_t *result)
1023*4882a593Smuzhiyun {
1024*4882a593Smuzhiyun 	int r = -EINVAL;
1025*4882a593Smuzhiyun 
1026*4882a593Smuzhiyun 	down_read(&cmd->lock);
1027*4882a593Smuzhiyun 
1028*4882a593Smuzhiyun 	if (!cmd->fail_io)
1029*4882a593Smuzhiyun 		r = dm_sm_get_nr_blocks(cmd->sm, result);
1030*4882a593Smuzhiyun 
1031*4882a593Smuzhiyun 	up_read(&cmd->lock);
1032*4882a593Smuzhiyun 
1033*4882a593Smuzhiyun 	return r;
1034*4882a593Smuzhiyun }
1035