1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun #include <linux/mm.h>
7*4882a593Smuzhiyun #include <linux/err.h>
8*4882a593Smuzhiyun #include <linux/slab.h>
9*4882a593Smuzhiyun #include <linux/rwsem.h>
10*4882a593Smuzhiyun #include <linux/bitops.h>
11*4882a593Smuzhiyun #include <linux/bitmap.h>
12*4882a593Smuzhiyun #include <linux/device-mapper.h>
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun #include "persistent-data/dm-bitset.h"
15*4882a593Smuzhiyun #include "persistent-data/dm-space-map.h"
16*4882a593Smuzhiyun #include "persistent-data/dm-block-manager.h"
17*4882a593Smuzhiyun #include "persistent-data/dm-transaction-manager.h"
18*4882a593Smuzhiyun
19*4882a593Smuzhiyun #include "dm-clone-metadata.h"
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun #define DM_MSG_PREFIX "clone metadata"
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun #define SUPERBLOCK_LOCATION 0
24*4882a593Smuzhiyun #define SUPERBLOCK_MAGIC 0x8af27f64
25*4882a593Smuzhiyun #define SUPERBLOCK_CSUM_XOR 257649492
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun #define DM_CLONE_MAX_CONCURRENT_LOCKS 5
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #define UUID_LEN 16
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun /* Min and max dm-clone metadata versions supported */
32*4882a593Smuzhiyun #define DM_CLONE_MIN_METADATA_VERSION 1
33*4882a593Smuzhiyun #define DM_CLONE_MAX_METADATA_VERSION 1
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun /*
36*4882a593Smuzhiyun * On-disk metadata layout
37*4882a593Smuzhiyun */
38*4882a593Smuzhiyun struct superblock_disk {
39*4882a593Smuzhiyun __le32 csum;
40*4882a593Smuzhiyun __le32 flags;
41*4882a593Smuzhiyun __le64 blocknr;
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun __u8 uuid[UUID_LEN];
44*4882a593Smuzhiyun __le64 magic;
45*4882a593Smuzhiyun __le32 version;
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun __le64 region_size;
50*4882a593Smuzhiyun __le64 target_size;
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun __le64 bitset_root;
53*4882a593Smuzhiyun } __packed;
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun /*
56*4882a593Smuzhiyun * Region and Dirty bitmaps.
57*4882a593Smuzhiyun *
58*4882a593Smuzhiyun * dm-clone logically splits the source and destination devices in regions of
59*4882a593Smuzhiyun * fixed size. The destination device's regions are gradually hydrated, i.e.,
60*4882a593Smuzhiyun * we copy (clone) the source's regions to the destination device. Eventually,
61*4882a593Smuzhiyun * all regions will get hydrated and all I/O will be served from the
62*4882a593Smuzhiyun * destination device.
63*4882a593Smuzhiyun *
64*4882a593Smuzhiyun * We maintain an on-disk bitmap which tracks the state of each of the
65*4882a593Smuzhiyun * destination device's regions, i.e., whether they are hydrated or not.
66*4882a593Smuzhiyun *
67*4882a593Smuzhiyun * To save constantly doing look ups on disk we keep an in core copy of the
68*4882a593Smuzhiyun * on-disk bitmap, the region_map.
69*4882a593Smuzhiyun *
70*4882a593Smuzhiyun * In order to track which regions are hydrated during a metadata transaction,
71*4882a593Smuzhiyun * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
72*4882a593Smuzhiyun * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
73*4882a593Smuzhiyun * tracks the regions that got hydrated during the current metadata
74*4882a593Smuzhiyun * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
75*4882a593Smuzhiyun * the dirty_regions bitmap.
76*4882a593Smuzhiyun *
77*4882a593Smuzhiyun * This allows us to precisely track the regions that were hydrated during the
78*4882a593Smuzhiyun * current metadata transaction and update the metadata accordingly, when we
79*4882a593Smuzhiyun * commit the current transaction. This is important because dm-clone should
80*4882a593Smuzhiyun * only commit the metadata of regions that were properly flushed to the
81*4882a593Smuzhiyun * destination device beforehand. Otherwise, in case of a crash, we could end
82*4882a593Smuzhiyun * up with a corrupted dm-clone device.
83*4882a593Smuzhiyun *
84*4882a593Smuzhiyun * When a region finishes hydrating dm-clone calls
85*4882a593Smuzhiyun * dm_clone_set_region_hydrated(), or for discard requests
86*4882a593Smuzhiyun * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
87*4882a593Smuzhiyun * and dmap.
88*4882a593Smuzhiyun *
89*4882a593Smuzhiyun * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
90*4882a593Smuzhiyun * and update the on-disk metadata accordingly. Thus, we don't have to flush to
91*4882a593Smuzhiyun * disk the whole region_map. We can just flush the dirty region_map bits.
92*4882a593Smuzhiyun *
93*4882a593Smuzhiyun * We use the helper dmap->dirty_words bitmap, which is smaller than the
94*4882a593Smuzhiyun * original region_map, to reduce the amount of memory accesses during a
95*4882a593Smuzhiyun * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
96*4882a593Smuzhiyun * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
97*4882a593Smuzhiyun * accesses.
98*4882a593Smuzhiyun *
99*4882a593Smuzhiyun * We could update directly the on-disk bitmap, when dm-clone calls either
100*4882a593Smuzhiyun * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
101*4882a593Smuzhiyun * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
102*4882a593Smuzhiyun * these two functions don't block, we can call them in interrupt context,
103*4882a593Smuzhiyun * e.g., in a hooked overwrite bio's completion routine, and further reduce the
104*4882a593Smuzhiyun * I/O completion latency.
105*4882a593Smuzhiyun *
106*4882a593Smuzhiyun * We maintain two dirty bitmap sets. During a metadata commit we atomically
107*4882a593Smuzhiyun * swap the currently used dmap with the unused one. This allows the metadata
108*4882a593Smuzhiyun * update functions to run concurrently with an ongoing commit.
109*4882a593Smuzhiyun */
struct dirty_map {
	/* One bit per long of dirty_regions that contains a dirty bit */
	unsigned long *dirty_words;
	/* One bit per region hydrated during the current transaction */
	unsigned long *dirty_regions;
	/* NOTE(review): presumably non-zero once any bit was dirtied -- confirm */
	unsigned int changed;
};
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun struct dm_clone_metadata {
117*4882a593Smuzhiyun /* The metadata block device */
118*4882a593Smuzhiyun struct block_device *bdev;
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun sector_t target_size;
121*4882a593Smuzhiyun sector_t region_size;
122*4882a593Smuzhiyun unsigned long nr_regions;
123*4882a593Smuzhiyun unsigned long nr_words;
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun /* Spinlock protecting the region and dirty bitmaps. */
126*4882a593Smuzhiyun spinlock_t bitmap_lock;
127*4882a593Smuzhiyun struct dirty_map dmap[2];
128*4882a593Smuzhiyun struct dirty_map *current_dmap;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun /* Protected by lock */
131*4882a593Smuzhiyun struct dirty_map *committing_dmap;
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun /*
134*4882a593Smuzhiyun * In core copy of the on-disk bitmap to save constantly doing look ups
135*4882a593Smuzhiyun * on disk.
136*4882a593Smuzhiyun */
137*4882a593Smuzhiyun unsigned long *region_map;
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun /* Protected by bitmap_lock */
140*4882a593Smuzhiyun unsigned int read_only;
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun struct dm_block_manager *bm;
143*4882a593Smuzhiyun struct dm_space_map *sm;
144*4882a593Smuzhiyun struct dm_transaction_manager *tm;
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun struct rw_semaphore lock;
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun struct dm_disk_bitset bitset_info;
149*4882a593Smuzhiyun dm_block_t bitset_root;
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun /*
152*4882a593Smuzhiyun * Reading the space map root can fail, so we read it into this
153*4882a593Smuzhiyun * buffer before the superblock is locked and updated.
154*4882a593Smuzhiyun */
155*4882a593Smuzhiyun __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun bool hydration_done:1;
158*4882a593Smuzhiyun bool fail_io:1;
159*4882a593Smuzhiyun };
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
162*4882a593Smuzhiyun
163*4882a593Smuzhiyun /*
164*4882a593Smuzhiyun * Superblock validation.
165*4882a593Smuzhiyun */
/*
 * Validator hook invoked before the superblock is written out: record the
 * block's own location in it and refresh its checksum.
 */
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b, size_t sb_block_size)
{
	struct superblock_disk *disk_sb = dm_block_data(b);
	u32 checksum;

	disk_sb->blocknr = cpu_to_le64(dm_block_location(b));

	/* The checksum starts at 'flags', i.e., it skips the csum field. */
	checksum = dm_bm_checksum(&disk_sb->flags,
				  sb_block_size - sizeof(__le32),
				  SUPERBLOCK_CSUM_XOR);
	disk_sb->csum = cpu_to_le32(checksum);
}
179*4882a593Smuzhiyun
/*
 * Validator hook invoked after the superblock is read.
 *
 * Checks, in this order: the recorded block number, the magic, the checksum
 * and the metadata version.
 *
 * Returns 0 if the superblock is valid, -ENOTBLK on a block number mismatch,
 * -EILSEQ on a bad magic or checksum and -EINVAL on an unsupported version.
 */
static int sb_check(struct dm_block_validator *v, struct dm_block *b,
		    size_t sb_block_size)
{
	struct superblock_disk *disk_sb = dm_block_data(b);
	u32 computed_csum, version;

	if (le64_to_cpu(disk_sb->blocknr) != dm_block_location(b)) {
		DMERR("Superblock check failed: blocknr %llu, expected %llu",
		      le64_to_cpu(disk_sb->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(disk_sb->magic) != SUPERBLOCK_MAGIC) {
		DMERR("Superblock check failed: magic %llu, expected %llu",
		      le64_to_cpu(disk_sb->magic),
		      (unsigned long long)SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	/* Same coverage as sb_prepare_for_write(): all but the csum field */
	computed_csum = dm_bm_checksum(&disk_sb->flags,
				       sb_block_size - sizeof(__le32),
				       SUPERBLOCK_CSUM_XOR);
	if (cpu_to_le32(computed_csum) != disk_sb->csum) {
		DMERR("Superblock check failed: checksum %u, expected %u",
		      computed_csum, le32_to_cpu(disk_sb->csum));
		return -EILSEQ;
	}

	/* Check metadata version */
	version = le32_to_cpu(disk_sb->version);
	if (version >= DM_CLONE_MIN_METADATA_VERSION &&
	    version <= DM_CLONE_MAX_METADATA_VERSION)
		return 0;

	DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
	      version, DM_CLONE_MIN_METADATA_VERSION,
	      DM_CLONE_MAX_METADATA_VERSION);
	return -EINVAL;
}
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun static struct dm_block_validator sb_validator = {
224*4882a593Smuzhiyun .name = "superblock",
225*4882a593Smuzhiyun .prepare_for_write = sb_prepare_for_write,
226*4882a593Smuzhiyun .check = sb_check
227*4882a593Smuzhiyun };
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun /*
230*4882a593Smuzhiyun * Check if the superblock is formatted or not. We consider the superblock to
231*4882a593Smuzhiyun * be formatted in case we find non-zero bytes in it.
232*4882a593Smuzhiyun */
__superblock_all_zeroes(struct dm_block_manager * bm,bool * formatted)233*4882a593Smuzhiyun static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
234*4882a593Smuzhiyun {
235*4882a593Smuzhiyun int r;
236*4882a593Smuzhiyun unsigned int i, nr_words;
237*4882a593Smuzhiyun struct dm_block *sblock;
238*4882a593Smuzhiyun __le64 *data_le, zero = cpu_to_le64(0);
239*4882a593Smuzhiyun
240*4882a593Smuzhiyun /*
241*4882a593Smuzhiyun * We don't use a validator here because the superblock could be all
242*4882a593Smuzhiyun * zeroes.
243*4882a593Smuzhiyun */
244*4882a593Smuzhiyun r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
245*4882a593Smuzhiyun if (r) {
246*4882a593Smuzhiyun DMERR("Failed to read_lock superblock");
247*4882a593Smuzhiyun return r;
248*4882a593Smuzhiyun }
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun data_le = dm_block_data(sblock);
251*4882a593Smuzhiyun *formatted = false;
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun /* This assumes that the block size is a multiple of 8 bytes */
254*4882a593Smuzhiyun BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
255*4882a593Smuzhiyun nr_words = dm_bm_block_size(bm) / sizeof(__le64);
256*4882a593Smuzhiyun for (i = 0; i < nr_words; i++) {
257*4882a593Smuzhiyun if (data_le[i] != zero) {
258*4882a593Smuzhiyun *formatted = true;
259*4882a593Smuzhiyun break;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun }
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun dm_bm_unlock(sblock);
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun return 0;
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun /*
271*4882a593Smuzhiyun * Low-level metadata handling.
272*4882a593Smuzhiyun */
superblock_read_lock(struct dm_clone_metadata * cmd,struct dm_block ** sblock)273*4882a593Smuzhiyun static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
274*4882a593Smuzhiyun struct dm_block **sblock)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun
superblock_write_lock(struct dm_clone_metadata * cmd,struct dm_block ** sblock)279*4882a593Smuzhiyun static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
280*4882a593Smuzhiyun struct dm_block **sblock)
281*4882a593Smuzhiyun {
282*4882a593Smuzhiyun return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun
superblock_write_lock_zero(struct dm_clone_metadata * cmd,struct dm_block ** sblock)285*4882a593Smuzhiyun static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
286*4882a593Smuzhiyun struct dm_block **sblock)
287*4882a593Smuzhiyun {
288*4882a593Smuzhiyun return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
289*4882a593Smuzhiyun }
290*4882a593Smuzhiyun
__copy_sm_root(struct dm_clone_metadata * cmd)291*4882a593Smuzhiyun static int __copy_sm_root(struct dm_clone_metadata *cmd)
292*4882a593Smuzhiyun {
293*4882a593Smuzhiyun int r;
294*4882a593Smuzhiyun size_t root_size;
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun r = dm_sm_root_size(cmd->sm, &root_size);
297*4882a593Smuzhiyun if (r)
298*4882a593Smuzhiyun return r;
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
301*4882a593Smuzhiyun }
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun /* Save dm-clone metadata in superblock */
__prepare_superblock(struct dm_clone_metadata * cmd,struct superblock_disk * sb)304*4882a593Smuzhiyun static void __prepare_superblock(struct dm_clone_metadata *cmd,
305*4882a593Smuzhiyun struct superblock_disk *sb)
306*4882a593Smuzhiyun {
307*4882a593Smuzhiyun sb->flags = cpu_to_le32(0UL);
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun /* FIXME: UUID is currently unused */
310*4882a593Smuzhiyun memset(sb->uuid, 0, sizeof(sb->uuid));
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
313*4882a593Smuzhiyun sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun /* Save the metadata space_map root */
316*4882a593Smuzhiyun memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
317*4882a593Smuzhiyun sizeof(cmd->metadata_space_map_root));
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun sb->region_size = cpu_to_le64(cmd->region_size);
320*4882a593Smuzhiyun sb->target_size = cpu_to_le64(cmd->target_size);
321*4882a593Smuzhiyun sb->bitset_root = cpu_to_le64(cmd->bitset_root);
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun
__open_metadata(struct dm_clone_metadata * cmd)324*4882a593Smuzhiyun static int __open_metadata(struct dm_clone_metadata *cmd)
325*4882a593Smuzhiyun {
326*4882a593Smuzhiyun int r;
327*4882a593Smuzhiyun struct dm_block *sblock;
328*4882a593Smuzhiyun struct superblock_disk *sb;
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun r = superblock_read_lock(cmd, &sblock);
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun if (r) {
333*4882a593Smuzhiyun DMERR("Failed to read_lock superblock");
334*4882a593Smuzhiyun return r;
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun
337*4882a593Smuzhiyun sb = dm_block_data(sblock);
338*4882a593Smuzhiyun
339*4882a593Smuzhiyun /* Verify that target_size and region_size haven't changed. */
340*4882a593Smuzhiyun if (cmd->region_size != le64_to_cpu(sb->region_size) ||
341*4882a593Smuzhiyun cmd->target_size != le64_to_cpu(sb->target_size)) {
342*4882a593Smuzhiyun DMERR("Region and/or target size don't match the ones in metadata");
343*4882a593Smuzhiyun r = -EINVAL;
344*4882a593Smuzhiyun goto out_with_lock;
345*4882a593Smuzhiyun }
346*4882a593Smuzhiyun
347*4882a593Smuzhiyun r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
348*4882a593Smuzhiyun sb->metadata_space_map_root,
349*4882a593Smuzhiyun sizeof(sb->metadata_space_map_root),
350*4882a593Smuzhiyun &cmd->tm, &cmd->sm);
351*4882a593Smuzhiyun
352*4882a593Smuzhiyun if (r) {
353*4882a593Smuzhiyun DMERR("dm_tm_open_with_sm failed");
354*4882a593Smuzhiyun goto out_with_lock;
355*4882a593Smuzhiyun }
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
358*4882a593Smuzhiyun cmd->bitset_root = le64_to_cpu(sb->bitset_root);
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun out_with_lock:
361*4882a593Smuzhiyun dm_bm_unlock(sblock);
362*4882a593Smuzhiyun
363*4882a593Smuzhiyun return r;
364*4882a593Smuzhiyun }
365*4882a593Smuzhiyun
__format_metadata(struct dm_clone_metadata * cmd)366*4882a593Smuzhiyun static int __format_metadata(struct dm_clone_metadata *cmd)
367*4882a593Smuzhiyun {
368*4882a593Smuzhiyun int r;
369*4882a593Smuzhiyun struct dm_block *sblock;
370*4882a593Smuzhiyun struct superblock_disk *sb;
371*4882a593Smuzhiyun
372*4882a593Smuzhiyun r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
373*4882a593Smuzhiyun if (r) {
374*4882a593Smuzhiyun DMERR("Failed to create transaction manager");
375*4882a593Smuzhiyun return r;
376*4882a593Smuzhiyun }
377*4882a593Smuzhiyun
378*4882a593Smuzhiyun dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
379*4882a593Smuzhiyun
380*4882a593Smuzhiyun r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
381*4882a593Smuzhiyun if (r) {
382*4882a593Smuzhiyun DMERR("Failed to create empty on-disk bitset");
383*4882a593Smuzhiyun goto err_with_tm;
384*4882a593Smuzhiyun }
385*4882a593Smuzhiyun
386*4882a593Smuzhiyun r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
387*4882a593Smuzhiyun cmd->nr_regions, false, &cmd->bitset_root);
388*4882a593Smuzhiyun if (r) {
389*4882a593Smuzhiyun DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
390*4882a593Smuzhiyun goto err_with_tm;
391*4882a593Smuzhiyun }
392*4882a593Smuzhiyun
393*4882a593Smuzhiyun /* Flush to disk all blocks, except the superblock */
394*4882a593Smuzhiyun r = dm_tm_pre_commit(cmd->tm);
395*4882a593Smuzhiyun if (r) {
396*4882a593Smuzhiyun DMERR("dm_tm_pre_commit failed");
397*4882a593Smuzhiyun goto err_with_tm;
398*4882a593Smuzhiyun }
399*4882a593Smuzhiyun
400*4882a593Smuzhiyun r = __copy_sm_root(cmd);
401*4882a593Smuzhiyun if (r) {
402*4882a593Smuzhiyun DMERR("__copy_sm_root failed");
403*4882a593Smuzhiyun goto err_with_tm;
404*4882a593Smuzhiyun }
405*4882a593Smuzhiyun
406*4882a593Smuzhiyun r = superblock_write_lock_zero(cmd, &sblock);
407*4882a593Smuzhiyun if (r) {
408*4882a593Smuzhiyun DMERR("Failed to write_lock superblock");
409*4882a593Smuzhiyun goto err_with_tm;
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun
412*4882a593Smuzhiyun sb = dm_block_data(sblock);
413*4882a593Smuzhiyun __prepare_superblock(cmd, sb);
414*4882a593Smuzhiyun r = dm_tm_commit(cmd->tm, sblock);
415*4882a593Smuzhiyun if (r) {
416*4882a593Smuzhiyun DMERR("Failed to commit superblock");
417*4882a593Smuzhiyun goto err_with_tm;
418*4882a593Smuzhiyun }
419*4882a593Smuzhiyun
420*4882a593Smuzhiyun return 0;
421*4882a593Smuzhiyun
422*4882a593Smuzhiyun err_with_tm:
423*4882a593Smuzhiyun dm_sm_destroy(cmd->sm);
424*4882a593Smuzhiyun dm_tm_destroy(cmd->tm);
425*4882a593Smuzhiyun
426*4882a593Smuzhiyun return r;
427*4882a593Smuzhiyun }
428*4882a593Smuzhiyun
/*
 * Open the metadata if the superblock is formatted; otherwise format it,
 * provided that @may_format_device allows it.
 *
 * Returns -EPERM if the device is unformatted and may not be formatted.
 */
static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
{
	bool formatted = false;
	int r = __superblock_all_zeroes(cmd->bm, &formatted);

	if (r)
		return r;

	if (formatted)
		return __open_metadata(cmd);

	if (!may_format_device)
		return -EPERM;

	return __format_metadata(cmd);
}
443*4882a593Smuzhiyun
/*
 * Create the block manager on top of cmd->bdev and then open or format the
 * metadata. If the latter fails, the block manager is destroyed again.
 */
static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
					       bool may_format_device)
{
	int r;

	cmd->bm = dm_block_manager_create(cmd->bdev,
					  DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					  DM_CLONE_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(cmd->bm)) {
		DMERR("Failed to create block manager");
		return PTR_ERR(cmd->bm);
	}

	r = __open_or_format_metadata(cmd, may_format_device);
	if (!r)
		return 0;

	dm_block_manager_destroy(cmd->bm);

	return r;
}
464*4882a593Smuzhiyun
__destroy_persistent_data_structures(struct dm_clone_metadata * cmd)465*4882a593Smuzhiyun static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
466*4882a593Smuzhiyun {
467*4882a593Smuzhiyun dm_sm_destroy(cmd->sm);
468*4882a593Smuzhiyun dm_tm_destroy(cmd->tm);
469*4882a593Smuzhiyun dm_block_manager_destroy(cmd->bm);
470*4882a593Smuzhiyun }
471*4882a593Smuzhiyun
472*4882a593Smuzhiyun /*---------------------------------------------------------------------------*/
473*4882a593Smuzhiyun
bitmap_size(unsigned long nr_bits)474*4882a593Smuzhiyun static size_t bitmap_size(unsigned long nr_bits)
475*4882a593Smuzhiyun {
476*4882a593Smuzhiyun return BITS_TO_LONGS(nr_bits) * sizeof(long);
477*4882a593Smuzhiyun }
478*4882a593Smuzhiyun
__dirty_map_init(struct dirty_map * dmap,unsigned long nr_words,unsigned long nr_regions)479*4882a593Smuzhiyun static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
480*4882a593Smuzhiyun unsigned long nr_regions)
481*4882a593Smuzhiyun {
482*4882a593Smuzhiyun dmap->changed = 0;
483*4882a593Smuzhiyun
484*4882a593Smuzhiyun dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
485*4882a593Smuzhiyun if (!dmap->dirty_words)
486*4882a593Smuzhiyun return -ENOMEM;
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
489*4882a593Smuzhiyun if (!dmap->dirty_regions) {
490*4882a593Smuzhiyun kvfree(dmap->dirty_words);
491*4882a593Smuzhiyun return -ENOMEM;
492*4882a593Smuzhiyun }
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun return 0;
495*4882a593Smuzhiyun }
496*4882a593Smuzhiyun
__dirty_map_exit(struct dirty_map * dmap)497*4882a593Smuzhiyun static void __dirty_map_exit(struct dirty_map *dmap)
498*4882a593Smuzhiyun {
499*4882a593Smuzhiyun kvfree(dmap->dirty_words);
500*4882a593Smuzhiyun kvfree(dmap->dirty_regions);
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun
dirty_map_init(struct dm_clone_metadata * cmd)503*4882a593Smuzhiyun static int dirty_map_init(struct dm_clone_metadata *cmd)
504*4882a593Smuzhiyun {
505*4882a593Smuzhiyun if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
506*4882a593Smuzhiyun DMERR("Failed to allocate dirty bitmap");
507*4882a593Smuzhiyun return -ENOMEM;
508*4882a593Smuzhiyun }
509*4882a593Smuzhiyun
510*4882a593Smuzhiyun if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
511*4882a593Smuzhiyun DMERR("Failed to allocate dirty bitmap");
512*4882a593Smuzhiyun __dirty_map_exit(&cmd->dmap[0]);
513*4882a593Smuzhiyun return -ENOMEM;
514*4882a593Smuzhiyun }
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun cmd->current_dmap = &cmd->dmap[0];
517*4882a593Smuzhiyun cmd->committing_dmap = NULL;
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun return 0;
520*4882a593Smuzhiyun }
521*4882a593Smuzhiyun
dirty_map_exit(struct dm_clone_metadata * cmd)522*4882a593Smuzhiyun static void dirty_map_exit(struct dm_clone_metadata *cmd)
523*4882a593Smuzhiyun {
524*4882a593Smuzhiyun __dirty_map_exit(&cmd->dmap[0]);
525*4882a593Smuzhiyun __dirty_map_exit(&cmd->dmap[1]);
526*4882a593Smuzhiyun }
527*4882a593Smuzhiyun
__load_bitset_in_core(struct dm_clone_metadata * cmd)528*4882a593Smuzhiyun static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
529*4882a593Smuzhiyun {
530*4882a593Smuzhiyun int r;
531*4882a593Smuzhiyun unsigned long i;
532*4882a593Smuzhiyun struct dm_bitset_cursor c;
533*4882a593Smuzhiyun
534*4882a593Smuzhiyun /* Flush bitset cache */
535*4882a593Smuzhiyun r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
536*4882a593Smuzhiyun if (r)
537*4882a593Smuzhiyun return r;
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
540*4882a593Smuzhiyun if (r)
541*4882a593Smuzhiyun return r;
542*4882a593Smuzhiyun
543*4882a593Smuzhiyun for (i = 0; ; i++) {
544*4882a593Smuzhiyun if (dm_bitset_cursor_get_value(&c))
545*4882a593Smuzhiyun __set_bit(i, cmd->region_map);
546*4882a593Smuzhiyun else
547*4882a593Smuzhiyun __clear_bit(i, cmd->region_map);
548*4882a593Smuzhiyun
549*4882a593Smuzhiyun if (i >= (cmd->nr_regions - 1))
550*4882a593Smuzhiyun break;
551*4882a593Smuzhiyun
552*4882a593Smuzhiyun r = dm_bitset_cursor_next(&c);
553*4882a593Smuzhiyun
554*4882a593Smuzhiyun if (r)
555*4882a593Smuzhiyun break;
556*4882a593Smuzhiyun }
557*4882a593Smuzhiyun
558*4882a593Smuzhiyun dm_bitset_cursor_end(&c);
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun return r;
561*4882a593Smuzhiyun }
562*4882a593Smuzhiyun
/*
 * Open the dm-clone metadata stored on @bdev, formatting the device first if
 * its superblock is still blank.
 *
 * @bdev:        the metadata block device
 * @target_size: size of the target, in sectors
 * @region_size: size of a region, in sectors
 *
 * Returns the new metadata object, or an ERR_PTR() on failure. On failure
 * everything allocated up to that point is released.
 */
struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
						 sector_t target_size,
						 sector_t region_size)
{
	int r;
	struct dm_clone_metadata *cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);

	if (!cmd) {
		DMERR("Failed to allocate memory for dm-clone metadata");
		return ERR_PTR(-ENOMEM);
	}

	/* Geometry */
	cmd->bdev = bdev;
	cmd->target_size = target_size;
	cmd->region_size = region_size;
	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);

	/* Locks and state flags */
	spin_lock_init(&cmd->bitmap_lock);
	init_rwsem(&cmd->lock);
	cmd->hydration_done = false;
	cmd->fail_io = false;
	cmd->read_only = 0;

	/* Not zeroed here: __load_bitset_in_core() writes every region's bit */
	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
	if (!cmd->region_map) {
		DMERR("Failed to allocate memory for region bitmap");
		r = -ENOMEM;
		goto free_cmd;
	}

	r = __create_persistent_data_structures(cmd, true);
	if (r)
		goto free_region_map;

	r = __load_bitset_in_core(cmd);
	if (r) {
		DMERR("Failed to load on-disk region map");
		goto destroy_pds;
	}

	r = dirty_map_init(cmd);
	if (r)
		goto destroy_pds;

	/* All regions already hydrated: nothing is left to copy */
	if (bitmap_full(cmd->region_map, cmd->nr_regions))
		cmd->hydration_done = true;

	return cmd;

destroy_pds:
	__destroy_persistent_data_structures(cmd);

free_region_map:
	kvfree(cmd->region_map);

free_cmd:
	kfree(cmd);

	return ERR_PTR(r);
}
625*4882a593Smuzhiyun
dm_clone_metadata_close(struct dm_clone_metadata * cmd)626*4882a593Smuzhiyun void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
627*4882a593Smuzhiyun {
628*4882a593Smuzhiyun if (!cmd->fail_io)
629*4882a593Smuzhiyun __destroy_persistent_data_structures(cmd);
630*4882a593Smuzhiyun
631*4882a593Smuzhiyun dirty_map_exit(cmd);
632*4882a593Smuzhiyun kvfree(cmd->region_map);
633*4882a593Smuzhiyun kfree(cmd);
634*4882a593Smuzhiyun }
635*4882a593Smuzhiyun
dm_clone_is_hydration_done(struct dm_clone_metadata * cmd)636*4882a593Smuzhiyun bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
637*4882a593Smuzhiyun {
638*4882a593Smuzhiyun return cmd->hydration_done;
639*4882a593Smuzhiyun }
640*4882a593Smuzhiyun
dm_clone_is_region_hydrated(struct dm_clone_metadata * cmd,unsigned long region_nr)641*4882a593Smuzhiyun bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
642*4882a593Smuzhiyun {
643*4882a593Smuzhiyun return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
644*4882a593Smuzhiyun }
645*4882a593Smuzhiyun
dm_clone_is_range_hydrated(struct dm_clone_metadata * cmd,unsigned long start,unsigned long nr_regions)646*4882a593Smuzhiyun bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
647*4882a593Smuzhiyun unsigned long start, unsigned long nr_regions)
648*4882a593Smuzhiyun {
649*4882a593Smuzhiyun unsigned long bit;
650*4882a593Smuzhiyun
651*4882a593Smuzhiyun if (dm_clone_is_hydration_done(cmd))
652*4882a593Smuzhiyun return true;
653*4882a593Smuzhiyun
654*4882a593Smuzhiyun bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
655*4882a593Smuzhiyun
656*4882a593Smuzhiyun return (bit >= (start + nr_regions));
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun
dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata * cmd)659*4882a593Smuzhiyun unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
660*4882a593Smuzhiyun {
661*4882a593Smuzhiyun return bitmap_weight(cmd->region_map, cmd->nr_regions);
662*4882a593Smuzhiyun }
663*4882a593Smuzhiyun
dm_clone_find_next_unhydrated_region(struct dm_clone_metadata * cmd,unsigned long start)664*4882a593Smuzhiyun unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
665*4882a593Smuzhiyun unsigned long start)
666*4882a593Smuzhiyun {
667*4882a593Smuzhiyun return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
668*4882a593Smuzhiyun }
669*4882a593Smuzhiyun
__update_metadata_word(struct dm_clone_metadata * cmd,unsigned long * dirty_regions,unsigned long word)670*4882a593Smuzhiyun static int __update_metadata_word(struct dm_clone_metadata *cmd,
671*4882a593Smuzhiyun unsigned long *dirty_regions,
672*4882a593Smuzhiyun unsigned long word)
673*4882a593Smuzhiyun {
674*4882a593Smuzhiyun int r;
675*4882a593Smuzhiyun unsigned long index = word * BITS_PER_LONG;
676*4882a593Smuzhiyun unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
677*4882a593Smuzhiyun
678*4882a593Smuzhiyun while (index < max_index) {
679*4882a593Smuzhiyun if (test_bit(index, dirty_regions)) {
680*4882a593Smuzhiyun r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
681*4882a593Smuzhiyun index, &cmd->bitset_root);
682*4882a593Smuzhiyun if (r) {
683*4882a593Smuzhiyun DMERR("dm_bitset_set_bit failed");
684*4882a593Smuzhiyun return r;
685*4882a593Smuzhiyun }
686*4882a593Smuzhiyun __clear_bit(index, dirty_regions);
687*4882a593Smuzhiyun }
688*4882a593Smuzhiyun index++;
689*4882a593Smuzhiyun }
690*4882a593Smuzhiyun
691*4882a593Smuzhiyun return 0;
692*4882a593Smuzhiyun }
693*4882a593Smuzhiyun
__metadata_commit(struct dm_clone_metadata * cmd)694*4882a593Smuzhiyun static int __metadata_commit(struct dm_clone_metadata *cmd)
695*4882a593Smuzhiyun {
696*4882a593Smuzhiyun int r;
697*4882a593Smuzhiyun struct dm_block *sblock;
698*4882a593Smuzhiyun struct superblock_disk *sb;
699*4882a593Smuzhiyun
700*4882a593Smuzhiyun /* Flush bitset cache */
701*4882a593Smuzhiyun r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
702*4882a593Smuzhiyun if (r) {
703*4882a593Smuzhiyun DMERR("dm_bitset_flush failed");
704*4882a593Smuzhiyun return r;
705*4882a593Smuzhiyun }
706*4882a593Smuzhiyun
707*4882a593Smuzhiyun /* Flush to disk all blocks, except the superblock */
708*4882a593Smuzhiyun r = dm_tm_pre_commit(cmd->tm);
709*4882a593Smuzhiyun if (r) {
710*4882a593Smuzhiyun DMERR("dm_tm_pre_commit failed");
711*4882a593Smuzhiyun return r;
712*4882a593Smuzhiyun }
713*4882a593Smuzhiyun
714*4882a593Smuzhiyun /* Save the space map root in cmd->metadata_space_map_root */
715*4882a593Smuzhiyun r = __copy_sm_root(cmd);
716*4882a593Smuzhiyun if (r) {
717*4882a593Smuzhiyun DMERR("__copy_sm_root failed");
718*4882a593Smuzhiyun return r;
719*4882a593Smuzhiyun }
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun /* Lock the superblock */
722*4882a593Smuzhiyun r = superblock_write_lock_zero(cmd, &sblock);
723*4882a593Smuzhiyun if (r) {
724*4882a593Smuzhiyun DMERR("Failed to write_lock superblock");
725*4882a593Smuzhiyun return r;
726*4882a593Smuzhiyun }
727*4882a593Smuzhiyun
728*4882a593Smuzhiyun /* Save the metadata in superblock */
729*4882a593Smuzhiyun sb = dm_block_data(sblock);
730*4882a593Smuzhiyun __prepare_superblock(cmd, sb);
731*4882a593Smuzhiyun
732*4882a593Smuzhiyun /* Unlock superblock and commit it to disk */
733*4882a593Smuzhiyun r = dm_tm_commit(cmd->tm, sblock);
734*4882a593Smuzhiyun if (r) {
735*4882a593Smuzhiyun DMERR("Failed to commit superblock");
736*4882a593Smuzhiyun return r;
737*4882a593Smuzhiyun }
738*4882a593Smuzhiyun
739*4882a593Smuzhiyun /*
740*4882a593Smuzhiyun * FIXME: Find a more efficient way to check if the hydration is done.
741*4882a593Smuzhiyun */
742*4882a593Smuzhiyun if (bitmap_full(cmd->region_map, cmd->nr_regions))
743*4882a593Smuzhiyun cmd->hydration_done = true;
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun return 0;
746*4882a593Smuzhiyun }
747*4882a593Smuzhiyun
__flush_dmap(struct dm_clone_metadata * cmd,struct dirty_map * dmap)748*4882a593Smuzhiyun static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
749*4882a593Smuzhiyun {
750*4882a593Smuzhiyun int r;
751*4882a593Smuzhiyun unsigned long word;
752*4882a593Smuzhiyun
753*4882a593Smuzhiyun word = 0;
754*4882a593Smuzhiyun do {
755*4882a593Smuzhiyun word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
756*4882a593Smuzhiyun
757*4882a593Smuzhiyun if (word == cmd->nr_words)
758*4882a593Smuzhiyun break;
759*4882a593Smuzhiyun
760*4882a593Smuzhiyun r = __update_metadata_word(cmd, dmap->dirty_regions, word);
761*4882a593Smuzhiyun
762*4882a593Smuzhiyun if (r)
763*4882a593Smuzhiyun return r;
764*4882a593Smuzhiyun
765*4882a593Smuzhiyun __clear_bit(word, dmap->dirty_words);
766*4882a593Smuzhiyun word++;
767*4882a593Smuzhiyun } while (word < cmd->nr_words);
768*4882a593Smuzhiyun
769*4882a593Smuzhiyun r = __metadata_commit(cmd);
770*4882a593Smuzhiyun
771*4882a593Smuzhiyun if (r)
772*4882a593Smuzhiyun return r;
773*4882a593Smuzhiyun
774*4882a593Smuzhiyun /* Update the changed flag */
775*4882a593Smuzhiyun spin_lock_irq(&cmd->bitmap_lock);
776*4882a593Smuzhiyun dmap->changed = 0;
777*4882a593Smuzhiyun spin_unlock_irq(&cmd->bitmap_lock);
778*4882a593Smuzhiyun
779*4882a593Smuzhiyun return 0;
780*4882a593Smuzhiyun }
781*4882a593Smuzhiyun
dm_clone_metadata_pre_commit(struct dm_clone_metadata * cmd)782*4882a593Smuzhiyun int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
783*4882a593Smuzhiyun {
784*4882a593Smuzhiyun int r = 0;
785*4882a593Smuzhiyun struct dirty_map *dmap, *next_dmap;
786*4882a593Smuzhiyun
787*4882a593Smuzhiyun down_write(&cmd->lock);
788*4882a593Smuzhiyun
789*4882a593Smuzhiyun if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
790*4882a593Smuzhiyun r = -EPERM;
791*4882a593Smuzhiyun goto out;
792*4882a593Smuzhiyun }
793*4882a593Smuzhiyun
794*4882a593Smuzhiyun /* Get current dirty bitmap */
795*4882a593Smuzhiyun dmap = cmd->current_dmap;
796*4882a593Smuzhiyun
797*4882a593Smuzhiyun /* Get next dirty bitmap */
798*4882a593Smuzhiyun next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
799*4882a593Smuzhiyun
800*4882a593Smuzhiyun /*
801*4882a593Smuzhiyun * The last commit failed, so we don't have a clean dirty-bitmap to
802*4882a593Smuzhiyun * use.
803*4882a593Smuzhiyun */
804*4882a593Smuzhiyun if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
805*4882a593Smuzhiyun r = -EINVAL;
806*4882a593Smuzhiyun goto out;
807*4882a593Smuzhiyun }
808*4882a593Smuzhiyun
809*4882a593Smuzhiyun /* Swap dirty bitmaps */
810*4882a593Smuzhiyun spin_lock_irq(&cmd->bitmap_lock);
811*4882a593Smuzhiyun cmd->current_dmap = next_dmap;
812*4882a593Smuzhiyun spin_unlock_irq(&cmd->bitmap_lock);
813*4882a593Smuzhiyun
814*4882a593Smuzhiyun /* Set old dirty bitmap as currently committing */
815*4882a593Smuzhiyun cmd->committing_dmap = dmap;
816*4882a593Smuzhiyun out:
817*4882a593Smuzhiyun up_write(&cmd->lock);
818*4882a593Smuzhiyun
819*4882a593Smuzhiyun return r;
820*4882a593Smuzhiyun }
821*4882a593Smuzhiyun
dm_clone_metadata_commit(struct dm_clone_metadata * cmd)822*4882a593Smuzhiyun int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
823*4882a593Smuzhiyun {
824*4882a593Smuzhiyun int r = -EPERM;
825*4882a593Smuzhiyun
826*4882a593Smuzhiyun down_write(&cmd->lock);
827*4882a593Smuzhiyun
828*4882a593Smuzhiyun if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
829*4882a593Smuzhiyun goto out;
830*4882a593Smuzhiyun
831*4882a593Smuzhiyun if (WARN_ON(!cmd->committing_dmap)) {
832*4882a593Smuzhiyun r = -EINVAL;
833*4882a593Smuzhiyun goto out;
834*4882a593Smuzhiyun }
835*4882a593Smuzhiyun
836*4882a593Smuzhiyun r = __flush_dmap(cmd, cmd->committing_dmap);
837*4882a593Smuzhiyun if (!r) {
838*4882a593Smuzhiyun /* Clear committing dmap */
839*4882a593Smuzhiyun cmd->committing_dmap = NULL;
840*4882a593Smuzhiyun }
841*4882a593Smuzhiyun out:
842*4882a593Smuzhiyun up_write(&cmd->lock);
843*4882a593Smuzhiyun
844*4882a593Smuzhiyun return r;
845*4882a593Smuzhiyun }
846*4882a593Smuzhiyun
dm_clone_set_region_hydrated(struct dm_clone_metadata * cmd,unsigned long region_nr)847*4882a593Smuzhiyun int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
848*4882a593Smuzhiyun {
849*4882a593Smuzhiyun int r = 0;
850*4882a593Smuzhiyun struct dirty_map *dmap;
851*4882a593Smuzhiyun unsigned long word, flags;
852*4882a593Smuzhiyun
853*4882a593Smuzhiyun if (unlikely(region_nr >= cmd->nr_regions)) {
854*4882a593Smuzhiyun DMERR("Region %lu out of range (total number of regions %lu)",
855*4882a593Smuzhiyun region_nr, cmd->nr_regions);
856*4882a593Smuzhiyun return -ERANGE;
857*4882a593Smuzhiyun }
858*4882a593Smuzhiyun
859*4882a593Smuzhiyun word = region_nr / BITS_PER_LONG;
860*4882a593Smuzhiyun
861*4882a593Smuzhiyun spin_lock_irqsave(&cmd->bitmap_lock, flags);
862*4882a593Smuzhiyun
863*4882a593Smuzhiyun if (cmd->read_only) {
864*4882a593Smuzhiyun r = -EPERM;
865*4882a593Smuzhiyun goto out;
866*4882a593Smuzhiyun }
867*4882a593Smuzhiyun
868*4882a593Smuzhiyun dmap = cmd->current_dmap;
869*4882a593Smuzhiyun
870*4882a593Smuzhiyun __set_bit(word, dmap->dirty_words);
871*4882a593Smuzhiyun __set_bit(region_nr, dmap->dirty_regions);
872*4882a593Smuzhiyun __set_bit(region_nr, cmd->region_map);
873*4882a593Smuzhiyun dmap->changed = 1;
874*4882a593Smuzhiyun
875*4882a593Smuzhiyun out:
876*4882a593Smuzhiyun spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
877*4882a593Smuzhiyun
878*4882a593Smuzhiyun return r;
879*4882a593Smuzhiyun }
880*4882a593Smuzhiyun
dm_clone_cond_set_range(struct dm_clone_metadata * cmd,unsigned long start,unsigned long nr_regions)881*4882a593Smuzhiyun int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
882*4882a593Smuzhiyun unsigned long nr_regions)
883*4882a593Smuzhiyun {
884*4882a593Smuzhiyun int r = 0;
885*4882a593Smuzhiyun struct dirty_map *dmap;
886*4882a593Smuzhiyun unsigned long word, region_nr;
887*4882a593Smuzhiyun
888*4882a593Smuzhiyun if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
889*4882a593Smuzhiyun (start + nr_regions) > cmd->nr_regions)) {
890*4882a593Smuzhiyun DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
891*4882a593Smuzhiyun start, nr_regions, cmd->nr_regions);
892*4882a593Smuzhiyun return -ERANGE;
893*4882a593Smuzhiyun }
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun spin_lock_irq(&cmd->bitmap_lock);
896*4882a593Smuzhiyun
897*4882a593Smuzhiyun if (cmd->read_only) {
898*4882a593Smuzhiyun r = -EPERM;
899*4882a593Smuzhiyun goto out;
900*4882a593Smuzhiyun }
901*4882a593Smuzhiyun
902*4882a593Smuzhiyun dmap = cmd->current_dmap;
903*4882a593Smuzhiyun for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
904*4882a593Smuzhiyun if (!test_bit(region_nr, cmd->region_map)) {
905*4882a593Smuzhiyun word = region_nr / BITS_PER_LONG;
906*4882a593Smuzhiyun __set_bit(word, dmap->dirty_words);
907*4882a593Smuzhiyun __set_bit(region_nr, dmap->dirty_regions);
908*4882a593Smuzhiyun __set_bit(region_nr, cmd->region_map);
909*4882a593Smuzhiyun dmap->changed = 1;
910*4882a593Smuzhiyun }
911*4882a593Smuzhiyun }
912*4882a593Smuzhiyun out:
913*4882a593Smuzhiyun spin_unlock_irq(&cmd->bitmap_lock);
914*4882a593Smuzhiyun
915*4882a593Smuzhiyun return r;
916*4882a593Smuzhiyun }
917*4882a593Smuzhiyun
918*4882a593Smuzhiyun /*
919*4882a593Smuzhiyun * WARNING: This must not be called concurrently with either
920*4882a593Smuzhiyun * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
921*4882a593Smuzhiyun * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
922*4882a593Smuzhiyun * exception is after setting the metadata to read-only mode, using
923*4882a593Smuzhiyun * dm_clone_metadata_set_read_only().
924*4882a593Smuzhiyun *
925*4882a593Smuzhiyun * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
926*4882a593Smuzhiyun * may block.
927*4882a593Smuzhiyun */
dm_clone_reload_in_core_bitset(struct dm_clone_metadata * cmd)928*4882a593Smuzhiyun int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
929*4882a593Smuzhiyun {
930*4882a593Smuzhiyun int r = -EINVAL;
931*4882a593Smuzhiyun
932*4882a593Smuzhiyun down_write(&cmd->lock);
933*4882a593Smuzhiyun
934*4882a593Smuzhiyun if (cmd->fail_io)
935*4882a593Smuzhiyun goto out;
936*4882a593Smuzhiyun
937*4882a593Smuzhiyun r = __load_bitset_in_core(cmd);
938*4882a593Smuzhiyun out:
939*4882a593Smuzhiyun up_write(&cmd->lock);
940*4882a593Smuzhiyun
941*4882a593Smuzhiyun return r;
942*4882a593Smuzhiyun }
943*4882a593Smuzhiyun
dm_clone_changed_this_transaction(struct dm_clone_metadata * cmd)944*4882a593Smuzhiyun bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
945*4882a593Smuzhiyun {
946*4882a593Smuzhiyun bool r;
947*4882a593Smuzhiyun unsigned long flags;
948*4882a593Smuzhiyun
949*4882a593Smuzhiyun spin_lock_irqsave(&cmd->bitmap_lock, flags);
950*4882a593Smuzhiyun r = cmd->dmap[0].changed || cmd->dmap[1].changed;
951*4882a593Smuzhiyun spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
952*4882a593Smuzhiyun
953*4882a593Smuzhiyun return r;
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun
dm_clone_metadata_abort(struct dm_clone_metadata * cmd)956*4882a593Smuzhiyun int dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
957*4882a593Smuzhiyun {
958*4882a593Smuzhiyun int r = -EPERM;
959*4882a593Smuzhiyun
960*4882a593Smuzhiyun down_write(&cmd->lock);
961*4882a593Smuzhiyun
962*4882a593Smuzhiyun if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
963*4882a593Smuzhiyun goto out;
964*4882a593Smuzhiyun
965*4882a593Smuzhiyun __destroy_persistent_data_structures(cmd);
966*4882a593Smuzhiyun
967*4882a593Smuzhiyun r = __create_persistent_data_structures(cmd, false);
968*4882a593Smuzhiyun if (r) {
969*4882a593Smuzhiyun /* If something went wrong we can neither write nor read the metadata */
970*4882a593Smuzhiyun cmd->fail_io = true;
971*4882a593Smuzhiyun }
972*4882a593Smuzhiyun out:
973*4882a593Smuzhiyun up_write(&cmd->lock);
974*4882a593Smuzhiyun
975*4882a593Smuzhiyun return r;
976*4882a593Smuzhiyun }
977*4882a593Smuzhiyun
dm_clone_metadata_set_read_only(struct dm_clone_metadata * cmd)978*4882a593Smuzhiyun void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
979*4882a593Smuzhiyun {
980*4882a593Smuzhiyun down_write(&cmd->lock);
981*4882a593Smuzhiyun
982*4882a593Smuzhiyun spin_lock_irq(&cmd->bitmap_lock);
983*4882a593Smuzhiyun cmd->read_only = 1;
984*4882a593Smuzhiyun spin_unlock_irq(&cmd->bitmap_lock);
985*4882a593Smuzhiyun
986*4882a593Smuzhiyun if (!cmd->fail_io)
987*4882a593Smuzhiyun dm_bm_set_read_only(cmd->bm);
988*4882a593Smuzhiyun
989*4882a593Smuzhiyun up_write(&cmd->lock);
990*4882a593Smuzhiyun }
991*4882a593Smuzhiyun
dm_clone_metadata_set_read_write(struct dm_clone_metadata * cmd)992*4882a593Smuzhiyun void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
993*4882a593Smuzhiyun {
994*4882a593Smuzhiyun down_write(&cmd->lock);
995*4882a593Smuzhiyun
996*4882a593Smuzhiyun spin_lock_irq(&cmd->bitmap_lock);
997*4882a593Smuzhiyun cmd->read_only = 0;
998*4882a593Smuzhiyun spin_unlock_irq(&cmd->bitmap_lock);
999*4882a593Smuzhiyun
1000*4882a593Smuzhiyun if (!cmd->fail_io)
1001*4882a593Smuzhiyun dm_bm_set_read_write(cmd->bm);
1002*4882a593Smuzhiyun
1003*4882a593Smuzhiyun up_write(&cmd->lock);
1004*4882a593Smuzhiyun }
1005*4882a593Smuzhiyun
/*
 * Query dm_sm_get_nr_free() on cmd->sm, storing its answer in @result, while
 * holding cmd->lock for reading.
 *
 * Returns -EINVAL, leaving @result untouched, if cmd->fail_io is set;
 * otherwise the result of dm_sm_get_nr_free().
 */
int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
					   dm_block_t *result)
{
	int r;

	down_read(&cmd->lock);
	r = cmd->fail_io ? -EINVAL : dm_sm_get_nr_free(cmd->sm, result);
	up_read(&cmd->lock);

	return r;
}
1020*4882a593Smuzhiyun
/*
 * Query dm_sm_get_nr_blocks() on cmd->sm, storing its answer in @result,
 * while holding cmd->lock for reading.
 *
 * Returns -EINVAL, leaving @result untouched, if cmd->fail_io is set;
 * otherwise the result of dm_sm_get_nr_blocks().
 */
int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
				   dm_block_t *result)
{
	int r;

	down_read(&cmd->lock);
	r = cmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(cmd->sm, result);
	up_read(&cmd->lock);

	return r;
}
1035