/*
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include <linux/blkdev.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>

#include "dm.h"

#include "dm-exception-store.h"

#define DM_MSG_PREFIX "snapshots"

static const char dm_snapshot_merge_target_name[] = "snapshot-merge";

#define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)

/*
 * The size of the mempool used to track chunks in use.
 */
#define MIN_IOS 256

#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))

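/*
 * In-memory exception table: exceptions are hashed by their old (origin)
 * chunk number; see exception_hash() below.
 */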
struct dm_exception_table {
	uint32_t hash_mask;
	unsigned hash_shift;
	struct hlist_bl_head *table;
};

struct dm_snapshot {
	struct rw_semaphore lock;

	struct dm_dev *origin;
	struct dm_dev *cow;

	struct dm_target *ti;

	/* List of snapshots per Origin */
	struct list_head list;

	/*
	 * You can't use a snapshot if this is 0 (e.g. if full).
	 * A snapshot-merge target never clears this.
	 */
	int valid;

	/*
	 * The snapshot overflowed because of a write to the snapshot device.
	 * We don't have to invalidate the snapshot in this case, but we need
	 * to prevent further writes.
	 */
	int snapshot_overflowed;

	/* Origin writes don't trigger exceptions until this is set */
	int active;

	atomic_t pending_exceptions_count;

	spinlock_t pe_allocation_lock;

	/* Protected by "pe_allocation_lock" */
	sector_t exception_start_sequence;

	/* Protected by kcopyd single-threaded callback */
	sector_t exception_complete_sequence;

	/*
	 * A list of pending exceptions that completed out of order.
	 * Protected by kcopyd single-threaded callback.
	 */
	struct rb_root out_of_order_tree;

	mempool_t pending_pool;

	struct dm_exception_table pending;
	struct dm_exception_table complete;

	/*
	 * pe_lock protects all pending_exception operations and access
	 * as well as the snapshot_bios list.
	 */
	spinlock_t pe_lock;

	/* Chunks with outstanding reads */
	spinlock_t tracked_chunk_lock;
	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];

	/* The on disk metadata handler */
	struct dm_exception_store *store;

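	/*
	 * Number of chunk copies currently in flight; together with
	 * in_progress_wait this is used to enforce snapshot_cow_threshold.
	 */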
	unsigned in_progress;
	struct wait_queue_head in_progress_wait;

	struct dm_kcopyd_client *kcopyd_client;

	/* Wait for events based on state_bits */
	unsigned long state_bits;

	/* Range of chunks currently being merged. */
	chunk_t first_merging_chunk;
	int num_merging_chunks;

	/*
	 * The merge operation failed if this flag is set.
	 * Failure modes are handled as follows:
	 * - I/O error reading the header
	 *	=> don't load the target; abort.
	 * - Header does not have "valid" flag set
	 *	=> use the origin; forget about the snapshot.
	 * - I/O error when reading exceptions
	 *	=> don't load the target; abort.
	 *	   (We can't use the intermediate origin state.)
	 * - I/O error while merging
	 *	=> stop merging; set merge_failed; process I/O normally.
	 */
	bool merge_failed:1;

	bool discard_zeroes_cow:1;
	bool discard_passdown_origin:1;

	/*
	 * Incoming bios that overlap with chunks being merged must wait
	 * for them to be committed.
	 */
	struct bio_list bios_queued_during_merge;

	/*
	 * Flush data after merge.
	 */
	struct bio flush_bio;
};

/*
 * state_bits:
 *   RUNNING_MERGE  - Merge operation is in progress.
 *   SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
 *                    cleared afterwards.
 */
#define RUNNING_MERGE		0
#define SHUTDOWN_MERGE		1

/*
 * Maximum number of chunks being copied on write.
 *
 * The value was decided experimentally as a trade-off between memory
 * consumption, stalling the kernel's workqueues and maintaining a high enough
 * throughput.
 */
#define DEFAULT_COW_THRESHOLD 2048

static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");

struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
{
	return s->origin;
}
EXPORT_SYMBOL(dm_snap_origin);

struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
{
	return s->cow;
}
EXPORT_SYMBOL(dm_snap_cow);

static sector_t chunk_to_sector(struct dm_exception_store *store,
				chunk_t chunk)
{
	return chunk << store->chunk_shift;
}

static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * There is only ever one instance of a particular block
	 * device so we can compare pointers safely.
	 */
	return lhs == rhs;
}

struct dm_snap_pending_exception {
	struct dm_exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a bio list
	 */
	struct bio_list origin_bios;
	struct bio_list snapshot_bios;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;

	/* There was a copying error. */
	int copy_error;

	/* A sequence number, used for in-order completion. */
	sector_t exception_sequence;

	struct rb_node out_of_order_node;

	/*
	 * For writing a complete chunk, bypassing the copy.
	 */
	struct bio *full_bio;
	bio_end_io_t *full_bio_end_io;
};

/*
 * Hash table mapping origin volumes to lists of snapshots and
 * a lock to protect it
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;

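/*
 * Per-bio record of a chunk with a read outstanding, so that exception
 * completion and merging can wait for conflicting I/O to drain
 * (see __check_for_conflicting_io()).
 */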
struct dm_snap_tracked_chunk {
	struct hlist_node node;
	chunk_t chunk;
};

static void init_tracked_chunk(struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	INIT_HLIST_NODE(&c->node);
}

static bool is_bio_tracked(struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	return !hlist_unhashed(&c->node);
}

static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));

	c->chunk = chunk;

	spin_lock_irq(&s->tracked_chunk_lock);
	hlist_add_head(&c->node,
		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
	spin_unlock_irq(&s->tracked_chunk_lock);
}

static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	unsigned long flags;

	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
	hlist_del(&c->node);
	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
}

static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *c;
	int found = 0;

	spin_lock_irq(&s->tracked_chunk_lock);

	hlist_for_each_entry(c,
	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
		if (c->chunk == chunk) {
			found = 1;
			break;
		}
	}

	spin_unlock_irq(&s->tracked_chunk_lock);

	return found;
}

/*
 * This conflicting I/O is extremely improbable in the caller,
 * so msleep(1) is sufficient and there is no need for a wait queue.
 */
static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
{
	while (__chunk_is_tracked(s, chunk))
		msleep(1);
}

/*
 * One of these per registered origin, held in the snapshot_origins hash
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};

/*
 * This structure is allocated for each origin target
 */
struct dm_origin {
	struct dm_dev *dev;
	struct dm_target *ti;
	unsigned split_boundary;
	struct list_head hash_list;
};

/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
static struct list_head *_origins;
static struct list_head *_dm_origins;
static struct rw_semaphore _origins_lock;

static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
static uint64_t _pending_exceptions_done_count;

static int init_origin_hash(void)
{
	int i;

	_origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
				 GFP_KERNEL);
	if (!_origins) {
		DMERR("unable to allocate memory for _origins");
		return -ENOMEM;
	}
	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_origins + i);

	_dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
				    sizeof(struct list_head),
				    GFP_KERNEL);
	if (!_dm_origins) {
		DMERR("unable to allocate memory for _dm_origins");
		kfree(_origins);
		return -ENOMEM;
	}
	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_dm_origins + i);

	init_rwsem(&_origins_lock);

	return 0;
}

static void exit_origin_hash(void)
{
	kfree(_origins);
	kfree(_dm_origins);
}

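/* Hash an origin block device into the origin tables by the low bits of its dev_t. */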
static unsigned origin_hash(struct block_device *bdev)
{
	return bdev->bd_dev & ORIGIN_MASK;
}

static struct origin *__lookup_origin(struct block_device *origin)
{
	struct list_head *ol;
	struct origin *o;

	ol = &_origins[origin_hash(origin)];
	list_for_each_entry (o, ol, hash_list)
		if (bdev_equal(o->bdev, origin))
			return o;

	return NULL;
}

static void __insert_origin(struct origin *o)
{
	struct list_head *sl = &_origins[origin_hash(o->bdev)];
	list_add_tail(&o->hash_list, sl);
}

static struct dm_origin *__lookup_dm_origin(struct block_device *origin)
{
	struct list_head *ol;
	struct dm_origin *o;

	ol = &_dm_origins[origin_hash(origin)];
	list_for_each_entry (o, ol, hash_list)
		if (bdev_equal(o->dev->bdev, origin))
			return o;

	return NULL;
}

static void __insert_dm_origin(struct dm_origin *o)
{
	struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
	list_add_tail(&o->hash_list, sl);
}

static void __remove_dm_origin(struct dm_origin *o)
{
	list_del(&o->hash_list);
}

/*
 * _origins_lock must be held when calling this function.
 * Returns number of snapshots registered using the supplied cow device, plus:
 * snap_src - a snapshot suitable for use as a source of exception handover
 * snap_dest - a snapshot capable of receiving exception handover.
 * snap_merge - an existing snapshot-merge target linked to the same origin.
 *   There can be at most one snapshot-merge target. The parameter is optional.
 *
 * Possible return values and states of snap_src and snap_dest.
 *   0: NULL, NULL  - first new snapshot
 *   1: snap_src, NULL - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
			*snap_merge = s;
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}

/*
 * On success, returns 1 if this snapshot is a handover destination,
 * otherwise returns 0.
 */
static int __validate_exception_handover(struct dm_snapshot *snap)
{
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
	struct dm_snapshot *snap_merge = NULL;

	/* Does snapshot need exceptions handed over to it? */
	if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
					  &snap_merge) == 2) ||
	    snap_dest) {
		snap->ti->error = "Snapshot cow pairing for exception "
				  "table handover failed";
		return -EINVAL;
	}

	/*
	 * If no snap_src was found, snap cannot become a handover
	 * destination.
	 */
	if (!snap_src)
		return 0;

	/*
	 * Non-snapshot-merge handover?
	 */
	if (!dm_target_is_snapshot_merge(snap->ti))
		return 1;

	/*
	 * Do not allow more than one merging snapshot.
	 */
	if (snap_merge) {
		snap->ti->error = "A snapshot is already merging.";
		return -EINVAL;
	}

	if (!snap_src->store->type->prepare_merge ||
	    !snap_src->store->type->commit_merge) {
		snap->ti->error = "Snapshot exception store does not "
				  "support snapshot-merge.";
		return -EINVAL;
	}

	return 1;
}

static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
{
	struct dm_snapshot *l;

	/* Sort the list according to chunk size, largest-first smallest-last */
	list_for_each_entry(l, &o->snapshots, list)
		if (l->store->chunk_size < s->store->chunk_size)
			break;
	list_add_tail(&s->list, &l->list);
}

/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 *
 * Also validate snapshot exception store handovers.
 * On success, returns 1 if this registration is a handover destination,
 * otherwise returns 0.
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct origin *o, *new_o = NULL;
	struct block_device *bdev = snap->origin->bdev;
	int r = 0;

	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
	if (!new_o)
		return -ENOMEM;

	down_write(&_origins_lock);

	r = __validate_exception_handover(snap);
	if (r < 0) {
		kfree(new_o);
		goto out;
	}

	o = __lookup_origin(bdev);
	if (o)
		kfree(new_o);
	else {
		/* New origin */
		o = new_o;

		/* Initialise the struct */
		INIT_LIST_HEAD(&o->snapshots);
		o->bdev = bdev;

		__insert_origin(o);
	}

	__insert_snapshot(o, snap);

out:
	up_write(&_origins_lock);

	return r;
}

/*
 * Move snapshot to correct place in list according to chunk size.
 */
static void reregister_snapshot(struct dm_snapshot *s)
{
	struct block_device *bdev = s->origin->bdev;

	down_write(&_origins_lock);

	list_del(&s->list);
	__insert_snapshot(__lookup_origin(bdev), s);

	up_write(&_origins_lock);
}

static void unregister_snapshot(struct dm_snapshot *s)
{
	struct origin *o;

	down_write(&_origins_lock);
	o = __lookup_origin(s->origin->bdev);

	list_del(&s->list);
	if (o && list_empty(&o->snapshots)) {
		list_del(&o->hash_list);
		kfree(o);
	}

	up_write(&_origins_lock);
}

/*
 * Implementation of the exception hash tables.
 * The lowest hash_shift bits of the chunk number are ignored, allowing
 * some consecutive chunks to be grouped together.
 */
static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);

/* Lock to protect access to the completed and pending exception hash tables. */
struct dm_exception_table_lock {
	struct hlist_bl_head *complete_slot;
	struct hlist_bl_head *pending_slot;
};

static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
					 struct dm_exception_table_lock *lock)
{
	struct dm_exception_table *complete = &s->complete;
	struct dm_exception_table *pending = &s->pending;

	lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
	lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
}

static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
{
	hlist_bl_lock(lock->complete_slot);
	hlist_bl_lock(lock->pending_slot);
}

static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
{
	hlist_bl_unlock(lock->pending_slot);
	hlist_bl_unlock(lock->complete_slot);
}

static int dm_exception_table_init(struct dm_exception_table *et,
				   uint32_t size, unsigned hash_shift)
{
	unsigned int i;

	et->hash_shift = hash_shift;
	et->hash_mask = size - 1;
	et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head));
	if (!et->table)
		return -ENOMEM;

	for (i = 0; i < size; i++)
		INIT_HLIST_BL_HEAD(et->table + i);

	return 0;
}

static void dm_exception_table_exit(struct dm_exception_table *et,
				    struct kmem_cache *mem)
{
	struct hlist_bl_head *slot;
	struct dm_exception *ex;
	struct hlist_bl_node *pos, *n;
	int i, size;

	size = et->hash_mask + 1;
	for (i = 0; i < size; i++) {
		slot = et->table + i;

		hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
			kmem_cache_free(mem, ex);
	}

	vfree(et->table);
}

static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
{
	return (chunk >> et->hash_shift) & et->hash_mask;
}

static void dm_remove_exception(struct dm_exception *e)
{
	hlist_bl_del(&e->hash_list);
}

/*
 * Return the exception data for a sector, or NULL if not
 * remapped.
 */
static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
						chunk_t chunk)
{
	struct hlist_bl_head *slot;
	struct hlist_bl_node *pos;
	struct dm_exception *e;

	slot = &et->table[exception_hash(et, chunk)];
	hlist_bl_for_each_entry(e, pos, slot, hash_list)
		if (chunk >= e->old_chunk &&
		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
			return e;

	return NULL;
}

static struct dm_exception *alloc_completed_exception(gfp_t gfp)
{
	struct dm_exception *e;

	e = kmem_cache_alloc(exception_cache, gfp);
	if (!e && gfp == GFP_NOIO)
		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);

	return e;
}

static void free_completed_exception(struct dm_exception *e)
{
	kmem_cache_free(exception_cache, e);
}

static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
{
	struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
							     GFP_NOIO);

	atomic_inc(&s->pending_exceptions_count);
	pe->snap = s;

	return pe;
}

static void free_pending_exception(struct dm_snap_pending_exception *pe)
{
	struct dm_snapshot *s = pe->snap;

	mempool_free(pe, &s->pending_pool);
	smp_mb__before_atomic();
	atomic_dec(&s->pending_exceptions_count);
}

static void dm_insert_exception(struct dm_exception_table *eh,
				struct dm_exception *new_e)
{
	struct hlist_bl_head *l;
	struct hlist_bl_node *pos;
	struct dm_exception *e = NULL;

	l = &eh->table[exception_hash(eh, new_e->old_chunk)];

	/* Add immediately if this table doesn't support consecutive chunks */
	if (!eh->hash_shift)
		goto out;

	/* List is ordered by old_chunk */
	hlist_bl_for_each_entry(e, pos, l, hash_list) {
		/* Insert after an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk +
					 dm_consecutive_chunk_count(e) + 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
					 dm_consecutive_chunk_count(e) + 1)) {
			dm_consecutive_chunk_count_inc(e);
			free_completed_exception(new_e);
			return;
		}

		/* Insert before an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk - 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
			dm_consecutive_chunk_count_inc(e);
			e->old_chunk--;
			e->new_chunk--;
			free_completed_exception(new_e);
			return;
		}

		if (new_e->old_chunk < e->old_chunk)
			break;
	}

out:
	if (!e) {
		/*
		 * Either the table doesn't support consecutive chunks or slot
		 * l is empty.
		 */
		hlist_bl_add_head(&new_e->hash_list, l);
	} else if (new_e->old_chunk < e->old_chunk) {
		/* Add before an existing exception */
		hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
	} else {
		/* Add to l's tail: e is the last exception in this slot */
		hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
	}
}

/*
 * Callback used by the exception stores to load exceptions when
 * initialising.
 */
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
	struct dm_exception_table_lock lock;
	struct dm_snapshot *s = context;
	struct dm_exception *e;

	e = alloc_completed_exception(GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->old_chunk = old;

	/* Consecutive_count is implicitly initialised to zero */
	e->new_chunk = new;

	/*
	 * Although there is no need to lock access to the exception tables
	 * here, if we don't then hlist_bl_add_head(), called by
	 * dm_insert_exception(), will complain about accessing the
	 * corresponding list without locking it first.
	 */
	dm_exception_table_lock_init(s, old, &lock);

	dm_exception_table_lock(&lock);
	dm_insert_exception(&s->complete, e);
	dm_exception_table_unlock(&lock);

	return 0;
}

/*
 * Return a minimum chunk size of all snapshots that have the specified origin.
 * Return zero if the origin has no snapshots.
 */
static uint32_t __minimum_chunk_size(struct origin *o)
{
	struct dm_snapshot *snap;
	unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);

	if (o)
		list_for_each_entry(snap, &o->snapshots, list)
			chunk_size = min_not_zero(chunk_size,
						  snap->store->chunk_size);

	return (uint32_t) chunk_size;
}

/*
 * Hard coded magic.
 */
static int calc_max_buckets(void)
{
	/* use a fixed size of 2MB */
	unsigned long mem = 2 * 1024 * 1024;
	mem /= sizeof(struct hlist_bl_head);

	return mem;
}

/*
 * Allocate room for a suitable hash table.
 */
static int init_hash_tables(struct dm_snapshot *s)
{
	sector_t hash_size, cow_dev_size, max_buckets;

	/*
	 * Calculate based on the size of the original volume or
	 * the COW volume...
	 */
	cow_dev_size = get_dev_size(s->cow->bdev);
	max_buckets = calc_max_buckets();

	hash_size = cow_dev_size >> s->store->chunk_shift;
	hash_size = min(hash_size, max_buckets);

	if (hash_size < 64)
		hash_size = 64;
	hash_size = rounddown_pow_of_two(hash_size);
	if (dm_exception_table_init(&s->complete, hash_size,
				    DM_CHUNK_CONSECUTIVE_BITS))
		return -ENOMEM;

	/*
	 * Allocate hash table for in-flight exceptions
	 * Make this smaller than the real hash table
	 */
	hash_size >>= 3;
	if (hash_size < 64)
		hash_size = 64;

	if (dm_exception_table_init(&s->pending, hash_size, 0)) {
		dm_exception_table_exit(&s->complete, exception_cache);
		return -ENOMEM;
	}

	return 0;
}

static void merge_shutdown(struct dm_snapshot *s)
{
	clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
	smp_mb__after_atomic();
	wake_up_bit(&s->state_bits, RUNNING_MERGE);
}

static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
{
	s->first_merging_chunk = 0;
	s->num_merging_chunks = 0;

	return bio_list_get(&s->bios_queued_during_merge);
}

/*
 * Remove one chunk from the index of completed exceptions.
 */
static int __remove_single_exception_chunk(struct dm_snapshot *s,
					   chunk_t old_chunk)
{
	struct dm_exception *e;

	e = dm_lookup_exception(&s->complete, old_chunk);
	if (!e) {
		DMERR("Corruption detected: exception for block %llu is "
		      "on disk but not in memory",
		      (unsigned long long)old_chunk);
		return -EINVAL;
	}

	/*
	 * If this is the only chunk using this exception, remove exception.
	 */
	if (!dm_consecutive_chunk_count(e)) {
		dm_remove_exception(e);
		free_completed_exception(e);
		return 0;
	}

	/*
	 * The chunk may be either at the beginning or the end of a
	 * group of consecutive chunks - never in the middle. We are
	 * removing chunks in the opposite order to that in which they
	 * were added, so this should always be true.
	 * Decrement the consecutive chunk counter and adjust the
	 * starting point if necessary.
	 */
	if (old_chunk == e->old_chunk) {
		e->old_chunk++;
		e->new_chunk++;
	} else if (old_chunk != e->old_chunk +
		   dm_consecutive_chunk_count(e)) {
		DMERR("Attempt to merge block %llu from the "
		      "middle of a chunk range [%llu - %llu]",
		      (unsigned long long)old_chunk,
		      (unsigned long long)e->old_chunk,
		      (unsigned long long)
		      e->old_chunk + dm_consecutive_chunk_count(e));
		return -EINVAL;
	}

	dm_consecutive_chunk_count_dec(e);

	return 0;
}

static void flush_bios(struct bio *bio);

static int remove_single_exception_chunk(struct dm_snapshot *s)
{
	struct bio *b = NULL;
	int r;
	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;

	down_write(&s->lock);

	/*
	 * Process chunks (and associated exceptions) in reverse order
	 * so that dm_consecutive_chunk_count_dec() accounting works.
	 */
	do {
		r = __remove_single_exception_chunk(s, old_chunk);
		if (r)
			goto out;
	} while (old_chunk-- > s->first_merging_chunk);

	b = __release_queued_bios_after_merge(s);

out:
	up_write(&s->lock);
	if (b)
		flush_bios(b);

	return r;
}

static int origin_write_extent(struct dm_snapshot *merging_snap,
			       sector_t sector, unsigned chunk_size);

static void merge_callback(int read_err, unsigned long write_err,
			   void *context);

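/*
 * Global count of completed pending exceptions. snapshot_merge_next_chunks()
 * waits for it to change while exceptions triggered by its origin writes are
 * still outstanding.
 */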
static uint64_t read_pending_exceptions_done_count(void)
{
	uint64_t pending_exceptions_done;

	spin_lock(&_pending_exceptions_done_spinlock);
	pending_exceptions_done = _pending_exceptions_done_count;
	spin_unlock(&_pending_exceptions_done_spinlock);

	return pending_exceptions_done;
}

static void increment_pending_exceptions_done_count(void)
{
	spin_lock(&_pending_exceptions_done_spinlock);
	_pending_exceptions_done_count++;
	spin_unlock(&_pending_exceptions_done_spinlock);

	wake_up_all(&_pending_exceptions_done);
}

static void snapshot_merge_next_chunks(struct dm_snapshot *s)
{
	int i, linear_chunks;
	chunk_t old_chunk, new_chunk;
	struct dm_io_region src, dest;
	sector_t io_size;
	uint64_t previous_count;

	BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
	if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
		goto shut;

	/*
	 * valid flag never changes during merge, so no lock required.
	 */
	if (!s->valid) {
		DMERR("Snapshot is invalid: can't merge");
		goto shut;
	}

	linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
						      &new_chunk);
	if (linear_chunks <= 0) {
		if (linear_chunks < 0) {
			DMERR("Read error in exception store: "
			      "shutting down merge");
			down_write(&s->lock);
			s->merge_failed = true;
			up_write(&s->lock);
		}
		goto shut;
	}

	/* Adjust old_chunk and new_chunk to reflect start of linear region */
	old_chunk = old_chunk + 1 - linear_chunks;
	new_chunk = new_chunk + 1 - linear_chunks;

	/*
	 * Use one (potentially large) I/O to copy all 'linear_chunks'
	 * from the exception store to the origin
	 */
	io_size = linear_chunks * s->store->chunk_size;

	dest.bdev = s->origin->bdev;
	dest.sector = chunk_to_sector(s->store, old_chunk);
	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);

	src.bdev = s->cow->bdev;
	src.sector = chunk_to_sector(s->store, new_chunk);
	src.count = dest.count;

	/*
	 * Reallocate any exceptions needed in other snapshots then
	 * wait for the pending exceptions to complete.
	 * Each time any pending exception (globally on the system)
	 * completes we are woken and repeat the process to find out
	 * if we can proceed.  While this may not seem a particularly
	 * efficient algorithm, it is not expected to have any
	 * significant impact on performance.
	 */
	previous_count = read_pending_exceptions_done_count();
	while (origin_write_extent(s, dest.sector, io_size)) {
		wait_event(_pending_exceptions_done,
			   (read_pending_exceptions_done_count() !=
			    previous_count));
		/* Retry after the wait, until all exceptions are done. */
		previous_count = read_pending_exceptions_done_count();
	}

	down_write(&s->lock);
	s->first_merging_chunk = old_chunk;
	s->num_merging_chunks = linear_chunks;
	up_write(&s->lock);

	/* Wait until writes to all 'linear_chunks' drain */
	for (i = 0; i < linear_chunks; i++)
		__check_for_conflicting_io(s, old_chunk + i);

	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 1 << DM_KCOPYD_SNAP_MERGE,
		       merge_callback, s);
	return;

shut:
	merge_shutdown(s);
}

static void error_bios(struct bio *bio);

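/*
 * Issue an empty REQ_PREFLUSH to the origin so merged data is stable on disk
 * before the exception store is updated in merge_callback().
 */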
flush_data(struct dm_snapshot * s)1130*4882a593Smuzhiyun static int flush_data(struct dm_snapshot *s)
1131*4882a593Smuzhiyun {
1132*4882a593Smuzhiyun struct bio *flush_bio = &s->flush_bio;
1133*4882a593Smuzhiyun
1134*4882a593Smuzhiyun bio_reset(flush_bio);
1135*4882a593Smuzhiyun bio_set_dev(flush_bio, s->origin->bdev);
1136*4882a593Smuzhiyun flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
1137*4882a593Smuzhiyun
1138*4882a593Smuzhiyun return submit_bio_wait(flush_bio);
1139*4882a593Smuzhiyun }
1140*4882a593Smuzhiyun
merge_callback(int read_err,unsigned long write_err,void * context)1141*4882a593Smuzhiyun static void merge_callback(int read_err, unsigned long write_err, void *context)
1142*4882a593Smuzhiyun {
1143*4882a593Smuzhiyun struct dm_snapshot *s = context;
1144*4882a593Smuzhiyun struct bio *b = NULL;
1145*4882a593Smuzhiyun
1146*4882a593Smuzhiyun if (read_err || write_err) {
1147*4882a593Smuzhiyun if (read_err)
1148*4882a593Smuzhiyun DMERR("Read error: shutting down merge.");
1149*4882a593Smuzhiyun else
1150*4882a593Smuzhiyun DMERR("Write error: shutting down merge.");
1151*4882a593Smuzhiyun goto shut;
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun
1154*4882a593Smuzhiyun if (flush_data(s) < 0) {
1155*4882a593Smuzhiyun DMERR("Flush after merge failed: shutting down merge");
1156*4882a593Smuzhiyun goto shut;
1157*4882a593Smuzhiyun }
1158*4882a593Smuzhiyun
1159*4882a593Smuzhiyun if (s->store->type->commit_merge(s->store,
1160*4882a593Smuzhiyun s->num_merging_chunks) < 0) {
1161*4882a593Smuzhiyun DMERR("Write error in exception store: shutting down merge");
1162*4882a593Smuzhiyun goto shut;
1163*4882a593Smuzhiyun }
1164*4882a593Smuzhiyun
1165*4882a593Smuzhiyun if (remove_single_exception_chunk(s) < 0)
1166*4882a593Smuzhiyun goto shut;
1167*4882a593Smuzhiyun
1168*4882a593Smuzhiyun snapshot_merge_next_chunks(s);
1169*4882a593Smuzhiyun
1170*4882a593Smuzhiyun return;
1171*4882a593Smuzhiyun
1172*4882a593Smuzhiyun shut:
1173*4882a593Smuzhiyun down_write(&s->lock);
1174*4882a593Smuzhiyun s->merge_failed = true;
1175*4882a593Smuzhiyun b = __release_queued_bios_after_merge(s);
1176*4882a593Smuzhiyun up_write(&s->lock);
1177*4882a593Smuzhiyun error_bios(b);
1178*4882a593Smuzhiyun
1179*4882a593Smuzhiyun merge_shutdown(s);
1180*4882a593Smuzhiyun }
1181*4882a593Smuzhiyun
start_merge(struct dm_snapshot * s)1182*4882a593Smuzhiyun static void start_merge(struct dm_snapshot *s)
1183*4882a593Smuzhiyun {
1184*4882a593Smuzhiyun if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
1185*4882a593Smuzhiyun snapshot_merge_next_chunks(s);
1186*4882a593Smuzhiyun }
1187*4882a593Smuzhiyun
1188*4882a593Smuzhiyun /*
1189*4882a593Smuzhiyun * Stop the merging process and wait until it finishes.
1190*4882a593Smuzhiyun */
stop_merge(struct dm_snapshot * s)1191*4882a593Smuzhiyun static void stop_merge(struct dm_snapshot *s)
1192*4882a593Smuzhiyun {
1193*4882a593Smuzhiyun set_bit(SHUTDOWN_MERGE, &s->state_bits);
1194*4882a593Smuzhiyun wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
1195*4882a593Smuzhiyun clear_bit(SHUTDOWN_MERGE, &s->state_bits);
1196*4882a593Smuzhiyun }
1197*4882a593Smuzhiyun
parse_snapshot_features(struct dm_arg_set * as,struct dm_snapshot * s,struct dm_target * ti)1198*4882a593Smuzhiyun static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
1199*4882a593Smuzhiyun struct dm_target *ti)
1200*4882a593Smuzhiyun {
1201*4882a593Smuzhiyun int r;
1202*4882a593Smuzhiyun unsigned argc;
1203*4882a593Smuzhiyun const char *arg_name;
1204*4882a593Smuzhiyun
1205*4882a593Smuzhiyun static const struct dm_arg _args[] = {
1206*4882a593Smuzhiyun {0, 2, "Invalid number of feature arguments"},
1207*4882a593Smuzhiyun };
1208*4882a593Smuzhiyun
1209*4882a593Smuzhiyun /*
1210*4882a593Smuzhiyun * No feature arguments supplied.
1211*4882a593Smuzhiyun */
1212*4882a593Smuzhiyun if (!as->argc)
1213*4882a593Smuzhiyun return 0;
1214*4882a593Smuzhiyun
1215*4882a593Smuzhiyun r = dm_read_arg_group(_args, as, &argc, &ti->error);
1216*4882a593Smuzhiyun if (r)
1217*4882a593Smuzhiyun return -EINVAL;
1218*4882a593Smuzhiyun
1219*4882a593Smuzhiyun while (argc && !r) {
1220*4882a593Smuzhiyun arg_name = dm_shift_arg(as);
1221*4882a593Smuzhiyun argc--;
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun if (!strcasecmp(arg_name, "discard_zeroes_cow"))
1224*4882a593Smuzhiyun s->discard_zeroes_cow = true;
1225*4882a593Smuzhiyun
1226*4882a593Smuzhiyun else if (!strcasecmp(arg_name, "discard_passdown_origin"))
1227*4882a593Smuzhiyun s->discard_passdown_origin = true;
1228*4882a593Smuzhiyun
1229*4882a593Smuzhiyun else {
1230*4882a593Smuzhiyun ti->error = "Unrecognised feature requested";
1231*4882a593Smuzhiyun r = -EINVAL;
1232*4882a593Smuzhiyun break;
1233*4882a593Smuzhiyun }
1234*4882a593Smuzhiyun }
1235*4882a593Smuzhiyun
1236*4882a593Smuzhiyun if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
1237*4882a593Smuzhiyun /*
1238*4882a593Smuzhiyun * TODO: really these are disjoint... but ti->num_discard_bios
1239*4882a593Smuzhiyun * and dm_bio_get_target_bio_nr() require rigid constraints.
1240*4882a593Smuzhiyun */
1241*4882a593Smuzhiyun ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
1242*4882a593Smuzhiyun r = -EINVAL;
1243*4882a593Smuzhiyun }
1244*4882a593Smuzhiyun
1245*4882a593Smuzhiyun return r;
1246*4882a593Smuzhiyun }
1247*4882a593Smuzhiyun
1248*4882a593Smuzhiyun /*
1249*4882a593Smuzhiyun * Construct a snapshot mapping:
1250*4882a593Smuzhiyun * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
1251*4882a593Smuzhiyun */
1252*4882a593Smuzhiyun static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1253*4882a593Smuzhiyun {
1254*4882a593Smuzhiyun struct dm_snapshot *s;
1255*4882a593Smuzhiyun struct dm_arg_set as;
1256*4882a593Smuzhiyun int i;
1257*4882a593Smuzhiyun int r = -EINVAL;
1258*4882a593Smuzhiyun char *origin_path, *cow_path;
1259*4882a593Smuzhiyun dev_t origin_dev, cow_dev;
1260*4882a593Smuzhiyun unsigned args_used, num_flush_bios = 1;
1261*4882a593Smuzhiyun fmode_t origin_mode = FMODE_READ;
1262*4882a593Smuzhiyun
1263*4882a593Smuzhiyun if (argc < 4) {
1264*4882a593Smuzhiyun ti->error = "requires 4 or more arguments";
1265*4882a593Smuzhiyun r = -EINVAL;
1266*4882a593Smuzhiyun goto bad;
1267*4882a593Smuzhiyun }
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun if (dm_target_is_snapshot_merge(ti)) {
1270*4882a593Smuzhiyun num_flush_bios = 2;
1271*4882a593Smuzhiyun origin_mode = FMODE_WRITE;
1272*4882a593Smuzhiyun }
1273*4882a593Smuzhiyun
1274*4882a593Smuzhiyun s = kzalloc(sizeof(*s), GFP_KERNEL);
1275*4882a593Smuzhiyun if (!s) {
1276*4882a593Smuzhiyun ti->error = "Cannot allocate private snapshot structure";
1277*4882a593Smuzhiyun r = -ENOMEM;
1278*4882a593Smuzhiyun goto bad;
1279*4882a593Smuzhiyun }
1280*4882a593Smuzhiyun
1281*4882a593Smuzhiyun as.argc = argc;
1282*4882a593Smuzhiyun as.argv = argv;
1283*4882a593Smuzhiyun dm_consume_args(&as, 4);
1284*4882a593Smuzhiyun r = parse_snapshot_features(&as, s, ti);
1285*4882a593Smuzhiyun if (r)
1286*4882a593Smuzhiyun goto bad_features;
1287*4882a593Smuzhiyun
1288*4882a593Smuzhiyun origin_path = argv[0];
1289*4882a593Smuzhiyun argv++;
1290*4882a593Smuzhiyun argc--;
1291*4882a593Smuzhiyun
1292*4882a593Smuzhiyun r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
1293*4882a593Smuzhiyun if (r) {
1294*4882a593Smuzhiyun ti->error = "Cannot get origin device";
1295*4882a593Smuzhiyun goto bad_origin;
1296*4882a593Smuzhiyun }
1297*4882a593Smuzhiyun origin_dev = s->origin->bdev->bd_dev;
1298*4882a593Smuzhiyun
1299*4882a593Smuzhiyun cow_path = argv[0];
1300*4882a593Smuzhiyun argv++;
1301*4882a593Smuzhiyun argc--;
1302*4882a593Smuzhiyun
1303*4882a593Smuzhiyun cow_dev = dm_get_dev_t(cow_path);
1304*4882a593Smuzhiyun if (cow_dev && cow_dev == origin_dev) {
1305*4882a593Smuzhiyun ti->error = "COW device cannot be the same as origin device";
1306*4882a593Smuzhiyun r = -EINVAL;
1307*4882a593Smuzhiyun goto bad_cow;
1308*4882a593Smuzhiyun }
1309*4882a593Smuzhiyun
1310*4882a593Smuzhiyun r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
1311*4882a593Smuzhiyun if (r) {
1312*4882a593Smuzhiyun ti->error = "Cannot get COW device";
1313*4882a593Smuzhiyun goto bad_cow;
1314*4882a593Smuzhiyun }
1315*4882a593Smuzhiyun
1316*4882a593Smuzhiyun r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
1317*4882a593Smuzhiyun if (r) {
1318*4882a593Smuzhiyun ti->error = "Couldn't create exception store";
1319*4882a593Smuzhiyun r = -EINVAL;
1320*4882a593Smuzhiyun goto bad_store;
1321*4882a593Smuzhiyun }
1322*4882a593Smuzhiyun
1323*4882a593Smuzhiyun argv += args_used;
1324*4882a593Smuzhiyun argc -= args_used;
1325*4882a593Smuzhiyun
1326*4882a593Smuzhiyun s->ti = ti;
1327*4882a593Smuzhiyun s->valid = 1;
1328*4882a593Smuzhiyun s->snapshot_overflowed = 0;
1329*4882a593Smuzhiyun s->active = 0;
1330*4882a593Smuzhiyun atomic_set(&s->pending_exceptions_count, 0);
1331*4882a593Smuzhiyun spin_lock_init(&s->pe_allocation_lock);
1332*4882a593Smuzhiyun s->exception_start_sequence = 0;
1333*4882a593Smuzhiyun s->exception_complete_sequence = 0;
1334*4882a593Smuzhiyun s->out_of_order_tree = RB_ROOT;
1335*4882a593Smuzhiyun init_rwsem(&s->lock);
1336*4882a593Smuzhiyun INIT_LIST_HEAD(&s->list);
1337*4882a593Smuzhiyun spin_lock_init(&s->pe_lock);
1338*4882a593Smuzhiyun s->state_bits = 0;
1339*4882a593Smuzhiyun s->merge_failed = false;
1340*4882a593Smuzhiyun s->first_merging_chunk = 0;
1341*4882a593Smuzhiyun s->num_merging_chunks = 0;
1342*4882a593Smuzhiyun bio_list_init(&s->bios_queued_during_merge);
1343*4882a593Smuzhiyun bio_init(&s->flush_bio, NULL, 0);
1344*4882a593Smuzhiyun
1345*4882a593Smuzhiyun /* Allocate hash table for COW data */
1346*4882a593Smuzhiyun if (init_hash_tables(s)) {
1347*4882a593Smuzhiyun ti->error = "Unable to allocate hash table space";
1348*4882a593Smuzhiyun r = -ENOMEM;
1349*4882a593Smuzhiyun goto bad_hash_tables;
1350*4882a593Smuzhiyun }
1351*4882a593Smuzhiyun
1352*4882a593Smuzhiyun init_waitqueue_head(&s->in_progress_wait);
1353*4882a593Smuzhiyun
1354*4882a593Smuzhiyun s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1355*4882a593Smuzhiyun if (IS_ERR(s->kcopyd_client)) {
1356*4882a593Smuzhiyun r = PTR_ERR(s->kcopyd_client);
1357*4882a593Smuzhiyun ti->error = "Could not create kcopyd client";
1358*4882a593Smuzhiyun goto bad_kcopyd;
1359*4882a593Smuzhiyun }
1360*4882a593Smuzhiyun
1361*4882a593Smuzhiyun r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
1362*4882a593Smuzhiyun if (r) {
1363*4882a593Smuzhiyun ti->error = "Could not allocate mempool for pending exceptions";
1364*4882a593Smuzhiyun goto bad_pending_pool;
1365*4882a593Smuzhiyun }
1366*4882a593Smuzhiyun
1367*4882a593Smuzhiyun for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1368*4882a593Smuzhiyun INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
1369*4882a593Smuzhiyun
1370*4882a593Smuzhiyun spin_lock_init(&s->tracked_chunk_lock);
1371*4882a593Smuzhiyun
1372*4882a593Smuzhiyun ti->private = s;
1373*4882a593Smuzhiyun ti->num_flush_bios = num_flush_bios;
1374*4882a593Smuzhiyun if (s->discard_zeroes_cow)
1375*4882a593Smuzhiyun ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
1376*4882a593Smuzhiyun ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
1377*4882a593Smuzhiyun
1378*4882a593Smuzhiyun /* Add snapshot to the list of snapshots for this origin */
1379*4882a593Smuzhiyun /* Exceptions aren't triggered till snapshot_resume() is called */
1380*4882a593Smuzhiyun r = register_snapshot(s);
1381*4882a593Smuzhiyun if (r == -ENOMEM) {
1382*4882a593Smuzhiyun ti->error = "Snapshot origin struct allocation failed";
1383*4882a593Smuzhiyun goto bad_load_and_register;
1384*4882a593Smuzhiyun } else if (r < 0) {
1385*4882a593Smuzhiyun /* invalid handover, register_snapshot has set ti->error */
1386*4882a593Smuzhiyun goto bad_load_and_register;
1387*4882a593Smuzhiyun }
1388*4882a593Smuzhiyun
1389*4882a593Smuzhiyun /*
1390*4882a593Smuzhiyun * Metadata must only be loaded into one table at once, so skip this
1391*4882a593Smuzhiyun * if metadata will be handed over during resume.
1392*4882a593Smuzhiyun * Chunk size will be set during the handover - set it to zero to
1393*4882a593Smuzhiyun * ensure it's ignored.
1394*4882a593Smuzhiyun */
1395*4882a593Smuzhiyun if (r > 0) {
1396*4882a593Smuzhiyun s->store->chunk_size = 0;
1397*4882a593Smuzhiyun return 0;
1398*4882a593Smuzhiyun }
1399*4882a593Smuzhiyun
1400*4882a593Smuzhiyun r = s->store->type->read_metadata(s->store, dm_add_exception,
1401*4882a593Smuzhiyun (void *)s);
1402*4882a593Smuzhiyun if (r < 0) {
1403*4882a593Smuzhiyun ti->error = "Failed to read snapshot metadata";
1404*4882a593Smuzhiyun goto bad_read_metadata;
1405*4882a593Smuzhiyun } else if (r > 0) {
1406*4882a593Smuzhiyun s->valid = 0;
1407*4882a593Smuzhiyun DMWARN("Snapshot is marked invalid.");
1408*4882a593Smuzhiyun }
1409*4882a593Smuzhiyun
1410*4882a593Smuzhiyun if (!s->store->chunk_size) {
1411*4882a593Smuzhiyun ti->error = "Chunk size not set";
1412*4882a593Smuzhiyun r = -EINVAL;
1413*4882a593Smuzhiyun goto bad_read_metadata;
1414*4882a593Smuzhiyun }
1415*4882a593Smuzhiyun
1416*4882a593Smuzhiyun r = dm_set_target_max_io_len(ti, s->store->chunk_size);
1417*4882a593Smuzhiyun if (r)
1418*4882a593Smuzhiyun goto bad_read_metadata;
1419*4882a593Smuzhiyun
1420*4882a593Smuzhiyun return 0;
1421*4882a593Smuzhiyun
1422*4882a593Smuzhiyun bad_read_metadata:
1423*4882a593Smuzhiyun unregister_snapshot(s);
1424*4882a593Smuzhiyun bad_load_and_register:
1425*4882a593Smuzhiyun mempool_exit(&s->pending_pool);
1426*4882a593Smuzhiyun bad_pending_pool:
1427*4882a593Smuzhiyun dm_kcopyd_client_destroy(s->kcopyd_client);
1428*4882a593Smuzhiyun bad_kcopyd:
1429*4882a593Smuzhiyun dm_exception_table_exit(&s->pending, pending_cache);
1430*4882a593Smuzhiyun dm_exception_table_exit(&s->complete, exception_cache);
1431*4882a593Smuzhiyun bad_hash_tables:
1432*4882a593Smuzhiyun dm_exception_store_destroy(s->store);
1433*4882a593Smuzhiyun bad_store:
1434*4882a593Smuzhiyun dm_put_device(ti, s->cow);
1435*4882a593Smuzhiyun bad_cow:
1436*4882a593Smuzhiyun dm_put_device(ti, s->origin);
1437*4882a593Smuzhiyun bad_origin:
1438*4882a593Smuzhiyun bad_features:
1439*4882a593Smuzhiyun kfree(s);
1440*4882a593Smuzhiyun bad:
1441*4882a593Smuzhiyun return r;
1442*4882a593Smuzhiyun }
1443*4882a593Smuzhiyun
1444*4882a593Smuzhiyun static void __free_exceptions(struct dm_snapshot *s)
1445*4882a593Smuzhiyun {
1446*4882a593Smuzhiyun dm_kcopyd_client_destroy(s->kcopyd_client);
1447*4882a593Smuzhiyun s->kcopyd_client = NULL;
1448*4882a593Smuzhiyun
1449*4882a593Smuzhiyun dm_exception_table_exit(&s->pending, pending_cache);
1450*4882a593Smuzhiyun dm_exception_table_exit(&s->complete, exception_cache);
1451*4882a593Smuzhiyun }
1452*4882a593Smuzhiyun
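/*
 * Transfer the completed exception table and exception store from snap_src
 * to snap_dest and invalidate the source.  Called with both snapshot locks
 * held for write.
 */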
1453*4882a593Smuzhiyun static void __handover_exceptions(struct dm_snapshot *snap_src,
1454*4882a593Smuzhiyun struct dm_snapshot *snap_dest)
1455*4882a593Smuzhiyun {
1456*4882a593Smuzhiyun union {
1457*4882a593Smuzhiyun struct dm_exception_table table_swap;
1458*4882a593Smuzhiyun struct dm_exception_store *store_swap;
1459*4882a593Smuzhiyun } u;
1460*4882a593Smuzhiyun
1461*4882a593Smuzhiyun /*
1462*4882a593Smuzhiyun * Swap all snapshot context information between the two instances.
1463*4882a593Smuzhiyun */
1464*4882a593Smuzhiyun u.table_swap = snap_dest->complete;
1465*4882a593Smuzhiyun snap_dest->complete = snap_src->complete;
1466*4882a593Smuzhiyun snap_src->complete = u.table_swap;
1467*4882a593Smuzhiyun
1468*4882a593Smuzhiyun u.store_swap = snap_dest->store;
1469*4882a593Smuzhiyun snap_dest->store = snap_src->store;
1470*4882a593Smuzhiyun snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
1471*4882a593Smuzhiyun snap_src->store = u.store_swap;
1472*4882a593Smuzhiyun
1473*4882a593Smuzhiyun snap_dest->store->snap = snap_dest;
1474*4882a593Smuzhiyun snap_src->store->snap = snap_src;
1475*4882a593Smuzhiyun
1476*4882a593Smuzhiyun snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
1477*4882a593Smuzhiyun snap_dest->valid = snap_src->valid;
1478*4882a593Smuzhiyun snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
1479*4882a593Smuzhiyun
1480*4882a593Smuzhiyun /*
1481*4882a593Smuzhiyun * Set source invalid to ensure it receives no further I/O.
1482*4882a593Smuzhiyun */
1483*4882a593Smuzhiyun snap_src->valid = 0;
1484*4882a593Smuzhiyun }
1485*4882a593Smuzhiyun
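/*
 * Destructor: cancel any pending exception handover, stop merging, wait
 * for outstanding pending exceptions and release all resources.
 */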
1486*4882a593Smuzhiyun static void snapshot_dtr(struct dm_target *ti)
1487*4882a593Smuzhiyun {
1488*4882a593Smuzhiyun #ifdef CONFIG_DM_DEBUG
1489*4882a593Smuzhiyun int i;
1490*4882a593Smuzhiyun #endif
1491*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
1492*4882a593Smuzhiyun struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
1493*4882a593Smuzhiyun
1494*4882a593Smuzhiyun down_read(&_origins_lock);
1495*4882a593Smuzhiyun /* Check whether exception handover must be cancelled */
1496*4882a593Smuzhiyun (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
1497*4882a593Smuzhiyun if (snap_src && snap_dest && (s == snap_src)) {
1498*4882a593Smuzhiyun down_write(&snap_dest->lock);
1499*4882a593Smuzhiyun snap_dest->valid = 0;
1500*4882a593Smuzhiyun up_write(&snap_dest->lock);
1501*4882a593Smuzhiyun DMERR("Cancelling snapshot handover.");
1502*4882a593Smuzhiyun }
1503*4882a593Smuzhiyun up_read(&_origins_lock);
1504*4882a593Smuzhiyun
1505*4882a593Smuzhiyun if (dm_target_is_snapshot_merge(ti))
1506*4882a593Smuzhiyun stop_merge(s);
1507*4882a593Smuzhiyun
1508*4882a593Smuzhiyun /* Prevent further origin writes from using this snapshot. */
1509*4882a593Smuzhiyun /* After this returns there can be no new kcopyd jobs. */
1510*4882a593Smuzhiyun unregister_snapshot(s);
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun while (atomic_read(&s->pending_exceptions_count))
1513*4882a593Smuzhiyun msleep(1);
1514*4882a593Smuzhiyun /*
1515*4882a593Smuzhiyun * Ensure instructions in mempool_exit aren't reordered
1516*4882a593Smuzhiyun * before atomic_read.
1517*4882a593Smuzhiyun */
1518*4882a593Smuzhiyun smp_mb();
1519*4882a593Smuzhiyun
1520*4882a593Smuzhiyun #ifdef CONFIG_DM_DEBUG
1521*4882a593Smuzhiyun for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1522*4882a593Smuzhiyun BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
1523*4882a593Smuzhiyun #endif
1524*4882a593Smuzhiyun
1525*4882a593Smuzhiyun __free_exceptions(s);
1526*4882a593Smuzhiyun
1527*4882a593Smuzhiyun mempool_exit(&s->pending_pool);
1528*4882a593Smuzhiyun
1529*4882a593Smuzhiyun dm_exception_store_destroy(s->store);
1530*4882a593Smuzhiyun
1531*4882a593Smuzhiyun bio_uninit(&s->flush_bio);
1532*4882a593Smuzhiyun
1533*4882a593Smuzhiyun dm_put_device(ti, s->cow);
1534*4882a593Smuzhiyun
1535*4882a593Smuzhiyun dm_put_device(ti, s->origin);
1536*4882a593Smuzhiyun
1537*4882a593Smuzhiyun WARN_ON(s->in_progress);
1538*4882a593Smuzhiyun
1539*4882a593Smuzhiyun kfree(s);
1540*4882a593Smuzhiyun }
1541*4882a593Smuzhiyun
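/* Track the number of in-flight COW copies for throttling purposes. */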
1542*4882a593Smuzhiyun static void account_start_copy(struct dm_snapshot *s)
1543*4882a593Smuzhiyun {
1544*4882a593Smuzhiyun spin_lock(&s->in_progress_wait.lock);
1545*4882a593Smuzhiyun s->in_progress++;
1546*4882a593Smuzhiyun spin_unlock(&s->in_progress_wait.lock);
1547*4882a593Smuzhiyun }
1548*4882a593Smuzhiyun
1549*4882a593Smuzhiyun static void account_end_copy(struct dm_snapshot *s)
1550*4882a593Smuzhiyun {
1551*4882a593Smuzhiyun spin_lock(&s->in_progress_wait.lock);
1552*4882a593Smuzhiyun BUG_ON(!s->in_progress);
1553*4882a593Smuzhiyun s->in_progress--;
1554*4882a593Smuzhiyun if (likely(s->in_progress <= cow_threshold) &&
1555*4882a593Smuzhiyun unlikely(waitqueue_active(&s->in_progress_wait)))
1556*4882a593Smuzhiyun wake_up_locked(&s->in_progress_wait);
1557*4882a593Smuzhiyun spin_unlock(&s->in_progress_wait.lock);
1558*4882a593Smuzhiyun }
1559*4882a593Smuzhiyun
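/*
 * Throttle writers while too many COW copies are in flight.  Returns false
 * if the caller slept (dropping _origins_lock if requested) and must retry.
 */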
1560*4882a593Smuzhiyun static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
1561*4882a593Smuzhiyun {
1562*4882a593Smuzhiyun if (unlikely(s->in_progress > cow_threshold)) {
1563*4882a593Smuzhiyun spin_lock(&s->in_progress_wait.lock);
1564*4882a593Smuzhiyun if (likely(s->in_progress > cow_threshold)) {
1565*4882a593Smuzhiyun /*
1566*4882a593Smuzhiyun * NOTE: this throttle doesn't account for whether
1567*4882a593Smuzhiyun * the caller is servicing an IO that will trigger a COW
1568*4882a593Smuzhiyun * so excess throttling may result for chunks not required
1569*4882a593Smuzhiyun * to be COW'd. But if cow_threshold was reached, extra
1570*4882a593Smuzhiyun * throttling is unlikely to negatively impact performance.
1571*4882a593Smuzhiyun */
1572*4882a593Smuzhiyun DECLARE_WAITQUEUE(wait, current);
1573*4882a593Smuzhiyun __add_wait_queue(&s->in_progress_wait, &wait);
1574*4882a593Smuzhiyun __set_current_state(TASK_UNINTERRUPTIBLE);
1575*4882a593Smuzhiyun spin_unlock(&s->in_progress_wait.lock);
1576*4882a593Smuzhiyun if (unlock_origins)
1577*4882a593Smuzhiyun up_read(&_origins_lock);
1578*4882a593Smuzhiyun io_schedule();
1579*4882a593Smuzhiyun remove_wait_queue(&s->in_progress_wait, &wait);
1580*4882a593Smuzhiyun return false;
1581*4882a593Smuzhiyun }
1582*4882a593Smuzhiyun spin_unlock(&s->in_progress_wait.lock);
1583*4882a593Smuzhiyun }
1584*4882a593Smuzhiyun return true;
1585*4882a593Smuzhiyun }
1586*4882a593Smuzhiyun
1587*4882a593Smuzhiyun /*
1588*4882a593Smuzhiyun * Flush a list of buffers.
1589*4882a593Smuzhiyun */
1590*4882a593Smuzhiyun static void flush_bios(struct bio *bio)
1591*4882a593Smuzhiyun {
1592*4882a593Smuzhiyun struct bio *n;
1593*4882a593Smuzhiyun
1594*4882a593Smuzhiyun while (bio) {
1595*4882a593Smuzhiyun n = bio->bi_next;
1596*4882a593Smuzhiyun bio->bi_next = NULL;
1597*4882a593Smuzhiyun submit_bio_noacct(bio);
1598*4882a593Smuzhiyun bio = n;
1599*4882a593Smuzhiyun }
1600*4882a593Smuzhiyun }
1601*4882a593Smuzhiyun
1602*4882a593Smuzhiyun static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
1603*4882a593Smuzhiyun
1604*4882a593Smuzhiyun /*
1605*4882a593Smuzhiyun * Retry a list of origin bios that were waiting on a pending exception.
1606*4882a593Smuzhiyun */
1607*4882a593Smuzhiyun static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
1608*4882a593Smuzhiyun {
1609*4882a593Smuzhiyun struct bio *n;
1610*4882a593Smuzhiyun int r;
1611*4882a593Smuzhiyun
1612*4882a593Smuzhiyun while (bio) {
1613*4882a593Smuzhiyun n = bio->bi_next;
1614*4882a593Smuzhiyun bio->bi_next = NULL;
1615*4882a593Smuzhiyun r = do_origin(s->origin, bio, false);
1616*4882a593Smuzhiyun if (r == DM_MAPIO_REMAPPED)
1617*4882a593Smuzhiyun submit_bio_noacct(bio);
1618*4882a593Smuzhiyun bio = n;
1619*4882a593Smuzhiyun }
1620*4882a593Smuzhiyun }
1621*4882a593Smuzhiyun
1622*4882a593Smuzhiyun /*
1623*4882a593Smuzhiyun * Error a list of buffers.
1624*4882a593Smuzhiyun */
1625*4882a593Smuzhiyun static void error_bios(struct bio *bio)
1626*4882a593Smuzhiyun {
1627*4882a593Smuzhiyun struct bio *n;
1628*4882a593Smuzhiyun
1629*4882a593Smuzhiyun while (bio) {
1630*4882a593Smuzhiyun n = bio->bi_next;
1631*4882a593Smuzhiyun bio->bi_next = NULL;
1632*4882a593Smuzhiyun bio_io_error(bio);
1633*4882a593Smuzhiyun bio = n;
1634*4882a593Smuzhiyun }
1635*4882a593Smuzhiyun }
1636*4882a593Smuzhiyun
1637*4882a593Smuzhiyun static void __invalidate_snapshot(struct dm_snapshot *s, int err)
1638*4882a593Smuzhiyun {
1639*4882a593Smuzhiyun if (!s->valid)
1640*4882a593Smuzhiyun return;
1641*4882a593Smuzhiyun
1642*4882a593Smuzhiyun if (err == -EIO)
1643*4882a593Smuzhiyun DMERR("Invalidating snapshot: Error reading/writing.");
1644*4882a593Smuzhiyun else if (err == -ENOMEM)
1645*4882a593Smuzhiyun DMERR("Invalidating snapshot: Unable to allocate exception.");
1646*4882a593Smuzhiyun
1647*4882a593Smuzhiyun if (s->store->type->drop_snapshot)
1648*4882a593Smuzhiyun s->store->type->drop_snapshot(s->store);
1649*4882a593Smuzhiyun
1650*4882a593Smuzhiyun s->valid = 0;
1651*4882a593Smuzhiyun
1652*4882a593Smuzhiyun dm_table_event(s->ti->table);
1653*4882a593Smuzhiyun }
1654*4882a593Smuzhiyun
1655*4882a593Smuzhiyun static void invalidate_snapshot(struct dm_snapshot *s, int err)
1656*4882a593Smuzhiyun {
1657*4882a593Smuzhiyun down_write(&s->lock);
1658*4882a593Smuzhiyun __invalidate_snapshot(s, err);
1659*4882a593Smuzhiyun up_write(&s->lock);
1660*4882a593Smuzhiyun }
1661*4882a593Smuzhiyun
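/*
 * Callback run once the exception store has committed (or failed to
 * commit) a pending exception: insert the completed exception, release the
 * bios waiting on it and retry the origin writes that were held back.
 */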
1662*4882a593Smuzhiyun static void pending_complete(void *context, int success)
1663*4882a593Smuzhiyun {
1664*4882a593Smuzhiyun struct dm_snap_pending_exception *pe = context;
1665*4882a593Smuzhiyun struct dm_exception *e;
1666*4882a593Smuzhiyun struct dm_snapshot *s = pe->snap;
1667*4882a593Smuzhiyun struct bio *origin_bios = NULL;
1668*4882a593Smuzhiyun struct bio *snapshot_bios = NULL;
1669*4882a593Smuzhiyun struct bio *full_bio = NULL;
1670*4882a593Smuzhiyun struct dm_exception_table_lock lock;
1671*4882a593Smuzhiyun int error = 0;
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
1674*4882a593Smuzhiyun
1675*4882a593Smuzhiyun if (!success) {
1676*4882a593Smuzhiyun /* Read/write error - snapshot is unusable */
1677*4882a593Smuzhiyun invalidate_snapshot(s, -EIO);
1678*4882a593Smuzhiyun error = 1;
1679*4882a593Smuzhiyun
1680*4882a593Smuzhiyun dm_exception_table_lock(&lock);
1681*4882a593Smuzhiyun goto out;
1682*4882a593Smuzhiyun }
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun e = alloc_completed_exception(GFP_NOIO);
1685*4882a593Smuzhiyun if (!e) {
1686*4882a593Smuzhiyun invalidate_snapshot(s, -ENOMEM);
1687*4882a593Smuzhiyun error = 1;
1688*4882a593Smuzhiyun
1689*4882a593Smuzhiyun dm_exception_table_lock(&lock);
1690*4882a593Smuzhiyun goto out;
1691*4882a593Smuzhiyun }
1692*4882a593Smuzhiyun *e = pe->e;
1693*4882a593Smuzhiyun
1694*4882a593Smuzhiyun down_read(&s->lock);
1695*4882a593Smuzhiyun dm_exception_table_lock(&lock);
1696*4882a593Smuzhiyun if (!s->valid) {
1697*4882a593Smuzhiyun up_read(&s->lock);
1698*4882a593Smuzhiyun free_completed_exception(e);
1699*4882a593Smuzhiyun error = 1;
1700*4882a593Smuzhiyun
1701*4882a593Smuzhiyun goto out;
1702*4882a593Smuzhiyun }
1703*4882a593Smuzhiyun
1704*4882a593Smuzhiyun /*
1705*4882a593Smuzhiyun * Add a proper exception. After inserting the completed exception all
1706*4882a593Smuzhiyun * subsequent snapshot reads to this chunk will be redirected to the
1707*4882a593Smuzhiyun * COW device. This ensures that we do not starve. Moreover, as long
1708*4882a593Smuzhiyun * as the pending exception exists, neither origin writes nor snapshot
1709*4882a593Smuzhiyun * merging can overwrite the chunk in origin.
1710*4882a593Smuzhiyun */
1711*4882a593Smuzhiyun dm_insert_exception(&s->complete, e);
1712*4882a593Smuzhiyun up_read(&s->lock);
1713*4882a593Smuzhiyun
1714*4882a593Smuzhiyun /* Wait for conflicting reads to drain */
1715*4882a593Smuzhiyun if (__chunk_is_tracked(s, pe->e.old_chunk)) {
1716*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
1717*4882a593Smuzhiyun __check_for_conflicting_io(s, pe->e.old_chunk);
1718*4882a593Smuzhiyun dm_exception_table_lock(&lock);
1719*4882a593Smuzhiyun }
1720*4882a593Smuzhiyun
1721*4882a593Smuzhiyun out:
1722*4882a593Smuzhiyun /* Remove the in-flight exception from the list */
1723*4882a593Smuzhiyun dm_remove_exception(&pe->e);
1724*4882a593Smuzhiyun
1725*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
1726*4882a593Smuzhiyun
1727*4882a593Smuzhiyun snapshot_bios = bio_list_get(&pe->snapshot_bios);
1728*4882a593Smuzhiyun origin_bios = bio_list_get(&pe->origin_bios);
1729*4882a593Smuzhiyun full_bio = pe->full_bio;
1730*4882a593Smuzhiyun if (full_bio)
1731*4882a593Smuzhiyun full_bio->bi_end_io = pe->full_bio_end_io;
1732*4882a593Smuzhiyun increment_pending_exceptions_done_count();
1733*4882a593Smuzhiyun
1734*4882a593Smuzhiyun /* Submit any pending write bios */
1735*4882a593Smuzhiyun if (error) {
1736*4882a593Smuzhiyun if (full_bio)
1737*4882a593Smuzhiyun bio_io_error(full_bio);
1738*4882a593Smuzhiyun error_bios(snapshot_bios);
1739*4882a593Smuzhiyun } else {
1740*4882a593Smuzhiyun if (full_bio)
1741*4882a593Smuzhiyun bio_endio(full_bio);
1742*4882a593Smuzhiyun flush_bios(snapshot_bios);
1743*4882a593Smuzhiyun }
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun retry_origin_bios(s, origin_bios);
1746*4882a593Smuzhiyun
1747*4882a593Smuzhiyun free_pending_exception(pe);
1748*4882a593Smuzhiyun }
1749*4882a593Smuzhiyun
1750*4882a593Smuzhiyun static void complete_exception(struct dm_snap_pending_exception *pe)
1751*4882a593Smuzhiyun {
1752*4882a593Smuzhiyun struct dm_snapshot *s = pe->snap;
1753*4882a593Smuzhiyun
1754*4882a593Smuzhiyun /* Update the metadata if we are persistent */
1755*4882a593Smuzhiyun s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
1756*4882a593Smuzhiyun pending_complete, pe);
1757*4882a593Smuzhiyun }
1758*4882a593Smuzhiyun
1759*4882a593Smuzhiyun /*
1760*4882a593Smuzhiyun * Called when the copy I/O has finished. kcopyd actually runs
1761*4882a593Smuzhiyun * this code so don't block.
1762*4882a593Smuzhiyun */
1763*4882a593Smuzhiyun static void copy_callback(int read_err, unsigned long write_err, void *context)
1764*4882a593Smuzhiyun {
1765*4882a593Smuzhiyun struct dm_snap_pending_exception *pe = context;
1766*4882a593Smuzhiyun struct dm_snapshot *s = pe->snap;
1767*4882a593Smuzhiyun
1768*4882a593Smuzhiyun pe->copy_error = read_err || write_err;
1769*4882a593Smuzhiyun
1770*4882a593Smuzhiyun if (pe->exception_sequence == s->exception_complete_sequence) {
1771*4882a593Smuzhiyun struct rb_node *next;
1772*4882a593Smuzhiyun
1773*4882a593Smuzhiyun s->exception_complete_sequence++;
1774*4882a593Smuzhiyun complete_exception(pe);
1775*4882a593Smuzhiyun
1776*4882a593Smuzhiyun next = rb_first(&s->out_of_order_tree);
1777*4882a593Smuzhiyun while (next) {
1778*4882a593Smuzhiyun pe = rb_entry(next, struct dm_snap_pending_exception,
1779*4882a593Smuzhiyun out_of_order_node);
1780*4882a593Smuzhiyun if (pe->exception_sequence != s->exception_complete_sequence)
1781*4882a593Smuzhiyun break;
1782*4882a593Smuzhiyun next = rb_next(next);
1783*4882a593Smuzhiyun s->exception_complete_sequence++;
1784*4882a593Smuzhiyun rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
1785*4882a593Smuzhiyun complete_exception(pe);
1786*4882a593Smuzhiyun cond_resched();
1787*4882a593Smuzhiyun }
1788*4882a593Smuzhiyun } else {
1789*4882a593Smuzhiyun struct rb_node *parent = NULL;
1790*4882a593Smuzhiyun struct rb_node **p = &s->out_of_order_tree.rb_node;
1791*4882a593Smuzhiyun struct dm_snap_pending_exception *pe2;
1792*4882a593Smuzhiyun
1793*4882a593Smuzhiyun while (*p) {
1794*4882a593Smuzhiyun pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
1795*4882a593Smuzhiyun parent = *p;
1796*4882a593Smuzhiyun
1797*4882a593Smuzhiyun BUG_ON(pe->exception_sequence == pe2->exception_sequence);
1798*4882a593Smuzhiyun if (pe->exception_sequence < pe2->exception_sequence)
1799*4882a593Smuzhiyun p = &((*p)->rb_left);
1800*4882a593Smuzhiyun else
1801*4882a593Smuzhiyun p = &((*p)->rb_right);
1802*4882a593Smuzhiyun }
1803*4882a593Smuzhiyun
1804*4882a593Smuzhiyun rb_link_node(&pe->out_of_order_node, parent, p);
1805*4882a593Smuzhiyun rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1806*4882a593Smuzhiyun }
1807*4882a593Smuzhiyun account_end_copy(s);
1808*4882a593Smuzhiyun }
1809*4882a593Smuzhiyun
1810*4882a593Smuzhiyun /*
1811*4882a593Smuzhiyun * Dispatches the copy operation to kcopyd.
1812*4882a593Smuzhiyun */
1813*4882a593Smuzhiyun static void start_copy(struct dm_snap_pending_exception *pe)
1814*4882a593Smuzhiyun {
1815*4882a593Smuzhiyun struct dm_snapshot *s = pe->snap;
1816*4882a593Smuzhiyun struct dm_io_region src, dest;
1817*4882a593Smuzhiyun struct block_device *bdev = s->origin->bdev;
1818*4882a593Smuzhiyun sector_t dev_size;
1819*4882a593Smuzhiyun
1820*4882a593Smuzhiyun dev_size = get_dev_size(bdev);
1821*4882a593Smuzhiyun
1822*4882a593Smuzhiyun src.bdev = bdev;
1823*4882a593Smuzhiyun src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
1824*4882a593Smuzhiyun src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
1825*4882a593Smuzhiyun
1826*4882a593Smuzhiyun dest.bdev = s->cow->bdev;
1827*4882a593Smuzhiyun dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
1828*4882a593Smuzhiyun dest.count = src.count;
1829*4882a593Smuzhiyun
1830*4882a593Smuzhiyun /* Hand over to kcopyd */
1831*4882a593Smuzhiyun account_start_copy(s);
1832*4882a593Smuzhiyun dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1833*4882a593Smuzhiyun }
1834*4882a593Smuzhiyun
1835*4882a593Smuzhiyun static void full_bio_end_io(struct bio *bio)
1836*4882a593Smuzhiyun {
1837*4882a593Smuzhiyun void *callback_data = bio->bi_private;
1838*4882a593Smuzhiyun
1839*4882a593Smuzhiyun dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
1840*4882a593Smuzhiyun }
1841*4882a593Smuzhiyun
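/*
 * The incoming write covers the whole chunk and has already been remapped
 * to the COW device, so use it as the copy instead of having kcopyd read
 * the chunk back from the origin.
 */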
1842*4882a593Smuzhiyun static void start_full_bio(struct dm_snap_pending_exception *pe,
1843*4882a593Smuzhiyun struct bio *bio)
1844*4882a593Smuzhiyun {
1845*4882a593Smuzhiyun struct dm_snapshot *s = pe->snap;
1846*4882a593Smuzhiyun void *callback_data;
1847*4882a593Smuzhiyun
1848*4882a593Smuzhiyun pe->full_bio = bio;
1849*4882a593Smuzhiyun pe->full_bio_end_io = bio->bi_end_io;
1850*4882a593Smuzhiyun
1851*4882a593Smuzhiyun account_start_copy(s);
1852*4882a593Smuzhiyun callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1853*4882a593Smuzhiyun copy_callback, pe);
1854*4882a593Smuzhiyun
1855*4882a593Smuzhiyun bio->bi_end_io = full_bio_end_io;
1856*4882a593Smuzhiyun bio->bi_private = callback_data;
1857*4882a593Smuzhiyun
1858*4882a593Smuzhiyun submit_bio_noacct(bio);
1859*4882a593Smuzhiyun }
1860*4882a593Smuzhiyun
1861*4882a593Smuzhiyun static struct dm_snap_pending_exception *
1862*4882a593Smuzhiyun __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
1863*4882a593Smuzhiyun {
1864*4882a593Smuzhiyun struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);
1865*4882a593Smuzhiyun
1866*4882a593Smuzhiyun if (!e)
1867*4882a593Smuzhiyun return NULL;
1868*4882a593Smuzhiyun
1869*4882a593Smuzhiyun return container_of(e, struct dm_snap_pending_exception, e);
1870*4882a593Smuzhiyun }
1871*4882a593Smuzhiyun
1872*4882a593Smuzhiyun /*
1873*4882a593Smuzhiyun * Inserts a pending exception into the pending table.
1874*4882a593Smuzhiyun *
1875*4882a593Smuzhiyun * NOTE: a write lock must be held on the chunk's pending exception table slot
1876*4882a593Smuzhiyun * before calling this.
1877*4882a593Smuzhiyun */
1878*4882a593Smuzhiyun static struct dm_snap_pending_exception *
1879*4882a593Smuzhiyun __insert_pending_exception(struct dm_snapshot *s,
1880*4882a593Smuzhiyun struct dm_snap_pending_exception *pe, chunk_t chunk)
1881*4882a593Smuzhiyun {
1882*4882a593Smuzhiyun pe->e.old_chunk = chunk;
1883*4882a593Smuzhiyun bio_list_init(&pe->origin_bios);
1884*4882a593Smuzhiyun bio_list_init(&pe->snapshot_bios);
1885*4882a593Smuzhiyun pe->started = 0;
1886*4882a593Smuzhiyun pe->full_bio = NULL;
1887*4882a593Smuzhiyun
1888*4882a593Smuzhiyun spin_lock(&s->pe_allocation_lock);
1889*4882a593Smuzhiyun if (s->store->type->prepare_exception(s->store, &pe->e)) {
1890*4882a593Smuzhiyun spin_unlock(&s->pe_allocation_lock);
1891*4882a593Smuzhiyun free_pending_exception(pe);
1892*4882a593Smuzhiyun return NULL;
1893*4882a593Smuzhiyun }
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun pe->exception_sequence = s->exception_start_sequence++;
1896*4882a593Smuzhiyun spin_unlock(&s->pe_allocation_lock);
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun dm_insert_exception(&s->pending, &pe->e);
1899*4882a593Smuzhiyun
1900*4882a593Smuzhiyun return pe;
1901*4882a593Smuzhiyun }
1902*4882a593Smuzhiyun
1903*4882a593Smuzhiyun /*
1904*4882a593Smuzhiyun * Looks to see if this snapshot already has a pending exception
1905*4882a593Smuzhiyun * for this chunk, otherwise it allocates a new one and inserts
1906*4882a593Smuzhiyun * it into the pending table.
1907*4882a593Smuzhiyun *
1908*4882a593Smuzhiyun * NOTE: a write lock must be held on the chunk's pending exception table slot
1909*4882a593Smuzhiyun * before calling this.
1910*4882a593Smuzhiyun */
1911*4882a593Smuzhiyun static struct dm_snap_pending_exception *
1912*4882a593Smuzhiyun __find_pending_exception(struct dm_snapshot *s,
1913*4882a593Smuzhiyun struct dm_snap_pending_exception *pe, chunk_t chunk)
1914*4882a593Smuzhiyun {
1915*4882a593Smuzhiyun struct dm_snap_pending_exception *pe2;
1916*4882a593Smuzhiyun
1917*4882a593Smuzhiyun pe2 = __lookup_pending_exception(s, chunk);
1918*4882a593Smuzhiyun if (pe2) {
1919*4882a593Smuzhiyun free_pending_exception(pe);
1920*4882a593Smuzhiyun return pe2;
1921*4882a593Smuzhiyun }
1922*4882a593Smuzhiyun
1923*4882a593Smuzhiyun return __insert_pending_exception(s, pe, chunk);
1924*4882a593Smuzhiyun }
1925*4882a593Smuzhiyun
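/* Redirect a bio to the corresponding chunk on the COW device. */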
1926*4882a593Smuzhiyun static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
1927*4882a593Smuzhiyun struct bio *bio, chunk_t chunk)
1928*4882a593Smuzhiyun {
1929*4882a593Smuzhiyun bio_set_dev(bio, s->cow->bdev);
1930*4882a593Smuzhiyun bio->bi_iter.bi_sector =
1931*4882a593Smuzhiyun chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
1932*4882a593Smuzhiyun (chunk - e->old_chunk)) +
1933*4882a593Smuzhiyun (bio->bi_iter.bi_sector & s->store->chunk_mask);
1934*4882a593Smuzhiyun }
1935*4882a593Smuzhiyun
1936*4882a593Smuzhiyun static void zero_callback(int read_err, unsigned long write_err, void *context)
1937*4882a593Smuzhiyun {
1938*4882a593Smuzhiyun struct bio *bio = context;
1939*4882a593Smuzhiyun struct dm_snapshot *s = bio->bi_private;
1940*4882a593Smuzhiyun
1941*4882a593Smuzhiyun account_end_copy(s);
1942*4882a593Smuzhiyun bio->bi_status = write_err ? BLK_STS_IOERR : 0;
1943*4882a593Smuzhiyun bio_endio(bio);
1944*4882a593Smuzhiyun }
1945*4882a593Smuzhiyun
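/*
 * A discard that covers a whole remapped chunk is implemented by zeroing
 * that chunk on the COW device via kcopyd.
 */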
1946*4882a593Smuzhiyun static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
1947*4882a593Smuzhiyun struct bio *bio, chunk_t chunk)
1948*4882a593Smuzhiyun {
1949*4882a593Smuzhiyun struct dm_io_region dest;
1950*4882a593Smuzhiyun
1951*4882a593Smuzhiyun dest.bdev = s->cow->bdev;
1952*4882a593Smuzhiyun dest.sector = bio->bi_iter.bi_sector;
1953*4882a593Smuzhiyun dest.count = s->store->chunk_size;
1954*4882a593Smuzhiyun
1955*4882a593Smuzhiyun account_start_copy(s);
1956*4882a593Smuzhiyun WARN_ON_ONCE(bio->bi_private);
1957*4882a593Smuzhiyun bio->bi_private = s;
1958*4882a593Smuzhiyun dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
1959*4882a593Smuzhiyun }
1960*4882a593Smuzhiyun
1961*4882a593Smuzhiyun static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
1962*4882a593Smuzhiyun {
1963*4882a593Smuzhiyun return bio->bi_iter.bi_size ==
1964*4882a593Smuzhiyun (s->store->chunk_size << SECTOR_SHIFT);
1965*4882a593Smuzhiyun }
1966*4882a593Smuzhiyun
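/*
 * Map I/O submitted to the snapshot device.  Chunks with a completed
 * exception are redirected to the COW device; reads of unremapped chunks
 * go to the origin; writes to unremapped chunks allocate a pending
 * exception and are held back until the chunk has been copied out.
 */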
1967*4882a593Smuzhiyun static int snapshot_map(struct dm_target *ti, struct bio *bio)
1968*4882a593Smuzhiyun {
1969*4882a593Smuzhiyun struct dm_exception *e;
1970*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
1971*4882a593Smuzhiyun int r = DM_MAPIO_REMAPPED;
1972*4882a593Smuzhiyun chunk_t chunk;
1973*4882a593Smuzhiyun struct dm_snap_pending_exception *pe = NULL;
1974*4882a593Smuzhiyun struct dm_exception_table_lock lock;
1975*4882a593Smuzhiyun
1976*4882a593Smuzhiyun init_tracked_chunk(bio);
1977*4882a593Smuzhiyun
1978*4882a593Smuzhiyun if (bio->bi_opf & REQ_PREFLUSH) {
1979*4882a593Smuzhiyun bio_set_dev(bio, s->cow->bdev);
1980*4882a593Smuzhiyun return DM_MAPIO_REMAPPED;
1981*4882a593Smuzhiyun }
1982*4882a593Smuzhiyun
1983*4882a593Smuzhiyun chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1984*4882a593Smuzhiyun dm_exception_table_lock_init(s, chunk, &lock);
1985*4882a593Smuzhiyun
1986*4882a593Smuzhiyun /* Full snapshots are not usable */
1987*4882a593Smuzhiyun /* To get here the table must be live so s->active is always set. */
1988*4882a593Smuzhiyun if (!s->valid)
1989*4882a593Smuzhiyun return DM_MAPIO_KILL;
1990*4882a593Smuzhiyun
1991*4882a593Smuzhiyun if (bio_data_dir(bio) == WRITE) {
1992*4882a593Smuzhiyun while (unlikely(!wait_for_in_progress(s, false)))
1993*4882a593Smuzhiyun ; /* wait_for_in_progress() has slept */
1994*4882a593Smuzhiyun }
1995*4882a593Smuzhiyun
1996*4882a593Smuzhiyun down_read(&s->lock);
1997*4882a593Smuzhiyun dm_exception_table_lock(&lock);
1998*4882a593Smuzhiyun
1999*4882a593Smuzhiyun if (!s->valid || (unlikely(s->snapshot_overflowed) &&
2000*4882a593Smuzhiyun bio_data_dir(bio) == WRITE)) {
2001*4882a593Smuzhiyun r = DM_MAPIO_KILL;
2002*4882a593Smuzhiyun goto out_unlock;
2003*4882a593Smuzhiyun }
2004*4882a593Smuzhiyun
2005*4882a593Smuzhiyun if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2006*4882a593Smuzhiyun if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
2007*4882a593Smuzhiyun /*
2008*4882a593Smuzhiyun * passdown discard to origin (without triggering
2009*4882a593Smuzhiyun * snapshot exceptions via do_origin; doing so would
2010*4882a593Smuzhiyun * defeat the goal of freeing space in origin that is
2011*4882a593Smuzhiyun * implied by the "discard_passdown_origin" feature)
2012*4882a593Smuzhiyun */
2013*4882a593Smuzhiyun bio_set_dev(bio, s->origin->bdev);
2014*4882a593Smuzhiyun track_chunk(s, bio, chunk);
2015*4882a593Smuzhiyun goto out_unlock;
2016*4882a593Smuzhiyun }
2017*4882a593Smuzhiyun /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
2018*4882a593Smuzhiyun }
2019*4882a593Smuzhiyun
2020*4882a593Smuzhiyun /* If the block is already remapped - use that, else remap it */
2021*4882a593Smuzhiyun e = dm_lookup_exception(&s->complete, chunk);
2022*4882a593Smuzhiyun if (e) {
2023*4882a593Smuzhiyun remap_exception(s, e, bio, chunk);
2024*4882a593Smuzhiyun if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
2025*4882a593Smuzhiyun io_overlaps_chunk(s, bio)) {
2026*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2027*4882a593Smuzhiyun up_read(&s->lock);
2028*4882a593Smuzhiyun zero_exception(s, e, bio, chunk);
2029*4882a593Smuzhiyun r = DM_MAPIO_SUBMITTED; /* discard is not issued */
2030*4882a593Smuzhiyun goto out;
2031*4882a593Smuzhiyun }
2032*4882a593Smuzhiyun goto out_unlock;
2033*4882a593Smuzhiyun }
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2036*4882a593Smuzhiyun /*
2037*4882a593Smuzhiyun * If no exception exists, complete the discard immediately;
2038*4882a593Smuzhiyun * otherwise it would trigger a copy-out.
2039*4882a593Smuzhiyun */
2040*4882a593Smuzhiyun bio_endio(bio);
2041*4882a593Smuzhiyun r = DM_MAPIO_SUBMITTED;
2042*4882a593Smuzhiyun goto out_unlock;
2043*4882a593Smuzhiyun }
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun /*
2046*4882a593Smuzhiyun * Write to snapshot - higher level takes care of RW/RO
2047*4882a593Smuzhiyun * flags so we should only get this if we are
2048*4882a593Smuzhiyun * writeable.
2049*4882a593Smuzhiyun */
2050*4882a593Smuzhiyun if (bio_data_dir(bio) == WRITE) {
2051*4882a593Smuzhiyun pe = __lookup_pending_exception(s, chunk);
2052*4882a593Smuzhiyun if (!pe) {
2053*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2054*4882a593Smuzhiyun pe = alloc_pending_exception(s);
2055*4882a593Smuzhiyun dm_exception_table_lock(&lock);
2056*4882a593Smuzhiyun
2057*4882a593Smuzhiyun e = dm_lookup_exception(&s->complete, chunk);
2058*4882a593Smuzhiyun if (e) {
2059*4882a593Smuzhiyun free_pending_exception(pe);
2060*4882a593Smuzhiyun remap_exception(s, e, bio, chunk);
2061*4882a593Smuzhiyun goto out_unlock;
2062*4882a593Smuzhiyun }
2063*4882a593Smuzhiyun
2064*4882a593Smuzhiyun pe = __find_pending_exception(s, pe, chunk);
2065*4882a593Smuzhiyun if (!pe) {
2066*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2067*4882a593Smuzhiyun up_read(&s->lock);
2068*4882a593Smuzhiyun
2069*4882a593Smuzhiyun down_write(&s->lock);
2070*4882a593Smuzhiyun
2071*4882a593Smuzhiyun if (s->store->userspace_supports_overflow) {
2072*4882a593Smuzhiyun if (s->valid && !s->snapshot_overflowed) {
2073*4882a593Smuzhiyun s->snapshot_overflowed = 1;
2074*4882a593Smuzhiyun DMERR("Snapshot overflowed: Unable to allocate exception.");
2075*4882a593Smuzhiyun }
2076*4882a593Smuzhiyun } else
2077*4882a593Smuzhiyun __invalidate_snapshot(s, -ENOMEM);
2078*4882a593Smuzhiyun up_write(&s->lock);
2079*4882a593Smuzhiyun
2080*4882a593Smuzhiyun r = DM_MAPIO_KILL;
2081*4882a593Smuzhiyun goto out;
2082*4882a593Smuzhiyun }
2083*4882a593Smuzhiyun }
2084*4882a593Smuzhiyun
2085*4882a593Smuzhiyun remap_exception(s, &pe->e, bio, chunk);
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun r = DM_MAPIO_SUBMITTED;
2088*4882a593Smuzhiyun
2089*4882a593Smuzhiyun if (!pe->started && io_overlaps_chunk(s, bio)) {
2090*4882a593Smuzhiyun pe->started = 1;
2091*4882a593Smuzhiyun
2092*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2093*4882a593Smuzhiyun up_read(&s->lock);
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun start_full_bio(pe, bio);
2096*4882a593Smuzhiyun goto out;
2097*4882a593Smuzhiyun }
2098*4882a593Smuzhiyun
2099*4882a593Smuzhiyun bio_list_add(&pe->snapshot_bios, bio);
2100*4882a593Smuzhiyun
2101*4882a593Smuzhiyun if (!pe->started) {
2102*4882a593Smuzhiyun /* this is protected by the exception table lock */
2103*4882a593Smuzhiyun pe->started = 1;
2104*4882a593Smuzhiyun
2105*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2106*4882a593Smuzhiyun up_read(&s->lock);
2107*4882a593Smuzhiyun
2108*4882a593Smuzhiyun start_copy(pe);
2109*4882a593Smuzhiyun goto out;
2110*4882a593Smuzhiyun }
2111*4882a593Smuzhiyun } else {
2112*4882a593Smuzhiyun bio_set_dev(bio, s->origin->bdev);
2113*4882a593Smuzhiyun track_chunk(s, bio, chunk);
2114*4882a593Smuzhiyun }
2115*4882a593Smuzhiyun
2116*4882a593Smuzhiyun out_unlock:
2117*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2118*4882a593Smuzhiyun up_read(&s->lock);
2119*4882a593Smuzhiyun out:
2120*4882a593Smuzhiyun return r;
2121*4882a593Smuzhiyun }
2122*4882a593Smuzhiyun
2123*4882a593Smuzhiyun /*
2124*4882a593Smuzhiyun * A snapshot-merge target behaves like a combination of a snapshot
2125*4882a593Smuzhiyun * target and a snapshot-origin target. It only generates new
2126*4882a593Smuzhiyun * exceptions in other snapshots and not in the one that is being
2127*4882a593Smuzhiyun * merged.
2128*4882a593Smuzhiyun *
2129*4882a593Smuzhiyun * For each chunk, if there is an existing exception, it is used to
2130*4882a593Smuzhiyun * redirect I/O to the cow device. Otherwise I/O is sent to the origin,
2131*4882a593Smuzhiyun * which in turn might generate exceptions in other snapshots.
2132*4882a593Smuzhiyun * If merging is currently taking place on the chunk in question, the
2133*4882a593Smuzhiyun * I/O is deferred by adding it to s->bios_queued_during_merge.
2134*4882a593Smuzhiyun */
2135*4882a593Smuzhiyun static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
2136*4882a593Smuzhiyun {
2137*4882a593Smuzhiyun struct dm_exception *e;
2138*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2139*4882a593Smuzhiyun int r = DM_MAPIO_REMAPPED;
2140*4882a593Smuzhiyun chunk_t chunk;
2141*4882a593Smuzhiyun
2142*4882a593Smuzhiyun init_tracked_chunk(bio);
2143*4882a593Smuzhiyun
2144*4882a593Smuzhiyun if (bio->bi_opf & REQ_PREFLUSH) {
2145*4882a593Smuzhiyun if (!dm_bio_get_target_bio_nr(bio))
2146*4882a593Smuzhiyun bio_set_dev(bio, s->origin->bdev);
2147*4882a593Smuzhiyun else
2148*4882a593Smuzhiyun bio_set_dev(bio, s->cow->bdev);
2149*4882a593Smuzhiyun return DM_MAPIO_REMAPPED;
2150*4882a593Smuzhiyun }
2151*4882a593Smuzhiyun
2152*4882a593Smuzhiyun if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
2153*4882a593Smuzhiyun /* Once merging, discards no longer effect change */
2154*4882a593Smuzhiyun bio_endio(bio);
2155*4882a593Smuzhiyun return DM_MAPIO_SUBMITTED;
2156*4882a593Smuzhiyun }
2157*4882a593Smuzhiyun
2158*4882a593Smuzhiyun chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
2159*4882a593Smuzhiyun
2160*4882a593Smuzhiyun down_write(&s->lock);
2161*4882a593Smuzhiyun
2162*4882a593Smuzhiyun /* Full merging snapshots are redirected to the origin */
2163*4882a593Smuzhiyun if (!s->valid)
2164*4882a593Smuzhiyun goto redirect_to_origin;
2165*4882a593Smuzhiyun
2166*4882a593Smuzhiyun /* If the block is already remapped - use that */
2167*4882a593Smuzhiyun e = dm_lookup_exception(&s->complete, chunk);
2168*4882a593Smuzhiyun if (e) {
2169*4882a593Smuzhiyun /* Queue writes overlapping with chunks being merged */
2170*4882a593Smuzhiyun if (bio_data_dir(bio) == WRITE &&
2171*4882a593Smuzhiyun chunk >= s->first_merging_chunk &&
2172*4882a593Smuzhiyun chunk < (s->first_merging_chunk +
2173*4882a593Smuzhiyun s->num_merging_chunks)) {
2174*4882a593Smuzhiyun bio_set_dev(bio, s->origin->bdev);
2175*4882a593Smuzhiyun bio_list_add(&s->bios_queued_during_merge, bio);
2176*4882a593Smuzhiyun r = DM_MAPIO_SUBMITTED;
2177*4882a593Smuzhiyun goto out_unlock;
2178*4882a593Smuzhiyun }
2179*4882a593Smuzhiyun
2180*4882a593Smuzhiyun remap_exception(s, e, bio, chunk);
2181*4882a593Smuzhiyun
2182*4882a593Smuzhiyun if (bio_data_dir(bio) == WRITE)
2183*4882a593Smuzhiyun track_chunk(s, bio, chunk);
2184*4882a593Smuzhiyun goto out_unlock;
2185*4882a593Smuzhiyun }
2186*4882a593Smuzhiyun
2187*4882a593Smuzhiyun redirect_to_origin:
2188*4882a593Smuzhiyun bio_set_dev(bio, s->origin->bdev);
2189*4882a593Smuzhiyun
2190*4882a593Smuzhiyun if (bio_data_dir(bio) == WRITE) {
2191*4882a593Smuzhiyun up_write(&s->lock);
2192*4882a593Smuzhiyun return do_origin(s->origin, bio, false);
2193*4882a593Smuzhiyun }
2194*4882a593Smuzhiyun
2195*4882a593Smuzhiyun out_unlock:
2196*4882a593Smuzhiyun up_write(&s->lock);
2197*4882a593Smuzhiyun
2198*4882a593Smuzhiyun return r;
2199*4882a593Smuzhiyun }
2200*4882a593Smuzhiyun
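/* Stop tracking the chunk once a tracked bio completes. */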
2201*4882a593Smuzhiyun static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
2202*4882a593Smuzhiyun blk_status_t *error)
2203*4882a593Smuzhiyun {
2204*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2205*4882a593Smuzhiyun
2206*4882a593Smuzhiyun if (is_bio_tracked(bio))
2207*4882a593Smuzhiyun stop_tracking_chunk(s, bio);
2208*4882a593Smuzhiyun
2209*4882a593Smuzhiyun return DM_ENDIO_DONE;
2210*4882a593Smuzhiyun }
2211*4882a593Smuzhiyun
2212*4882a593Smuzhiyun static void snapshot_merge_presuspend(struct dm_target *ti)
2213*4882a593Smuzhiyun {
2214*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2215*4882a593Smuzhiyun
2216*4882a593Smuzhiyun stop_merge(s);
2217*4882a593Smuzhiyun }
2218*4882a593Smuzhiyun
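/*
 * Refuse to resume if this snapshot is the source of a pending exception
 * handover, or if the handover source has not been suspended yet.
 */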
2219*4882a593Smuzhiyun static int snapshot_preresume(struct dm_target *ti)
2220*4882a593Smuzhiyun {
2221*4882a593Smuzhiyun int r = 0;
2222*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2223*4882a593Smuzhiyun struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
2224*4882a593Smuzhiyun
2225*4882a593Smuzhiyun down_read(&_origins_lock);
2226*4882a593Smuzhiyun (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
2227*4882a593Smuzhiyun if (snap_src && snap_dest) {
2228*4882a593Smuzhiyun down_read(&snap_src->lock);
2229*4882a593Smuzhiyun if (s == snap_src) {
2230*4882a593Smuzhiyun DMERR("Unable to resume snapshot source until "
2231*4882a593Smuzhiyun "handover completes.");
2232*4882a593Smuzhiyun r = -EINVAL;
2233*4882a593Smuzhiyun } else if (!dm_suspended(snap_src->ti)) {
2234*4882a593Smuzhiyun DMERR("Unable to perform snapshot handover until "
2235*4882a593Smuzhiyun "source is suspended.");
2236*4882a593Smuzhiyun r = -EINVAL;
2237*4882a593Smuzhiyun }
2238*4882a593Smuzhiyun up_read(&snap_src->lock);
2239*4882a593Smuzhiyun }
2240*4882a593Smuzhiyun up_read(&_origins_lock);
2241*4882a593Smuzhiyun
2242*4882a593Smuzhiyun return r;
2243*4882a593Smuzhiyun }
2244*4882a593Smuzhiyun
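/*
 * Complete any pending exception handover (briefly suspending the origin
 * and pausing a running merge to do so), then mark the snapshot active so
 * that origin writes start triggering exceptions.
 */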
2245*4882a593Smuzhiyun static void snapshot_resume(struct dm_target *ti)
2246*4882a593Smuzhiyun {
2247*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2248*4882a593Smuzhiyun struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
2249*4882a593Smuzhiyun struct dm_origin *o;
2250*4882a593Smuzhiyun struct mapped_device *origin_md = NULL;
2251*4882a593Smuzhiyun bool must_restart_merging = false;
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun down_read(&_origins_lock);
2254*4882a593Smuzhiyun
2255*4882a593Smuzhiyun o = __lookup_dm_origin(s->origin->bdev);
2256*4882a593Smuzhiyun if (o)
2257*4882a593Smuzhiyun origin_md = dm_table_get_md(o->ti->table);
2258*4882a593Smuzhiyun if (!origin_md) {
2259*4882a593Smuzhiyun (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
2260*4882a593Smuzhiyun if (snap_merging)
2261*4882a593Smuzhiyun origin_md = dm_table_get_md(snap_merging->ti->table);
2262*4882a593Smuzhiyun }
2263*4882a593Smuzhiyun if (origin_md == dm_table_get_md(ti->table))
2264*4882a593Smuzhiyun origin_md = NULL;
2265*4882a593Smuzhiyun if (origin_md) {
2266*4882a593Smuzhiyun if (dm_hold(origin_md))
2267*4882a593Smuzhiyun origin_md = NULL;
2268*4882a593Smuzhiyun }
2269*4882a593Smuzhiyun
2270*4882a593Smuzhiyun up_read(&_origins_lock);
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun if (origin_md) {
2273*4882a593Smuzhiyun dm_internal_suspend_fast(origin_md);
2274*4882a593Smuzhiyun if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
2275*4882a593Smuzhiyun must_restart_merging = true;
2276*4882a593Smuzhiyun stop_merge(snap_merging);
2277*4882a593Smuzhiyun }
2278*4882a593Smuzhiyun }
2279*4882a593Smuzhiyun
2280*4882a593Smuzhiyun down_read(&_origins_lock);
2281*4882a593Smuzhiyun
2282*4882a593Smuzhiyun (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
2283*4882a593Smuzhiyun if (snap_src && snap_dest) {
2284*4882a593Smuzhiyun down_write(&snap_src->lock);
2285*4882a593Smuzhiyun down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
2286*4882a593Smuzhiyun __handover_exceptions(snap_src, snap_dest);
2287*4882a593Smuzhiyun up_write(&snap_dest->lock);
2288*4882a593Smuzhiyun up_write(&snap_src->lock);
2289*4882a593Smuzhiyun }
2290*4882a593Smuzhiyun
2291*4882a593Smuzhiyun up_read(&_origins_lock);
2292*4882a593Smuzhiyun
2293*4882a593Smuzhiyun if (origin_md) {
2294*4882a593Smuzhiyun if (must_restart_merging)
2295*4882a593Smuzhiyun start_merge(snap_merging);
2296*4882a593Smuzhiyun dm_internal_resume_fast(origin_md);
2297*4882a593Smuzhiyun dm_put(origin_md);
2298*4882a593Smuzhiyun }
2299*4882a593Smuzhiyun
2300*4882a593Smuzhiyun /* Now we have correct chunk size, reregister */
2301*4882a593Smuzhiyun reregister_snapshot(s);
2302*4882a593Smuzhiyun
2303*4882a593Smuzhiyun down_write(&s->lock);
2304*4882a593Smuzhiyun s->active = 1;
2305*4882a593Smuzhiyun up_write(&s->lock);
2306*4882a593Smuzhiyun }
2307*4882a593Smuzhiyun
2308*4882a593Smuzhiyun static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
2309*4882a593Smuzhiyun {
2310*4882a593Smuzhiyun uint32_t min_chunksize;
2311*4882a593Smuzhiyun
2312*4882a593Smuzhiyun down_read(&_origins_lock);
2313*4882a593Smuzhiyun min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
2314*4882a593Smuzhiyun up_read(&_origins_lock);
2315*4882a593Smuzhiyun
2316*4882a593Smuzhiyun return min_chunksize;
2317*4882a593Smuzhiyun }
2318*4882a593Smuzhiyun
2319*4882a593Smuzhiyun static void snapshot_merge_resume(struct dm_target *ti)
2320*4882a593Smuzhiyun {
2321*4882a593Smuzhiyun struct dm_snapshot *s = ti->private;
2322*4882a593Smuzhiyun
2323*4882a593Smuzhiyun /*
2324*4882a593Smuzhiyun * Handover exceptions from existing snapshot.
2325*4882a593Smuzhiyun */
2326*4882a593Smuzhiyun snapshot_resume(ti);
2327*4882a593Smuzhiyun
2328*4882a593Smuzhiyun /*
2329*4882a593Smuzhiyun * snapshot-merge acts as an origin, so set ti->max_io_len
2330*4882a593Smuzhiyun */
2331*4882a593Smuzhiyun ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);
2332*4882a593Smuzhiyun
2333*4882a593Smuzhiyun start_merge(s);
2334*4882a593Smuzhiyun }
2335*4882a593Smuzhiyun
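/*
 * Report status: allocated/total/metadata sectors for STATUSTYPE_INFO, or
 * the constructor arguments (including feature args) for STATUSTYPE_TABLE.
 */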
2336*4882a593Smuzhiyun static void snapshot_status(struct dm_target *ti, status_type_t type,
2337*4882a593Smuzhiyun unsigned status_flags, char *result, unsigned maxlen)
2338*4882a593Smuzhiyun {
2339*4882a593Smuzhiyun unsigned sz = 0;
2340*4882a593Smuzhiyun struct dm_snapshot *snap = ti->private;
2341*4882a593Smuzhiyun unsigned num_features;
2342*4882a593Smuzhiyun
2343*4882a593Smuzhiyun switch (type) {
2344*4882a593Smuzhiyun case STATUSTYPE_INFO:
2345*4882a593Smuzhiyun
2346*4882a593Smuzhiyun down_write(&snap->lock);
2347*4882a593Smuzhiyun
2348*4882a593Smuzhiyun if (!snap->valid)
2349*4882a593Smuzhiyun DMEMIT("Invalid");
2350*4882a593Smuzhiyun else if (snap->merge_failed)
2351*4882a593Smuzhiyun DMEMIT("Merge failed");
2352*4882a593Smuzhiyun else if (snap->snapshot_overflowed)
2353*4882a593Smuzhiyun DMEMIT("Overflow");
2354*4882a593Smuzhiyun else {
2355*4882a593Smuzhiyun if (snap->store->type->usage) {
2356*4882a593Smuzhiyun sector_t total_sectors, sectors_allocated,
2357*4882a593Smuzhiyun metadata_sectors;
2358*4882a593Smuzhiyun snap->store->type->usage(snap->store,
2359*4882a593Smuzhiyun &total_sectors,
2360*4882a593Smuzhiyun &sectors_allocated,
2361*4882a593Smuzhiyun &metadata_sectors);
2362*4882a593Smuzhiyun DMEMIT("%llu/%llu %llu",
2363*4882a593Smuzhiyun (unsigned long long)sectors_allocated,
2364*4882a593Smuzhiyun (unsigned long long)total_sectors,
2365*4882a593Smuzhiyun (unsigned long long)metadata_sectors);
2366*4882a593Smuzhiyun }
2367*4882a593Smuzhiyun else
2368*4882a593Smuzhiyun DMEMIT("Unknown");
2369*4882a593Smuzhiyun }
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun up_write(&snap->lock);
2372*4882a593Smuzhiyun
2373*4882a593Smuzhiyun break;
2374*4882a593Smuzhiyun
2375*4882a593Smuzhiyun case STATUSTYPE_TABLE:
2376*4882a593Smuzhiyun /*
2377*4882a593Smuzhiyun * kdevname returns a static pointer so we need
2378*4882a593Smuzhiyun * to make private copies if the output is to
2379*4882a593Smuzhiyun * make sense.
2380*4882a593Smuzhiyun */
2381*4882a593Smuzhiyun DMEMIT("%s %s", snap->origin->name, snap->cow->name);
2382*4882a593Smuzhiyun sz += snap->store->type->status(snap->store, type, result + sz,
2383*4882a593Smuzhiyun maxlen - sz);
2384*4882a593Smuzhiyun num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
2385*4882a593Smuzhiyun if (num_features) {
2386*4882a593Smuzhiyun DMEMIT(" %u", num_features);
2387*4882a593Smuzhiyun if (snap->discard_zeroes_cow)
2388*4882a593Smuzhiyun DMEMIT(" discard_zeroes_cow");
2389*4882a593Smuzhiyun if (snap->discard_passdown_origin)
2390*4882a593Smuzhiyun DMEMIT(" discard_passdown_origin");
2391*4882a593Smuzhiyun }
2392*4882a593Smuzhiyun break;
2393*4882a593Smuzhiyun }
2394*4882a593Smuzhiyun }
2395*4882a593Smuzhiyun
2396*4882a593Smuzhiyun static int snapshot_iterate_devices(struct dm_target *ti,
2397*4882a593Smuzhiyun iterate_devices_callout_fn fn, void *data)
2398*4882a593Smuzhiyun {
2399*4882a593Smuzhiyun struct dm_snapshot *snap = ti->private;
2400*4882a593Smuzhiyun int r;
2401*4882a593Smuzhiyun
2402*4882a593Smuzhiyun r = fn(ti, snap->origin, 0, ti->len, data);
2403*4882a593Smuzhiyun
2404*4882a593Smuzhiyun if (!r)
2405*4882a593Smuzhiyun r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
2406*4882a593Smuzhiyun
2407*4882a593Smuzhiyun return r;
2408*4882a593Smuzhiyun }
2409*4882a593Smuzhiyun
2410*4882a593Smuzhiyun static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
2411*4882a593Smuzhiyun {
2412*4882a593Smuzhiyun struct dm_snapshot *snap = ti->private;
2413*4882a593Smuzhiyun
2414*4882a593Smuzhiyun if (snap->discard_zeroes_cow) {
2415*4882a593Smuzhiyun struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
2416*4882a593Smuzhiyun
2417*4882a593Smuzhiyun down_read(&_origins_lock);
2418*4882a593Smuzhiyun
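/*
 * If another snapshot shares this COW device (an exception handover is
 * pending), base the discard limits on the handover source's store.
 */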
2419*4882a593Smuzhiyun (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
2420*4882a593Smuzhiyun if (snap_src && snap_dest)
2421*4882a593Smuzhiyun snap = snap_src;
2422*4882a593Smuzhiyun
2423*4882a593Smuzhiyun /* All discards are split on chunk_size boundary */
2424*4882a593Smuzhiyun limits->discard_granularity = snap->store->chunk_size;
2425*4882a593Smuzhiyun limits->max_discard_sectors = snap->store->chunk_size;
2426*4882a593Smuzhiyun
2427*4882a593Smuzhiyun up_read(&_origins_lock);
2428*4882a593Smuzhiyun }
2429*4882a593Smuzhiyun }
2430*4882a593Smuzhiyun
2431*4882a593Smuzhiyun /*-----------------------------------------------------------------
2432*4882a593Smuzhiyun * Origin methods
2433*4882a593Smuzhiyun *---------------------------------------------------------------*/
2434*4882a593Smuzhiyun
2435*4882a593Smuzhiyun /*
2436*4882a593Smuzhiyun * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
2437*4882a593Smuzhiyun * supplied bio was ignored. The caller may submit it immediately.
2438*4882a593Smuzhiyun * (No remapping actually occurs as the origin is always a direct linear
2439*4882a593Smuzhiyun * map.)
2440*4882a593Smuzhiyun *
2441*4882a593Smuzhiyun * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
2442*4882a593Smuzhiyun * and any supplied bio is added to a list to be submitted once all
2443*4882a593Smuzhiyun * the necessary exceptions exist.
2444*4882a593Smuzhiyun */
2445*4882a593Smuzhiyun static int __origin_write(struct list_head *snapshots, sector_t sector,
2446*4882a593Smuzhiyun struct bio *bio)
2447*4882a593Smuzhiyun {
2448*4882a593Smuzhiyun int r = DM_MAPIO_REMAPPED;
2449*4882a593Smuzhiyun struct dm_snapshot *snap;
2450*4882a593Smuzhiyun struct dm_exception *e;
2451*4882a593Smuzhiyun struct dm_snap_pending_exception *pe, *pe2;
2452*4882a593Smuzhiyun struct dm_snap_pending_exception *pe_to_start_now = NULL;
2453*4882a593Smuzhiyun struct dm_snap_pending_exception *pe_to_start_last = NULL;
2454*4882a593Smuzhiyun struct dm_exception_table_lock lock;
2455*4882a593Smuzhiyun chunk_t chunk;
2456*4882a593Smuzhiyun
2457*4882a593Smuzhiyun /* Do all the snapshots on this origin */
2458*4882a593Smuzhiyun list_for_each_entry (snap, snapshots, list) {
2459*4882a593Smuzhiyun /*
2460*4882a593Smuzhiyun * Don't make new exceptions in a merging snapshot
2461*4882a593Smuzhiyun * because it has effectively been deleted
2462*4882a593Smuzhiyun */
2463*4882a593Smuzhiyun if (dm_target_is_snapshot_merge(snap->ti))
2464*4882a593Smuzhiyun continue;
2465*4882a593Smuzhiyun
2466*4882a593Smuzhiyun /* Nothing to do if writing beyond end of snapshot */
2467*4882a593Smuzhiyun if (sector >= dm_table_get_size(snap->ti->table))
2468*4882a593Smuzhiyun continue;
2469*4882a593Smuzhiyun
2470*4882a593Smuzhiyun /*
2471*4882a593Smuzhiyun * Remember, different snapshots can have
2472*4882a593Smuzhiyun * different chunk sizes.
2473*4882a593Smuzhiyun */
2474*4882a593Smuzhiyun chunk = sector_to_chunk(snap->store, sector);
2475*4882a593Smuzhiyun dm_exception_table_lock_init(snap, chunk, &lock);
2476*4882a593Smuzhiyun
2477*4882a593Smuzhiyun down_read(&snap->lock);
2478*4882a593Smuzhiyun dm_exception_table_lock(&lock);
2479*4882a593Smuzhiyun
2480*4882a593Smuzhiyun /* Only deal with valid and active snapshots */
2481*4882a593Smuzhiyun if (!snap->valid || !snap->active)
2482*4882a593Smuzhiyun goto next_snapshot;
2483*4882a593Smuzhiyun
2484*4882a593Smuzhiyun pe = __lookup_pending_exception(snap, chunk);
2485*4882a593Smuzhiyun if (!pe) {
2486*4882a593Smuzhiyun /*
2487*4882a593Smuzhiyun * Check exception table to see if block is already
2488*4882a593Smuzhiyun * remapped in this snapshot and trigger an exception
2489*4882a593Smuzhiyun * if not.
2490*4882a593Smuzhiyun */
2491*4882a593Smuzhiyun e = dm_lookup_exception(&snap->complete, chunk);
2492*4882a593Smuzhiyun if (e)
2493*4882a593Smuzhiyun goto next_snapshot;
2494*4882a593Smuzhiyun
2495*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2496*4882a593Smuzhiyun pe = alloc_pending_exception(snap);
2497*4882a593Smuzhiyun dm_exception_table_lock(&lock);
2498*4882a593Smuzhiyun
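/*
 * The exception table lock was dropped while allocating, so another
 * thread may have raced us: look again for a pending or completed
 * exception for this chunk before inserting the one we just allocated.
 */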
2499*4882a593Smuzhiyun pe2 = __lookup_pending_exception(snap, chunk);
2500*4882a593Smuzhiyun
2501*4882a593Smuzhiyun if (!pe2) {
2502*4882a593Smuzhiyun e = dm_lookup_exception(&snap->complete, chunk);
2503*4882a593Smuzhiyun if (e) {
2504*4882a593Smuzhiyun free_pending_exception(pe);
2505*4882a593Smuzhiyun goto next_snapshot;
2506*4882a593Smuzhiyun }
2507*4882a593Smuzhiyun
2508*4882a593Smuzhiyun pe = __insert_pending_exception(snap, pe, chunk);
2509*4882a593Smuzhiyun if (!pe) {
2510*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2511*4882a593Smuzhiyun up_read(&snap->lock);
2512*4882a593Smuzhiyun
2513*4882a593Smuzhiyun invalidate_snapshot(snap, -ENOMEM);
2514*4882a593Smuzhiyun continue;
2515*4882a593Smuzhiyun }
2516*4882a593Smuzhiyun } else {
2517*4882a593Smuzhiyun free_pending_exception(pe);
2518*4882a593Smuzhiyun pe = pe2;
2519*4882a593Smuzhiyun }
2520*4882a593Smuzhiyun }
2521*4882a593Smuzhiyun
2522*4882a593Smuzhiyun r = DM_MAPIO_SUBMITTED;
2523*4882a593Smuzhiyun
2524*4882a593Smuzhiyun /*
2525*4882a593Smuzhiyun * If an origin bio was supplied, queue it to wait for the
2526*4882a593Smuzhiyun * completion of this exception, and start this one last,
2527*4882a593Smuzhiyun * at the end of the function.
2528*4882a593Smuzhiyun */
2529*4882a593Smuzhiyun if (bio) {
2530*4882a593Smuzhiyun bio_list_add(&pe->origin_bios, bio);
2531*4882a593Smuzhiyun bio = NULL;
2532*4882a593Smuzhiyun
2533*4882a593Smuzhiyun if (!pe->started) {
2534*4882a593Smuzhiyun pe->started = 1;
2535*4882a593Smuzhiyun pe_to_start_last = pe;
2536*4882a593Smuzhiyun }
2537*4882a593Smuzhiyun }
2538*4882a593Smuzhiyun
2539*4882a593Smuzhiyun if (!pe->started) {
2540*4882a593Smuzhiyun pe->started = 1;
2541*4882a593Smuzhiyun pe_to_start_now = pe;
2542*4882a593Smuzhiyun }
2543*4882a593Smuzhiyun
2544*4882a593Smuzhiyun next_snapshot:
2545*4882a593Smuzhiyun dm_exception_table_unlock(&lock);
2546*4882a593Smuzhiyun up_read(&snap->lock);
2547*4882a593Smuzhiyun
2548*4882a593Smuzhiyun if (pe_to_start_now) {
2549*4882a593Smuzhiyun start_copy(pe_to_start_now);
2550*4882a593Smuzhiyun pe_to_start_now = NULL;
2551*4882a593Smuzhiyun }
2552*4882a593Smuzhiyun }
2553*4882a593Smuzhiyun
2554*4882a593Smuzhiyun /*
2555*4882a593Smuzhiyun * Submit the exception against which the bio is queued last,
2556*4882a593Smuzhiyun * to give the other exceptions a head start.
2557*4882a593Smuzhiyun */
2558*4882a593Smuzhiyun if (pe_to_start_last)
2559*4882a593Smuzhiyun start_copy(pe_to_start_last);
2560*4882a593Smuzhiyun
2561*4882a593Smuzhiyun return r;
2562*4882a593Smuzhiyun }
2563*4882a593Smuzhiyun
2564*4882a593Smuzhiyun /*
2565*4882a593Smuzhiyun * Called on a write from the origin driver.
2566*4882a593Smuzhiyun */
2567*4882a593Smuzhiyun static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
2568*4882a593Smuzhiyun {
2569*4882a593Smuzhiyun struct origin *o;
2570*4882a593Smuzhiyun int r = DM_MAPIO_REMAPPED;
2571*4882a593Smuzhiyun
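/*
 * When "limit" is set, throttle against every snapshot of this origin.
 * A false return from wait_for_in_progress(s, true) means _origins_lock
 * was released while waiting, so retake it and rescan from scratch.
 */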
2572*4882a593Smuzhiyun again:
2573*4882a593Smuzhiyun down_read(&_origins_lock);
2574*4882a593Smuzhiyun o = __lookup_origin(origin->bdev);
2575*4882a593Smuzhiyun if (o) {
2576*4882a593Smuzhiyun if (limit) {
2577*4882a593Smuzhiyun struct dm_snapshot *s;
2578*4882a593Smuzhiyun list_for_each_entry(s, &o->snapshots, list)
2579*4882a593Smuzhiyun if (unlikely(!wait_for_in_progress(s, true)))
2580*4882a593Smuzhiyun goto again;
2581*4882a593Smuzhiyun }
2582*4882a593Smuzhiyun
2583*4882a593Smuzhiyun r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
2584*4882a593Smuzhiyun }
2585*4882a593Smuzhiyun up_read(&_origins_lock);
2586*4882a593Smuzhiyun
2587*4882a593Smuzhiyun return r;
2588*4882a593Smuzhiyun }
2589*4882a593Smuzhiyun
2590*4882a593Smuzhiyun /*
2591*4882a593Smuzhiyun * Trigger exceptions in all non-merging snapshots.
2592*4882a593Smuzhiyun *
2593*4882a593Smuzhiyun * The chunk size of the merging snapshot may be larger than the chunk
2594*4882a593Smuzhiyun * size of some other snapshot so we may need to reallocate multiple
2595*4882a593Smuzhiyun * chunks in other snapshots.
2596*4882a593Smuzhiyun *
2597*4882a593Smuzhiyun * We scan all the overlapping exceptions in the other snapshots.
2598*4882a593Smuzhiyun * Returns 1 if anything was reallocated and must be waited for,
2599*4882a593Smuzhiyun * otherwise returns 0.
2600*4882a593Smuzhiyun *
2601*4882a593Smuzhiyun * size must be a multiple of merging_snap's chunk_size.
2602*4882a593Smuzhiyun */
2603*4882a593Smuzhiyun static int origin_write_extent(struct dm_snapshot *merging_snap,
2604*4882a593Smuzhiyun sector_t sector, unsigned size)
2605*4882a593Smuzhiyun {
2606*4882a593Smuzhiyun int must_wait = 0;
2607*4882a593Smuzhiyun sector_t n;
2608*4882a593Smuzhiyun struct origin *o;
2609*4882a593Smuzhiyun
2610*4882a593Smuzhiyun /*
2611*4882a593Smuzhiyun * The origin's __minimum_chunk_size() got stored in max_io_len
2612*4882a593Smuzhiyun * by snapshot_merge_resume().
2613*4882a593Smuzhiyun */
2614*4882a593Smuzhiyun down_read(&_origins_lock);
2615*4882a593Smuzhiyun o = __lookup_origin(merging_snap->origin->bdev);
2616*4882a593Smuzhiyun for (n = 0; n < size; n += merging_snap->ti->max_io_len)
2617*4882a593Smuzhiyun if (__origin_write(&o->snapshots, sector + n, NULL) ==
2618*4882a593Smuzhiyun DM_MAPIO_SUBMITTED)
2619*4882a593Smuzhiyun must_wait = 1;
2620*4882a593Smuzhiyun up_read(&_origins_lock);
2621*4882a593Smuzhiyun
2622*4882a593Smuzhiyun return must_wait;
2623*4882a593Smuzhiyun }
2624*4882a593Smuzhiyun
2625*4882a593Smuzhiyun /*
2626*4882a593Smuzhiyun * Origin: maps a linear range of a device, with hooks for snapshotting.
2627*4882a593Smuzhiyun */
2628*4882a593Smuzhiyun
2629*4882a593Smuzhiyun /*
2630*4882a593Smuzhiyun * Construct an origin mapping: <dev_path>
2631*4882a593Smuzhiyun * The context for an origin is merely a 'struct dm_dev *'
2632*4882a593Smuzhiyun * pointing to the real device.
2633*4882a593Smuzhiyun */
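/*
 * For illustration only (hypothetical volume name): a table line mapping
 * a 1GiB origin would look like
 *
 *     0 2097152 snapshot-origin /dev/vg0/base
 */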
2634*4882a593Smuzhiyun static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2635*4882a593Smuzhiyun {
2636*4882a593Smuzhiyun int r;
2637*4882a593Smuzhiyun struct dm_origin *o;
2638*4882a593Smuzhiyun
2639*4882a593Smuzhiyun if (argc != 1) {
2640*4882a593Smuzhiyun ti->error = "origin: incorrect number of arguments";
2641*4882a593Smuzhiyun return -EINVAL;
2642*4882a593Smuzhiyun }
2643*4882a593Smuzhiyun
2644*4882a593Smuzhiyun o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
2645*4882a593Smuzhiyun if (!o) {
2646*4882a593Smuzhiyun ti->error = "Cannot allocate private origin structure";
2647*4882a593Smuzhiyun r = -ENOMEM;
2648*4882a593Smuzhiyun goto bad_alloc;
2649*4882a593Smuzhiyun }
2650*4882a593Smuzhiyun
2651*4882a593Smuzhiyun r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
2652*4882a593Smuzhiyun if (r) {
2653*4882a593Smuzhiyun ti->error = "Cannot get target device";
2654*4882a593Smuzhiyun goto bad_open;
2655*4882a593Smuzhiyun }
2656*4882a593Smuzhiyun
2657*4882a593Smuzhiyun o->ti = ti;
2658*4882a593Smuzhiyun ti->private = o;
2659*4882a593Smuzhiyun ti->num_flush_bios = 1;
2660*4882a593Smuzhiyun
2661*4882a593Smuzhiyun return 0;
2662*4882a593Smuzhiyun
2663*4882a593Smuzhiyun bad_open:
2664*4882a593Smuzhiyun kfree(o);
2665*4882a593Smuzhiyun bad_alloc:
2666*4882a593Smuzhiyun return r;
2667*4882a593Smuzhiyun }
2668*4882a593Smuzhiyun
2669*4882a593Smuzhiyun static void origin_dtr(struct dm_target *ti)
2670*4882a593Smuzhiyun {
2671*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2672*4882a593Smuzhiyun
2673*4882a593Smuzhiyun dm_put_device(ti, o->dev);
2674*4882a593Smuzhiyun kfree(o);
2675*4882a593Smuzhiyun }
2676*4882a593Smuzhiyun
2677*4882a593Smuzhiyun static int origin_map(struct dm_target *ti, struct bio *bio)
2678*4882a593Smuzhiyun {
2679*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2680*4882a593Smuzhiyun unsigned available_sectors;
2681*4882a593Smuzhiyun
2682*4882a593Smuzhiyun bio_set_dev(bio, o->dev->bdev);
2683*4882a593Smuzhiyun
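/* Flushes carry no data needing copy-out; pass them straight to the origin. */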
2684*4882a593Smuzhiyun if (unlikely(bio->bi_opf & REQ_PREFLUSH))
2685*4882a593Smuzhiyun return DM_MAPIO_REMAPPED;
2686*4882a593Smuzhiyun
2687*4882a593Smuzhiyun if (bio_data_dir(bio) != WRITE)
2688*4882a593Smuzhiyun return DM_MAPIO_REMAPPED;
2689*4882a593Smuzhiyun
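/*
 * split_boundary is the smallest chunk size (a power of two, in sectors)
 * of all snapshots of this origin; trim the bio so it never crosses a
 * chunk boundary and therefore touches at most one exception per snapshot.
 */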
2690*4882a593Smuzhiyun available_sectors = o->split_boundary -
2691*4882a593Smuzhiyun ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
2692*4882a593Smuzhiyun
2693*4882a593Smuzhiyun if (bio_sectors(bio) > available_sectors)
2694*4882a593Smuzhiyun dm_accept_partial_bio(bio, available_sectors);
2695*4882a593Smuzhiyun
2696*4882a593Smuzhiyun /* Only tell snapshots if this is a write */
2697*4882a593Smuzhiyun return do_origin(o->dev, bio, true);
2698*4882a593Smuzhiyun }
2699*4882a593Smuzhiyun
2700*4882a593Smuzhiyun /*
2701*4882a593Smuzhiyun * Set the target "max_io_len" field to the minimum of all the snapshots'
2702*4882a593Smuzhiyun * chunk sizes.
2703*4882a593Smuzhiyun */
2704*4882a593Smuzhiyun static void origin_resume(struct dm_target *ti)
2705*4882a593Smuzhiyun {
2706*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2707*4882a593Smuzhiyun
2708*4882a593Smuzhiyun o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
2709*4882a593Smuzhiyun
2710*4882a593Smuzhiyun down_write(&_origins_lock);
2711*4882a593Smuzhiyun __insert_dm_origin(o);
2712*4882a593Smuzhiyun up_write(&_origins_lock);
2713*4882a593Smuzhiyun }
2714*4882a593Smuzhiyun
2715*4882a593Smuzhiyun static void origin_postsuspend(struct dm_target *ti)
2716*4882a593Smuzhiyun {
2717*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2718*4882a593Smuzhiyun
2719*4882a593Smuzhiyun down_write(&_origins_lock);
2720*4882a593Smuzhiyun __remove_dm_origin(o);
2721*4882a593Smuzhiyun up_write(&_origins_lock);
2722*4882a593Smuzhiyun }
2723*4882a593Smuzhiyun
2724*4882a593Smuzhiyun static void origin_status(struct dm_target *ti, status_type_t type,
2725*4882a593Smuzhiyun unsigned status_flags, char *result, unsigned maxlen)
2726*4882a593Smuzhiyun {
2727*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2728*4882a593Smuzhiyun
2729*4882a593Smuzhiyun switch (type) {
2730*4882a593Smuzhiyun case STATUSTYPE_INFO:
2731*4882a593Smuzhiyun result[0] = '\0';
2732*4882a593Smuzhiyun break;
2733*4882a593Smuzhiyun
2734*4882a593Smuzhiyun case STATUSTYPE_TABLE:
2735*4882a593Smuzhiyun snprintf(result, maxlen, "%s", o->dev->name);
2736*4882a593Smuzhiyun break;
2737*4882a593Smuzhiyun }
2738*4882a593Smuzhiyun }
2739*4882a593Smuzhiyun
2740*4882a593Smuzhiyun static int origin_iterate_devices(struct dm_target *ti,
2741*4882a593Smuzhiyun iterate_devices_callout_fn fn, void *data)
2742*4882a593Smuzhiyun {
2743*4882a593Smuzhiyun struct dm_origin *o = ti->private;
2744*4882a593Smuzhiyun
2745*4882a593Smuzhiyun return fn(ti, o->dev, 0, ti->len, data);
2746*4882a593Smuzhiyun }
2747*4882a593Smuzhiyun
2748*4882a593Smuzhiyun static struct target_type origin_target = {
2749*4882a593Smuzhiyun .name = "snapshot-origin",
2750*4882a593Smuzhiyun .version = {1, 9, 0},
2751*4882a593Smuzhiyun .module = THIS_MODULE,
2752*4882a593Smuzhiyun .ctr = origin_ctr,
2753*4882a593Smuzhiyun .dtr = origin_dtr,
2754*4882a593Smuzhiyun .map = origin_map,
2755*4882a593Smuzhiyun .resume = origin_resume,
2756*4882a593Smuzhiyun .postsuspend = origin_postsuspend,
2757*4882a593Smuzhiyun .status = origin_status,
2758*4882a593Smuzhiyun .iterate_devices = origin_iterate_devices,
2759*4882a593Smuzhiyun };
2760*4882a593Smuzhiyun
2761*4882a593Smuzhiyun static struct target_type snapshot_target = {
2762*4882a593Smuzhiyun .name = "snapshot",
2763*4882a593Smuzhiyun .version = {1, 16, 0},
2764*4882a593Smuzhiyun .module = THIS_MODULE,
2765*4882a593Smuzhiyun .ctr = snapshot_ctr,
2766*4882a593Smuzhiyun .dtr = snapshot_dtr,
2767*4882a593Smuzhiyun .map = snapshot_map,
2768*4882a593Smuzhiyun .end_io = snapshot_end_io,
2769*4882a593Smuzhiyun .preresume = snapshot_preresume,
2770*4882a593Smuzhiyun .resume = snapshot_resume,
2771*4882a593Smuzhiyun .status = snapshot_status,
2772*4882a593Smuzhiyun .iterate_devices = snapshot_iterate_devices,
2773*4882a593Smuzhiyun .io_hints = snapshot_io_hints,
2774*4882a593Smuzhiyun };
2775*4882a593Smuzhiyun
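/*
 * snapshot-merge shares the snapshot target's ctr/dtr/end_io/preresume
 * paths but installs merge-aware map, presuspend and resume methods.
 */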
2776*4882a593Smuzhiyun static struct target_type merge_target = {
2777*4882a593Smuzhiyun .name = dm_snapshot_merge_target_name,
2778*4882a593Smuzhiyun .version = {1, 5, 0},
2779*4882a593Smuzhiyun .module = THIS_MODULE,
2780*4882a593Smuzhiyun .ctr = snapshot_ctr,
2781*4882a593Smuzhiyun .dtr = snapshot_dtr,
2782*4882a593Smuzhiyun .map = snapshot_merge_map,
2783*4882a593Smuzhiyun .end_io = snapshot_end_io,
2784*4882a593Smuzhiyun .presuspend = snapshot_merge_presuspend,
2785*4882a593Smuzhiyun .preresume = snapshot_preresume,
2786*4882a593Smuzhiyun .resume = snapshot_merge_resume,
2787*4882a593Smuzhiyun .status = snapshot_status,
2788*4882a593Smuzhiyun .iterate_devices = snapshot_iterate_devices,
2789*4882a593Smuzhiyun .io_hints = snapshot_io_hints,
2790*4882a593Smuzhiyun };
2791*4882a593Smuzhiyun
2792*4882a593Smuzhiyun static int __init dm_snapshot_init(void)
2793*4882a593Smuzhiyun {
2794*4882a593Smuzhiyun int r;
2795*4882a593Smuzhiyun
2796*4882a593Smuzhiyun r = dm_exception_store_init();
2797*4882a593Smuzhiyun if (r) {
2798*4882a593Smuzhiyun DMERR("Failed to initialize exception stores");
2799*4882a593Smuzhiyun return r;
2800*4882a593Smuzhiyun }
2801*4882a593Smuzhiyun
2802*4882a593Smuzhiyun r = init_origin_hash();
2803*4882a593Smuzhiyun if (r) {
2804*4882a593Smuzhiyun DMERR("init_origin_hash failed.");
2805*4882a593Smuzhiyun goto bad_origin_hash;
2806*4882a593Smuzhiyun }
2807*4882a593Smuzhiyun
2808*4882a593Smuzhiyun exception_cache = KMEM_CACHE(dm_exception, 0);
2809*4882a593Smuzhiyun if (!exception_cache) {
2810*4882a593Smuzhiyun DMERR("Couldn't create exception cache.");
2811*4882a593Smuzhiyun r = -ENOMEM;
2812*4882a593Smuzhiyun goto bad_exception_cache;
2813*4882a593Smuzhiyun }
2814*4882a593Smuzhiyun
2815*4882a593Smuzhiyun pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
2816*4882a593Smuzhiyun if (!pending_cache) {
2817*4882a593Smuzhiyun DMERR("Couldn't create pending cache.");
2818*4882a593Smuzhiyun r = -ENOMEM;
2819*4882a593Smuzhiyun goto bad_pending_cache;
2820*4882a593Smuzhiyun }
2821*4882a593Smuzhiyun
2822*4882a593Smuzhiyun r = dm_register_target(&snapshot_target);
2823*4882a593Smuzhiyun if (r < 0) {
2824*4882a593Smuzhiyun DMERR("snapshot target register failed %d", r);
2825*4882a593Smuzhiyun goto bad_register_snapshot_target;
2826*4882a593Smuzhiyun }
2827*4882a593Smuzhiyun
2828*4882a593Smuzhiyun r = dm_register_target(&origin_target);
2829*4882a593Smuzhiyun if (r < 0) {
2830*4882a593Smuzhiyun DMERR("Origin target register failed %d", r);
2831*4882a593Smuzhiyun goto bad_register_origin_target;
2832*4882a593Smuzhiyun }
2833*4882a593Smuzhiyun
2834*4882a593Smuzhiyun r = dm_register_target(&merge_target);
2835*4882a593Smuzhiyun if (r < 0) {
2836*4882a593Smuzhiyun DMERR("Merge target register failed %d", r);
2837*4882a593Smuzhiyun goto bad_register_merge_target;
2838*4882a593Smuzhiyun }
2839*4882a593Smuzhiyun
2840*4882a593Smuzhiyun return 0;
2841*4882a593Smuzhiyun
2842*4882a593Smuzhiyun bad_register_merge_target:
2843*4882a593Smuzhiyun dm_unregister_target(&origin_target);
2844*4882a593Smuzhiyun bad_register_origin_target:
2845*4882a593Smuzhiyun dm_unregister_target(&snapshot_target);
2846*4882a593Smuzhiyun bad_register_snapshot_target:
2847*4882a593Smuzhiyun kmem_cache_destroy(pending_cache);
2848*4882a593Smuzhiyun bad_pending_cache:
2849*4882a593Smuzhiyun kmem_cache_destroy(exception_cache);
2850*4882a593Smuzhiyun bad_exception_cache:
2851*4882a593Smuzhiyun exit_origin_hash();
2852*4882a593Smuzhiyun bad_origin_hash:
2853*4882a593Smuzhiyun dm_exception_store_exit();
2854*4882a593Smuzhiyun
2855*4882a593Smuzhiyun return r;
2856*4882a593Smuzhiyun }
2857*4882a593Smuzhiyun
2858*4882a593Smuzhiyun static void __exit dm_snapshot_exit(void)
2859*4882a593Smuzhiyun {
2860*4882a593Smuzhiyun dm_unregister_target(&snapshot_target);
2861*4882a593Smuzhiyun dm_unregister_target(&origin_target);
2862*4882a593Smuzhiyun dm_unregister_target(&merge_target);
2863*4882a593Smuzhiyun
2864*4882a593Smuzhiyun exit_origin_hash();
2865*4882a593Smuzhiyun kmem_cache_destroy(pending_cache);
2866*4882a593Smuzhiyun kmem_cache_destroy(exception_cache);
2867*4882a593Smuzhiyun
2868*4882a593Smuzhiyun dm_exception_store_exit();
2869*4882a593Smuzhiyun }
2870*4882a593Smuzhiyun
2871*4882a593Smuzhiyun /* Module hooks */
2872*4882a593Smuzhiyun module_init(dm_snapshot_init);
2873*4882a593Smuzhiyun module_exit(dm_snapshot_exit);
2874*4882a593Smuzhiyun
2875*4882a593Smuzhiyun MODULE_DESCRIPTION(DM_NAME " snapshot target");
2876*4882a593Smuzhiyun MODULE_AUTHOR("Joe Thornber");
2877*4882a593Smuzhiyun MODULE_LICENSE("GPL");
2878*4882a593Smuzhiyun MODULE_ALIAS("dm-snapshot-origin");
2879*4882a593Smuzhiyun MODULE_ALIAS("dm-snapshot-merge");
2880