/*
 * Copyright (C) 2018 Google Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-core.h"

#include <linux/crc32.h>
#include <linux/dm-bufio.h>
#include <linux/module.h>

#define DM_MSG_PREFIX "bow"

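/*
 * On-disk log format (the structures below): the first block of the
 * device is overwritten with a log_sector header followed by an array of
 * log_entry records, each recording one backed-up range: source sector,
 * destination sector, size in bytes and a CRC32 of the copied data.
 */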
struct log_entry {
	u64 source;
	u64 dest;
	u32 size;
	u32 checksum;
} __packed;

struct log_sector {
	u32 magic;
	u16 header_version;
	u16 header_size;
	u32 block_size;
	u32 count;
	u32 sequence;
	sector_t sector0;
	struct log_entry entries[];
} __packed;

/*
 * MAGIC is BOW in ascii
 */
#define MAGIC 0x00574f42
#define HEADER_VERSION 0x0100

/*
 * A sorted set of ranges representing the state of the data on the device.
 * Use an rb_tree for fast lookup of a given sector
 * Consecutive ranges are always of different type - operations on this
 * set must merge matching consecutive ranges.
 *
 * Top range is always of type TOP
 */
struct bow_range {
	struct rb_node		node;
	sector_t		sector;
	enum {
		INVALID,	/* Type not set */
		SECTOR0,	/* First sector - holds log record */
		SECTOR0_CURRENT,/* Live contents of sector0 */
		UNCHANGED,	/* Original contents */
		TRIMMED,	/* Range has been trimmed */
		CHANGED,	/* Range has been changed */
		BACKUP,		/* Range is being used as a backup */
		TOP,		/* Final range - sector is size of device */
	} type;
	struct list_head	trimmed_list; /* list of TRIMMED ranges */
};

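/*
 * Human-readable names for the bow_range types above, indexed by type.
 * Note that TRIMMED ranges are reported as "Free".
 */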
static const char * const readable_type[] = {
	"Invalid",
	"Sector0",
	"Sector0_current",
	"Unchanged",
	"Free",
	"Changed",
	"Backup",
	"Top",
};

enum state {
	TRIM,
	CHECKPOINT,
	COMMITTED,
};

struct bow_context {
	struct dm_dev *dev;
	u32 block_size;
	u32 block_shift;
	struct workqueue_struct *workqueue;
	struct dm_bufio_client *bufio;
	struct mutex ranges_lock; /* Hold to access this struct and/or ranges */
	struct rb_root ranges;
	struct dm_kobject_holder kobj_holder;	/* for sysfs attributes */
	atomic_t state; /* One of the enum state values above */
	u64 trims_total;
	struct log_sector *log_sector;
	struct list_head trimmed_list;
	bool forward_trims;
};

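/*
 * The top of a range is the start of the next range in the tree. This
 * relies on the TOP sentinel always being present, so rb_next() never
 * returns NULL for a non-TOP range.
 */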
sector_t range_top(struct bow_range *br)
{
	return container_of(rb_next(&br->node), struct bow_range, node)
		->sector;
}

u64 range_size(struct bow_range *br)
{
	return (range_top(br) - br->sector) * SECTOR_SIZE;
}

static sector_t bvec_top(struct bvec_iter *bi_iter)
{
	return bi_iter->bi_sector + bi_iter->bi_size / SECTOR_SIZE;
}

/*
 * Find the first range that overlaps with bi_iter
 * bi_iter is set to the size of the overlapping sub-range
 */
static struct bow_range *find_first_overlapping_range(struct rb_root *ranges,
						      struct bvec_iter *bi_iter)
{
	struct rb_node *node = ranges->rb_node;
	struct bow_range *br;

	while (node) {
		br = container_of(node, struct bow_range, node);

		if (br->sector <= bi_iter->bi_sector
		    && bi_iter->bi_sector < range_top(br))
			break;

		if (bi_iter->bi_sector < br->sector)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	WARN_ON(!node);
	if (!node)
		return NULL;

	if (range_top(br) - bi_iter->bi_sector
	    < bi_iter->bi_size >> SECTOR_SHIFT)
		bi_iter->bi_size = (range_top(br) - bi_iter->bi_sector)
			<< SECTOR_SHIFT;

	return br;
}

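/*
 * Insert new_br immediately before existing in tree order, i.e. as the
 * rightmost node of existing's left subtree.
 */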
void add_before(struct rb_root *ranges, struct bow_range *new_br,
		struct bow_range *existing)
{
	struct rb_node *parent = &(existing->node);
	struct rb_node **link = &(parent->rb_left);

	while (*link) {
		parent = *link;
		link = &((*link)->rb_right);
	}

	rb_link_node(&new_br->node, parent, link);
	rb_insert_color(&new_br->node, ranges);
}

/*
 * Given a range br returned by find_first_overlapping_range, split br into a
 * leading range, a range matching the bi_iter and a trailing range.
 * Leading and trailing may end up size 0 and will then be deleted. The
 * new range matching the bi_iter is then returned and should have its type
 * and type specific fields populated.
 * If bi_iter runs off the end of the range, bi_iter is truncated accordingly
 */
static int split_range(struct bow_context *bc, struct bow_range **br,
		       struct bvec_iter *bi_iter)
{
	struct bow_range *new_br;

	if (bi_iter->bi_sector < (*br)->sector) {
		WARN_ON(true);
		return BLK_STS_IOERR;
	}

	if (bi_iter->bi_sector > (*br)->sector) {
		struct bow_range *leading_br =
			kzalloc(sizeof(*leading_br), GFP_KERNEL);

		if (!leading_br)
			return BLK_STS_RESOURCE;

		*leading_br = **br;
		if (leading_br->type == TRIMMED)
			list_add(&leading_br->trimmed_list, &bc->trimmed_list);

		add_before(&bc->ranges, leading_br, *br);
		(*br)->sector = bi_iter->bi_sector;
	}

	if (bvec_top(bi_iter) >= range_top(*br)) {
		bi_iter->bi_size = (range_top(*br) - (*br)->sector)
					* SECTOR_SIZE;
		return BLK_STS_OK;
	}

	/* new_br will be the beginning, existing br will be the tail */
	new_br = kzalloc(sizeof(*new_br), GFP_KERNEL);
	if (!new_br)
		return BLK_STS_RESOURCE;

	new_br->sector = (*br)->sector;
	(*br)->sector = bvec_top(bi_iter);
	add_before(&bc->ranges, new_br, *br);
	*br = new_br;

	return BLK_STS_OK;
}

/*
 * Sets type of a range. May merge range into surrounding ranges
 * Since br may be invalidated, always sets br to NULL to prevent
 * usage after this is called
 */
static void set_type(struct bow_context *bc, struct bow_range **br, int type)
{
	struct bow_range *prev = container_of(rb_prev(&(*br)->node),
						      struct bow_range, node);
	struct bow_range *next = container_of(rb_next(&(*br)->node),
						      struct bow_range, node);

	if ((*br)->type == TRIMMED) {
		bc->trims_total -= range_size(*br);
		list_del(&(*br)->trimmed_list);
	}

	if (type == TRIMMED) {
		bc->trims_total += range_size(*br);
		list_add(&(*br)->trimmed_list, &bc->trimmed_list);
	}

	(*br)->type = type;

	if (next->type == type) {
		if (type == TRIMMED)
			list_del(&next->trimmed_list);
		rb_erase(&next->node, &bc->ranges);
		kfree(next);
	}

	if (prev->type == type) {
		if (type == TRIMMED)
			list_del(&(*br)->trimmed_list);
		rb_erase(&(*br)->node, &bc->ranges);
		kfree(*br);
	}

	*br = NULL;
}

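/* Backups are carved out of TRIMMED ranges; take the first one on the list. */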
static struct bow_range *find_free_range(struct bow_context *bc)
{
	if (list_empty(&bc->trimmed_list)) {
		DMERR("Unable to find free space to back up to");
		return NULL;
	}

	return list_first_entry(&bc->trimmed_list, struct bow_range,
				trimmed_list);
}

static sector_t sector_to_page(struct bow_context const *bc, sector_t sector)
{
	WARN_ON((sector & (((sector_t)1 << (bc->block_shift - SECTOR_SHIFT)) - 1))
		!= 0);
	return sector >> (bc->block_shift - SECTOR_SHIFT);
}

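/*
 * Copy a range block by block through dm-bufio. If checksum is non-NULL
 * it is seeded with the source page number and updated with a CRC32 of
 * every block copied.
 */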
static int copy_data(struct bow_context const *bc,
		     struct bow_range *source, struct bow_range *dest,
		     u32 *checksum)
{
	int i;

	if (range_size(source) != range_size(dest)) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	if (checksum)
		*checksum = sector_to_page(bc, source->sector);

	for (i = 0; i < range_size(source) >> bc->block_shift; ++i) {
		struct dm_buffer *read_buffer, *write_buffer;
		u8 *read, *write;
		sector_t page = sector_to_page(bc, source->sector) + i;

		read = dm_bufio_read(bc->bufio, page, &read_buffer);
		if (IS_ERR(read)) {
			DMERR("Cannot read page %llu",
			      (unsigned long long)page);
			return PTR_ERR(read);
		}

		if (checksum)
			*checksum = crc32(*checksum, read, bc->block_size);

		write = dm_bufio_new(bc->bufio,
				     sector_to_page(bc, dest->sector) + i,
				     &write_buffer);
		if (IS_ERR(write)) {
			DMERR("Cannot write sector");
			dm_bufio_release(read_buffer);
			return PTR_ERR(write);
		}

		memcpy(write, read, bc->block_size);

		dm_bufio_mark_buffer_dirty(write_buffer);
		dm_bufio_release(write_buffer);
		dm_bufio_release(read_buffer);
	}

	dm_bufio_write_dirty_buffers(bc->bufio);
	return BLK_STS_OK;
}

/****** logging functions ******/

static int add_log_entry(struct bow_context *bc, sector_t source, sector_t dest,
			 unsigned int size, u32 checksum);

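/*
 * Called when the log sector is full: copy the current log block into a
 * free range, reset the entry count, bump the sequence number and record
 * the copy as the first entry of the fresh log.
 */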
static int backup_log_sector(struct bow_context *bc)
{
	struct bow_range *first_br, *free_br;
	struct bvec_iter bi_iter;
	u32 checksum = 0;
	int ret;

	first_br = container_of(rb_first(&bc->ranges), struct bow_range, node);

	if (first_br->type != SECTOR0) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	if (range_size(first_br) != bc->block_size) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	free_br = find_free_range(bc);
	/* No space left - return this error to userspace */
	if (!free_br)
		return BLK_STS_NOSPC;
	bi_iter.bi_sector = free_br->sector;
	bi_iter.bi_size = bc->block_size;
	ret = split_range(bc, &free_br, &bi_iter);
	if (ret)
		return ret;
	if (bi_iter.bi_size != bc->block_size) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	ret = copy_data(bc, first_br, free_br, &checksum);
	if (ret)
		return ret;

	bc->log_sector->count = 0;
	bc->log_sector->sequence++;
	ret = add_log_entry(bc, first_br->sector, free_br->sector,
			    range_size(first_br), checksum);
	if (ret)
		return ret;

	set_type(bc, &free_br, BACKUP);
	return BLK_STS_OK;
}

static int add_log_entry(struct bow_context *bc, sector_t source, sector_t dest,
			 unsigned int size, u32 checksum)
{
	struct dm_buffer *sector_buffer;
	u8 *sector;

	if (sizeof(struct log_sector)
	    + sizeof(struct log_entry) * (bc->log_sector->count + 1)
		> bc->block_size) {
		int ret = backup_log_sector(bc);

		if (ret)
			return ret;
	}

	sector = dm_bufio_new(bc->bufio, 0, &sector_buffer);
	if (IS_ERR(sector)) {
		DMERR("Cannot write boot sector");
		dm_bufio_release(sector_buffer);
		return BLK_STS_NOSPC;
	}

	bc->log_sector->entries[bc->log_sector->count].source = source;
	bc->log_sector->entries[bc->log_sector->count].dest = dest;
	bc->log_sector->entries[bc->log_sector->count].size = size;
	bc->log_sector->entries[bc->log_sector->count].checksum = checksum;
	bc->log_sector->count++;

	memcpy(sector, bc->log_sector, bc->block_size);
	dm_bufio_mark_buffer_dirty(sector_buffer);
	dm_bufio_release(sector_buffer);
	dm_bufio_write_dirty_buffers(bc->bufio);
	return BLK_STS_OK;
}

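/*
 * Set up the checkpoint: carve block 0 out as the log sector, redirect
 * live sector 0 I/O to a SECTOR0_CURRENT copy, back up the original
 * block 0 contents and write the first log entry describing that backup.
 */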
static int prepare_log(struct bow_context *bc)
{
	struct bow_range *free_br, *first_br;
	struct bvec_iter bi_iter;
	u32 checksum = 0;
	int ret;

	/* Carve out first sector as log sector */
	first_br = container_of(rb_first(&bc->ranges), struct bow_range, node);
	if (first_br->type != UNCHANGED) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	if (range_size(first_br) < bc->block_size) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}
	bi_iter.bi_sector = 0;
	bi_iter.bi_size = bc->block_size;
	ret = split_range(bc, &first_br, &bi_iter);
	if (ret)
		return ret;
	first_br->type = SECTOR0;
	if (range_size(first_br) != bc->block_size) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	/* Find free sector for active sector0 reads/writes */
	free_br = find_free_range(bc);
	if (!free_br)
		return BLK_STS_NOSPC;
	bi_iter.bi_sector = free_br->sector;
	bi_iter.bi_size = bc->block_size;
	ret = split_range(bc, &free_br, &bi_iter);
	if (ret)
		return ret;
	free_br->type = SECTOR0_CURRENT;

	/* Copy data */
	ret = copy_data(bc, first_br, free_br, NULL);
	if (ret)
		return ret;

	bc->log_sector->sector0 = free_br->sector;

	/* Find free sector to back up original sector zero */
	free_br = find_free_range(bc);
	if (!free_br)
		return BLK_STS_NOSPC;
	bi_iter.bi_sector = free_br->sector;
	bi_iter.bi_size = bc->block_size;
	ret = split_range(bc, &free_br, &bi_iter);
	if (ret)
		return ret;

	/* Back up */
	ret = copy_data(bc, first_br, free_br, &checksum);
	if (ret)
		return ret;

	/*
	 * Set up our replacement boot sector - it will get written when we
	 * add the first log entry, which we do immediately
	 */
	bc->log_sector->magic = MAGIC;
	bc->log_sector->header_version = HEADER_VERSION;
	bc->log_sector->header_size = sizeof(*bc->log_sector);
	bc->log_sector->block_size = bc->block_size;
	bc->log_sector->count = 0;
	bc->log_sector->sequence = 0;

	/* Add log entry */
	ret = add_log_entry(bc, first_br->sector, free_br->sector,
			    range_size(first_br), checksum);
	if (ret)
		return ret;

	set_type(bc, &free_br, BACKUP);
	return BLK_STS_OK;
}

static struct bow_range *find_sector0_current(struct bow_context *bc)
{
	struct bvec_iter bi_iter;

	bi_iter.bi_sector = bc->log_sector->sector0;
	bi_iter.bi_size = bc->block_size;
	return find_first_overlapping_range(&bc->ranges, &bi_iter);
}

/****** sysfs interface functions ******/

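/*
 * The state attribute only moves forward: TRIM (0) -> CHECKPOINT (1) ->
 * COMMITTED (2). Writing CHECKPOINT prepares the log; writing COMMITTED
 * copies the live sector 0 contents back over the log block.
 */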
static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
			  char *buf)
{
	struct bow_context *bc = container_of(kobj, struct bow_context,
					      kobj_holder.kobj);

	return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&bc->state));
}

static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t count)
{
	struct bow_context *bc = container_of(kobj, struct bow_context,
					      kobj_holder.kobj);
	enum state state, original_state;
	int ret;

	state = buf[0] - '0';
	if (state < TRIM || state > COMMITTED) {
		DMERR("State value %d out of range", state);
		return -EINVAL;
	}

	mutex_lock(&bc->ranges_lock);
	original_state = atomic_read(&bc->state);
	if (state != original_state + 1) {
		DMERR("Invalid state change from %d to %d",
		      original_state, state);
		ret = -EINVAL;
		goto bad;
	}

	DMINFO("Switching to state %s", state == CHECKPOINT ? "Checkpoint"
	       : state == COMMITTED ? "Committed" : "Unknown");

	if (state == CHECKPOINT) {
		ret = prepare_log(bc);
		if (ret) {
			DMERR("Failed to switch to checkpoint state");
			goto bad;
		}
	} else if (state == COMMITTED) {
		struct bow_range *br = find_sector0_current(bc);
		struct bow_range *sector0_br =
			container_of(rb_first(&bc->ranges), struct bow_range,
				     node);

		ret = copy_data(bc, br, sector0_br, 0);
		if (ret) {
			DMERR("Failed to switch to committed state");
			goto bad;
		}
	}
	atomic_inc(&bc->state);
	ret = count;

bad:
	mutex_unlock(&bc->ranges_lock);
	return ret;
}

static ssize_t free_show(struct kobject *kobj, struct kobj_attribute *attr,
			  char *buf)
{
	struct bow_context *bc = container_of(kobj, struct bow_context,
					      kobj_holder.kobj);
	u64 trims_total;

	mutex_lock(&bc->ranges_lock);
	trims_total = bc->trims_total;
	mutex_unlock(&bc->ranges_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", trims_total);
}

static struct kobj_attribute attr_state = __ATTR_RW(state);
static struct kobj_attribute attr_free = __ATTR_RO(free);

static struct attribute *bow_attrs[] = {
	&attr_state.attr,
	&attr_free.attr,
	NULL
};

static struct kobj_type bow_ktype = {
	.sysfs_ops = &kobj_sysfs_ops,
	.default_attrs = bow_attrs,
	.release = dm_kobject_release
};

/****** constructor/destructor ******/

static void dm_bow_dtr(struct dm_target *ti)
{
	struct bow_context *bc = (struct bow_context *) ti->private;
	struct kobject *kobj;

	mutex_lock(&bc->ranges_lock);
	while (rb_first(&bc->ranges)) {
		struct bow_range *br = container_of(rb_first(&bc->ranges),
						    struct bow_range, node);

		rb_erase(&br->node, &bc->ranges);
		kfree(br);
	}
	mutex_unlock(&bc->ranges_lock);

	if (bc->workqueue)
		destroy_workqueue(bc->workqueue);
	if (bc->bufio)
		dm_bufio_client_destroy(bc->bufio);

	kobj = &bc->kobj_holder.kobj;
	if (kobj->state_initialized) {
		kobject_put(kobj);
		wait_for_completion(dm_get_completion_from_kobject(kobj));
	}

	kfree(bc->log_sector);
	kfree(bc);
}

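/*
 * If the backing device does not support discards, advertise them anyway
 * so free space can still be trimmed while in the TRIM state, but
 * remember not to forward the discards to the device.
 */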
static void dm_bow_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct bow_context *bc = ti->private;
	const unsigned int block_size = bc->block_size;

	limits->logical_block_size =
		max_t(unsigned int, limits->logical_block_size, block_size);
	limits->physical_block_size =
		max_t(unsigned int, limits->physical_block_size, block_size);
	limits->io_min = max_t(unsigned int, limits->io_min, block_size);

	if (limits->max_discard_sectors == 0) {
		limits->discard_granularity = 1 << 12;
		limits->max_hw_discard_sectors = 1 << 15;
		limits->max_discard_sectors = 1 << 15;
		bc->forward_trims = false;
	} else {
		limits->discard_granularity = 1 << 12;
		bc->forward_trims = true;
	}
}

static int dm_bow_ctr_optional(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct bow_context *bc = ti->private;
	struct dm_arg_set as;
	static const struct dm_arg _args[] = {
		{0, 1, "Invalid number of feature args"},
	};
	unsigned int opt_params;
	const char *opt_string;
	int err;
	char dummy;

	as.argc = argc;
	as.argv = argv;

	err = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
	if (err)
		return err;

	while (opt_params--) {
		opt_string = dm_shift_arg(&as);
		if (!opt_string) {
			ti->error = "Not enough feature arguments";
			return -EINVAL;
		}

		if (sscanf(opt_string, "block_size:%u%c",
					&bc->block_size, &dummy) == 1) {
			if (bc->block_size < SECTOR_SIZE ||
			    bc->block_size > 4096 ||
			    !is_power_of_2(bc->block_size)) {
				ti->error = "Invalid block_size";
				return -EINVAL;
			}
		} else {
			ti->error = "Invalid feature arguments";
			return -EINVAL;
		}
	}

	return 0;
}

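/*
 * Target line (as parsed below): <backing device> followed by an optional
 * feature-argument group, currently only "block_size:<bytes>". The block
 * size defaults to the backing device's logical block size.
 */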
static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct bow_context *bc;
	struct bow_range *br;
	int ret;
	struct mapped_device *md = dm_table_get_md(ti->table);

	if (argc < 1) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	bc = kzalloc(sizeof(*bc), GFP_KERNEL);
	if (!bc) {
		ti->error = "Cannot allocate bow context";
		return -ENOMEM;
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_same_bios = 1;
	ti->private = bc;

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			    &bc->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		goto bad;
	}

	bc->block_size =
		bdev_get_queue(bc->dev->bdev)->limits.logical_block_size;
	if (argc > 1) {
		ret = dm_bow_ctr_optional(ti, argc - 1, &argv[1]);
		if (ret)
			goto bad;
	}

	bc->block_shift = ilog2(bc->block_size);
	bc->log_sector = kzalloc(bc->block_size, GFP_KERNEL);
	if (!bc->log_sector) {
		ti->error = "Cannot allocate log sector";
		ret = -ENOMEM;
		goto bad;
	}

	init_completion(&bc->kobj_holder.completion);
	ret = kobject_init_and_add(&bc->kobj_holder.kobj, &bow_ktype,
				   &disk_to_dev(dm_disk(md))->kobj, "%s",
				   "bow");
	if (ret) {
		ti->error = "Cannot create sysfs node";
		goto bad;
	}

	mutex_init(&bc->ranges_lock);
	bc->ranges = RB_ROOT;
	bc->bufio = dm_bufio_client_create(bc->dev->bdev, bc->block_size, 1, 0,
					   NULL, NULL);
	if (IS_ERR(bc->bufio)) {
		ti->error = "Cannot initialize dm-bufio";
		ret = PTR_ERR(bc->bufio);
		bc->bufio = NULL;
		goto bad;
	}

	bc->workqueue = alloc_workqueue("dm-bow",
					WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM
					| WQ_UNBOUND, num_online_cpus());
	if (!bc->workqueue) {
		ti->error = "Cannot allocate workqueue";
		ret = -ENOMEM;
		goto bad;
	}

	INIT_LIST_HEAD(&bc->trimmed_list);

	br = kzalloc(sizeof(*br), GFP_KERNEL);
	if (!br) {
		ti->error = "Cannot allocate ranges";
		ret = -ENOMEM;
		goto bad;
	}

	br->sector = ti->len;
	br->type = TOP;
	rb_link_node(&br->node, NULL, &bc->ranges.rb_node);
	rb_insert_color(&br->node, &bc->ranges);

	br = kzalloc(sizeof(*br), GFP_KERNEL);
	if (!br) {
		ti->error = "Cannot allocate ranges";
		ret = -ENOMEM;
		goto bad;
	}

	br->sector = 0;
	br->type = UNCHANGED;
	rb_link_node(&br->node, bc->ranges.rb_node,
		     &bc->ranges.rb_node->rb_left);
	rb_insert_color(&br->node, &bc->ranges);

	ti->discards_supported = true;

	return 0;

bad:
	dm_bow_dtr(ti);
	return ret;
}

/****** Handle writes ******/

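/*
 * Back up (part of) an unchanged range before a write lands on it: carve
 * a backup range out of free space, copy the data across, mark the source
 * CHANGED and append a log entry recording the copy.
 */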
static int prepare_unchanged_range(struct bow_context *bc, struct bow_range *br,
				   struct bvec_iter *bi_iter,
				   bool record_checksum)
{
	struct bow_range *backup_br;
	struct bvec_iter backup_bi;
	sector_t log_source, log_dest;
	unsigned int log_size;
	u32 checksum = 0;
	int ret;
	int original_type;
	sector_t sector0;

	/* Find a free range */
	backup_br = find_free_range(bc);
	if (!backup_br)
		return BLK_STS_NOSPC;

	/* Carve out a backup range. This may be smaller than the br given */
	backup_bi.bi_sector = backup_br->sector;
	backup_bi.bi_size = min(range_size(backup_br), (u64) bi_iter->bi_size);
	ret = split_range(bc, &backup_br, &backup_bi);
	if (ret)
		return ret;

	/*
	 * Carve out a changed range. This will not be smaller than the backup
	 * br since the backup br is smaller than the source range and iterator
	 */
	bi_iter->bi_size = backup_bi.bi_size;
	ret = split_range(bc, &br, bi_iter);
	if (ret)
		return ret;
	if (range_size(br) != range_size(backup_br)) {
		WARN_ON(1);
		return BLK_STS_IOERR;
	}

	/* Copy data over */
	ret = copy_data(bc, br, backup_br, record_checksum ? &checksum : NULL);
	if (ret)
		return ret;

	/* Add an entry to the log */
	log_source = br->sector;
	log_dest = backup_br->sector;
	log_size = range_size(br);

	/*
	 * Set the types. Note that since set_type also amalgamates ranges
	 * we have to set both sectors to their final type before calling
	 * set_type on either
	 */
	original_type = br->type;
	sector0 = backup_br->sector;
	bc->trims_total -= range_size(backup_br);
	if (backup_br->type == TRIMMED)
		list_del(&backup_br->trimmed_list);
	backup_br->type = br->type == SECTOR0_CURRENT ? SECTOR0_CURRENT
						      : BACKUP;
	br->type = CHANGED;
	set_type(bc, &backup_br, backup_br->type);

	/*
	 * Add the log entry after marking the backup sector, since adding a log
	 * can cause another backup
	 */
	ret = add_log_entry(bc, log_source, log_dest, log_size, checksum);
	if (ret) {
		br->type = original_type;
		return ret;
	}

	/* Now it is safe to mark this backup successful */
	if (original_type == SECTOR0_CURRENT)
		bc->log_sector->sector0 = sector0;

	set_type(bc, &br, br->type);
	return ret;
}

static int prepare_free_range(struct bow_context *bc, struct bow_range *br,
			      struct bvec_iter *bi_iter)
{
	int ret;

	ret = split_range(bc, &br, bi_iter);
	if (ret)
		return ret;
	set_type(bc, &br, CHANGED);
	return BLK_STS_OK;
}

static int prepare_changed_range(struct bow_context *bc, struct bow_range *br,
				 struct bvec_iter *bi_iter)
{
	/* Nothing to do ... */
	return BLK_STS_OK;
}

static int prepare_one_range(struct bow_context *bc,
			     struct bvec_iter *bi_iter)
{
	struct bow_range *br = find_first_overlapping_range(&bc->ranges,
							    bi_iter);
	switch (br->type) {
	case CHANGED:
		return prepare_changed_range(bc, br, bi_iter);

	case TRIMMED:
		return prepare_free_range(bc, br, bi_iter);

	case UNCHANGED:
	case BACKUP:
		return prepare_unchanged_range(bc, br, bi_iter, true);

	/*
	 * We cannot track the checksum for the active sector0, since it
	 * may change at any point.
	 */
	case SECTOR0_CURRENT:
		return prepare_unchanged_range(bc, br, bi_iter, false);

	case SECTOR0:	/* Handled in the dm_bow_map */
	case TOP:	/* Illegal - top is off the end of the device */
	default:
		WARN_ON(1);
		return BLK_STS_IOERR;
	}
}

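/*
 * Writes in CHECKPOINT state are deferred to the workqueue so that the
 * backup copies and range bookkeeping can sleep outside the map path.
 */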
struct write_work {
	struct work_struct work;
	struct bow_context *bc;
	struct bio *bio;
};

static void bow_write(struct work_struct *work)
{
	struct write_work *ww = container_of(work, struct write_work, work);
	struct bow_context *bc = ww->bc;
	struct bio *bio = ww->bio;
	struct bvec_iter bi_iter = bio->bi_iter;
	int ret = BLK_STS_OK;

	kfree(ww);

	mutex_lock(&bc->ranges_lock);
	do {
		ret = prepare_one_range(bc, &bi_iter);
		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
		bi_iter.bi_size = bio->bi_iter.bi_size
			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
			  * SECTOR_SIZE;
	} while (!ret && bi_iter.bi_size);

	mutex_unlock(&bc->ranges_lock);

	if (!ret) {
		bio_set_dev(bio, bc->dev->bdev);
		submit_bio(bio);
	} else {
		DMERR("Write failure with error %d", -ret);
		bio->bi_status = ret;
		bio_endio(bio);
	}
}

static int queue_write(struct bow_context *bc, struct bio *bio)
{
	struct write_work *ww = kmalloc(sizeof(*ww), GFP_NOIO | __GFP_NORETRY
					| __GFP_NOMEMALLOC | __GFP_NOWARN);
	if (!ww) {
		DMERR("Failed to allocate write_work");
		return -ENOMEM;
	}

	INIT_WORK(&ww->work, bow_write);
	ww->bc = bc;
	ww->bio = bio;
	queue_work(bc->workqueue, &ww->work);
	return DM_MAPIO_SUBMITTED;
}

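/*
 * I/O touching block 0 is redirected to the live copy at
 * log_sector->sector0. If the bio extends past the first block, the first
 * block is split off and redirected while the remainder is handled like
 * any other I/O.
 */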
static int handle_sector0(struct bow_context *bc, struct bio *bio)
{
	int ret = DM_MAPIO_REMAPPED;

	if (bio->bi_iter.bi_size > bc->block_size) {
		struct bio *split = bio_split(bio,
					      bc->block_size >> SECTOR_SHIFT,
					      GFP_NOIO,
					      &fs_bio_set);
		if (!split) {
			DMERR("Failed to split bio");
			bio->bi_status = BLK_STS_RESOURCE;
			bio_endio(bio);
			return DM_MAPIO_SUBMITTED;
		}

		bio_chain(split, bio);
		split->bi_iter.bi_sector = bc->log_sector->sector0;
		bio_set_dev(split, bc->dev->bdev);
		submit_bio(split);

		if (bio_data_dir(bio) == WRITE)
			ret = queue_write(bc, bio);
	} else {
		bio->bi_iter.bi_sector = bc->log_sector->sector0;
	}

	return ret;
}

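/*
 * In TRIM state userspace discards the filesystem's free space so it can
 * later hold backups: add_trim marks unchanged ranges TRIMMED, and
 * remove_trim reverts ranges that are subsequently written.
 */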
static int add_trim(struct bow_context *bc, struct bio *bio)
{
	struct bow_range *br;
	struct bvec_iter bi_iter = bio->bi_iter;

	DMDEBUG("add_trim: %llu, %u",
		(unsigned long long)bio->bi_iter.bi_sector,
		bio->bi_iter.bi_size);

	do {
		br = find_first_overlapping_range(&bc->ranges, &bi_iter);

		switch (br->type) {
		case UNCHANGED:
			if (!split_range(bc, &br, &bi_iter))
				set_type(bc, &br, TRIMMED);
			break;

		case TRIMMED:
			/* Nothing to do */
			break;

		default:
			/* No other case is legal in TRIM state */
			WARN_ON(true);
			break;
		}

		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
		bi_iter.bi_size = bio->bi_iter.bi_size
			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
			  * SECTOR_SIZE;

	} while (bi_iter.bi_size);

	bio_endio(bio);
	return DM_MAPIO_SUBMITTED;
}

static int remove_trim(struct bow_context *bc, struct bio *bio)
{
	struct bow_range *br;
	struct bvec_iter bi_iter = bio->bi_iter;

	DMDEBUG("remove_trim: %llu, %u",
		(unsigned long long)bio->bi_iter.bi_sector,
		bio->bi_iter.bi_size);

	do {
		br = find_first_overlapping_range(&bc->ranges, &bi_iter);

		switch (br->type) {
		case UNCHANGED:
			/* Nothing to do */
			break;

		case TRIMMED:
			if (!split_range(bc, &br, &bi_iter))
				set_type(bc, &br, UNCHANGED);
			break;

		default:
			/* No other case is legal in TRIM state */
			WARN_ON(true);
			break;
		}

		bi_iter.bi_sector += bi_iter.bi_size / SECTOR_SIZE;
		bi_iter.bi_size = bio->bi_iter.bi_size
			- (bi_iter.bi_sector - bio->bi_iter.bi_sector)
			  * SECTOR_SIZE;

	} while (bi_iter.bi_size);

	return DM_MAPIO_REMAPPED;
}

int remap_unless_illegal_trim(struct bow_context *bc, struct bio *bio)
{
	if (!bc->forward_trims && bio_op(bio) == REQ_OP_DISCARD) {
		bio->bi_status = BLK_STS_NOTSUPP;
		bio_endio(bio);
		return DM_MAPIO_SUBMITTED;
	} else {
		bio_set_dev(bio, bc->dev->bdev);
		return DM_MAPIO_REMAPPED;
	}
}

/****** dm interface ******/

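/*
 * Map path: in COMMITTED state everything passes straight through (bar
 * discards the backing device cannot handle). In TRIM state discards mark
 * ranges as free and writes un-mark them; in CHECKPOINT state block 0 is
 * redirected and other writes are queued for copy-on-write handling.
 */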
static int dm_bow_map(struct dm_target *ti, struct bio *bio)
{
	int ret = DM_MAPIO_REMAPPED;
	struct bow_context *bc = ti->private;

	if (likely(bc->state.counter == COMMITTED))
		return remap_unless_illegal_trim(bc, bio);

	if (bio_data_dir(bio) == READ && bio->bi_iter.bi_sector != 0)
		return remap_unless_illegal_trim(bc, bio);

	if (atomic_read(&bc->state) != COMMITTED) {
		enum state state;

		mutex_lock(&bc->ranges_lock);
		state = atomic_read(&bc->state);
		if (state == TRIM) {
			if (bio_op(bio) == REQ_OP_DISCARD)
				ret = add_trim(bc, bio);
			else if (bio_data_dir(bio) == WRITE)
				ret = remove_trim(bc, bio);
			else
				/* pass-through */;
		} else if (state == CHECKPOINT) {
			if (bio->bi_iter.bi_sector == 0)
				ret = handle_sector0(bc, bio);
			else if (bio_data_dir(bio) == WRITE)
				ret = queue_write(bc, bio);
			else
				/* pass-through */;
		} else {
			/* pass-through */
		}
		mutex_unlock(&bc->ranges_lock);
	}

	if (ret == DM_MAPIO_REMAPPED)
		return remap_unless_illegal_trim(bc, bio);

	return ret;
}

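/*
 * The table status dumps the range tree and sanity-checks it: the first
 * range must start at sector 0, the last must be TOP at ti->len, and
 * every TRIMMED range must appear on the trimmed list.
 */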
static void dm_bow_tablestatus(struct dm_target *ti, char *result,
			       unsigned int maxlen)
{
	char *end = result + maxlen;
	struct bow_context *bc = ti->private;
	struct rb_node *i;
	int trimmed_list_length = 0;
	int trimmed_range_count = 0;
	struct bow_range *br;

	if (maxlen == 0)
		return;
	result[0] = 0;

	list_for_each_entry(br, &bc->trimmed_list, trimmed_list)
		if (br->type == TRIMMED) {
			++trimmed_list_length;
		} else {
			scnprintf(result, end - result,
				  "ERROR: non-trimmed entry in trimmed_list");
			return;
		}

	if (!rb_first(&bc->ranges)) {
		scnprintf(result, end - result, "ERROR: Empty ranges");
		return;
	}

	if (container_of(rb_first(&bc->ranges), struct bow_range, node)
	    ->sector) {
		scnprintf(result, end - result,
			 "ERROR: First range does not start at sector 0");
		return;
	}

	mutex_lock(&bc->ranges_lock);
	for (i = rb_first(&bc->ranges); i; i = rb_next(i)) {
		struct bow_range *br = container_of(i, struct bow_range, node);

		result += scnprintf(result, end - result, "%s: %llu",
				    readable_type[br->type],
				    (unsigned long long)br->sector);
		if (result >= end)
			goto unlock;

		result += scnprintf(result, end - result, "\n");
		if (result >= end)
			goto unlock;

		if (br->type == TRIMMED)
			++trimmed_range_count;

		if (br->type == TOP) {
			if (br->sector != ti->len) {
				scnprintf(result, end - result,
					 "\nERROR: Top sector is incorrect");
			}

			if (&br->node != rb_last(&bc->ranges)) {
				scnprintf(result, end - result,
					  "\nERROR: Top sector is not last");
			}

			break;
		}

		if (!rb_next(i)) {
			scnprintf(result, end - result,
				  "\nERROR: Last range not of type TOP");
			goto unlock;
		}

		if (br->sector > range_top(br)) {
			scnprintf(result, end - result,
				  "\nERROR: sectors out of order");
			goto unlock;
		}
	}

	if (trimmed_range_count != trimmed_list_length)
		scnprintf(result, end - result,
			  "\nERROR: not all trimmed ranges in trimmed list");

unlock:
	mutex_unlock(&bc->ranges_lock);
}

static void dm_bow_status(struct dm_target *ti, status_type_t type,
			  unsigned int status_flags, char *result,
			  unsigned int maxlen)
{
	switch (type) {
	case STATUSTYPE_INFO:
		if (maxlen)
			result[0] = 0;
		break;

	case STATUSTYPE_TABLE:
		dm_bow_tablestatus(ti, result, maxlen);
		break;
	}
}

int dm_bow_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct bow_context *bc = ti->private;
	struct dm_dev *dev = bc->dev;

	*bdev = dev->bdev;
	/* Only pass ioctls through if the device sizes match exactly. */
	return ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

static int dm_bow_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct bow_context *bc = ti->private;

	return fn(ti, bc->dev, 0, ti->len, data);
}

static struct target_type bow_target = {
	.name   = "bow",
	.version = {1, 2, 0},
	.features = DM_TARGET_PASSES_CRYPTO,
	.module = THIS_MODULE,
	.ctr    = dm_bow_ctr,
	.dtr    = dm_bow_dtr,
	.map    = dm_bow_map,
	.status = dm_bow_status,
	.prepare_ioctl  = dm_bow_prepare_ioctl,
	.iterate_devices = dm_bow_iterate_devices,
	.io_hints = dm_bow_io_hints,
};

int __init dm_bow_init(void)
{
	int r = dm_register_target(&bow_target);

	if (r < 0)
		DMERR("registering bow failed %d", r);
	return r;
}

void dm_bow_exit(void)
{
	dm_unregister_target(&bow_target);
}

MODULE_LICENSE("GPL");

module_init(dm_bow_init);
module_exit(dm_bow_exit);