// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define	DM_MSG_PREFIX		"zoned"

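/* Minimum number of BIOs reserved in the bio_set used to clone target BIOs */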
#define DMZ_MIN_BIOS		8192

/*
 * Zone BIO context.
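 *
 * A context is stored in the per-BIO data of every target BIO. Its
 * reference count starts at 1 when the BIO is mapped and is incremented
 * for each clone submitted to a backing device; the target BIO is only
 * ended once all references have been dropped (see dmz_bio_endio()).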
 */
struct dmz_bioctx {
	struct dmz_dev		*dev;
	struct dm_zone		*zone;
	struct bio		*bio;
	refcount_t		ref;
};

/*
 * Chunk work descriptor.
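 *
 * One descriptor exists per chunk with BIOs being processed. Its
 * reference count accounts for the queued work item and for every BIO
 * added to @bio_list; the descriptor is removed from the chunk radix
 * tree and freed once the count drops to zero (dmz_put_chunk_work()).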
 */
struct dm_chunk_work {
	struct work_struct	work;
	refcount_t		refcount;
	struct dmz_target	*target;
	unsigned int		chunk;
	struct bio_list		bio_list;
};

/*
 * Target descriptor.
 */
struct dmz_target {
	struct dm_dev		**ddev;
	unsigned int		nr_ddevs;

	unsigned int		flags;

	/* Zoned block device information */
	struct dmz_dev		*dev;

	/* For metadata handling */
	struct dmz_metadata     *metadata;

	/* For chunk work */
	struct radix_tree_root	chunk_rxtree;
	struct workqueue_struct *chunk_wq;
	struct mutex		chunk_lock;

	/* For cloned BIOs to zones */
	struct bio_set		bio_set;

	/* For flush */
	spinlock_t		flush_lock;
	struct bio_list		flush_list;
	struct delayed_work	flush_work;
	struct workqueue_struct *flush_wq;
};

/*
 * Flush interval (in jiffies).
 */
#define DMZ_FLUSH_PERIOD	(10 * HZ)

/*
 * Target BIO completion.
 */
static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
		bio->bi_status = status;
	if (bioctx->dev && bio->bi_status != BLK_STS_OK)
		bioctx->dev->flags |= DMZ_CHECK_BDEV;

	if (refcount_dec_and_test(&bioctx->ref)) {
		struct dm_zone *zone = bioctx->zone;

		if (zone) {
			if (bio->bi_status != BLK_STS_OK &&
			    bio_op(bio) == REQ_OP_WRITE &&
			    dmz_is_seq(zone))
				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
			dmz_deactivate_zone(zone);
		}
		bio_endio(bio);
	}
}

/*
 * Completion callback for an internally cloned target BIO. This terminates the
 * target BIO when there are no more references to its context.
 */
static void dmz_clone_endio(struct bio *clone)
{
	struct dmz_bioctx *bioctx = clone->bi_private;
	blk_status_t status = clone->bi_status;

	bio_put(clone);
	dmz_bio_endio(bioctx->bio, status);
}

/*
 * Issue a clone of a target BIO. The clone may only partially process the
 * original target BIO.
 */
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
			  struct bio *bio, sector_t chunk_block,
			  unsigned int nr_blocks)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_dev *dev = zone->dev;
	struct bio *clone;

	if (dev->flags & DMZ_BDEV_DYING)
		return -EIO;

	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
	if (!clone)
		return -ENOMEM;

	bio_set_dev(clone, dev->bdev);
	bioctx->dev = dev;
	clone->bi_iter.bi_sector =
		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
	clone->bi_end_io = dmz_clone_endio;
	clone->bi_private = bioctx;

	bio_advance(bio, clone->bi_iter.bi_size);

	refcount_inc(&bioctx->ref);
	submit_bio_noacct(clone);

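	/* Account for the advance of the sequential zone write pointer */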
	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
		zone->wp_block += nr_blocks;

	return 0;
}

/*
 * Zero out pages of discarded blocks accessed by a read BIO.
 */
static void dmz_handle_read_zero(struct dmz_target *dmz, struct bio *bio,
				 sector_t chunk_block, unsigned int nr_blocks)
{
	unsigned int size = nr_blocks << DMZ_BLOCK_SHIFT;

	/* Zero the first nr_blocks blocks: temporarily clamp the BIO size */
	swap(bio->bi_iter.bi_size, size);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, size);

	bio_advance(bio, size);
}

/*
 * Process a read BIO.
 */
static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
			   struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t end_block = chunk_block + nr_blocks;
	struct dm_zone *rzone, *bzone;
	int ret;

	/* Reads into unmapped chunks only need to zero the BIO buffer */
	if (!zone) {
		zero_fill_bio(bio);
		return 0;
	}

	DMDEBUG("(%s): READ chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" :
		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/* Check block validity to determine the read location */
	bzone = zone->bzone;
	while (chunk_block < end_block) {
		nr_blocks = 0;
		if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
		    chunk_block < zone->wp_block) {
			/* Test block validity in the data zone */
			ret = dmz_block_valid(zmd, zone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read data zone blocks */
				nr_blocks = ret;
				rzone = zone;
			}
		}

		/*
		 * No valid blocks found in the data zone.
		 * Check the buffer zone, if there is one.
		 */
		if (!nr_blocks && bzone) {
			ret = dmz_block_valid(zmd, bzone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read buffer zone blocks */
				nr_blocks = ret;
				rzone = bzone;
			}
		}

		if (nr_blocks) {
			/* Valid blocks found: read them */
			nr_blocks = min_t(unsigned int, nr_blocks,
					  end_block - chunk_block);
			ret = dmz_submit_bio(dmz, rzone, bio,
					     chunk_block, nr_blocks);
			if (ret)
				return ret;
			chunk_block += nr_blocks;
		} else {
			/* No valid block: zero out the current BIO block */
			dmz_handle_read_zero(dmz, bio, chunk_block, 1);
			chunk_block++;
		}
	}

	return 0;
}

/*
 * Write blocks directly in a data zone, at the write pointer.
 * If a buffer zone is assigned, invalidate the blocks written
 * in place.
 */
static int dmz_handle_direct_write(struct dmz_target *dmz,
				   struct dm_zone *zone, struct bio *bio,
				   sector_t chunk_block,
				   unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone = zone->bzone;
	int ret;

	if (dmz_is_readonly(zone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the data zone and invalidate
	 * in the buffer zone, if there is one.
	 */
	ret = dmz_validate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && bzone)
		ret = dmz_invalidate_blocks(zmd, bzone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Write blocks in the buffer zone of @zone.
 * If no buffer zone is assigned yet, get one.
 * Called with @zone write locked.
 */
static int dmz_handle_buffered_write(struct dmz_target *dmz,
				     struct dm_zone *zone, struct bio *bio,
				     sector_t chunk_block,
				     unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone;
	int ret;

	/* Get the buffer zone. One will be allocated if needed */
	bzone = dmz_get_chunk_buffer(zmd, zone);
	if (IS_ERR(bzone))
		return PTR_ERR(bzone);

	if (dmz_is_readonly(bzone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the buffer zone
	 * and invalidate in the data zone.
	 */
	ret = dmz_validate_blocks(zmd, bzone, chunk_block, nr_blocks);
	if (ret == 0 && chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Process a write BIO.
 */
static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
			    struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);

	if (!zone)
		return -ENOSPC;

	DMDEBUG("(%s): WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" :
		 (dmz_is_cache(zone) ? "CACHE" : "SEQ")),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
	    chunk_block == zone->wp_block) {
		/*
		 * zone is a random zone or it is a sequential zone
		 * and the BIO is aligned to the zone write pointer:
		 * direct write the zone.
		 */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);
	}

	/*
	 * This is an unaligned write in a sequential zone:
	 * use buffered write.
	 */
	return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);
}

/*
 * Process a discard BIO.
 */
static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
			      struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t block = dmz_bio_block(bio);
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t chunk_block = dmz_chunk_block(zmd, block);
	int ret = 0;

	/* For unmapped chunks, there is nothing to do */
	if (!zone)
		return 0;

	if (dmz_is_readonly(zone))
		return -EROFS;

	DMDEBUG("(%s): DISCARD chunk %llu -> zone %u, block %llu, %u blocks",
		dmz_metadata_label(dmz->metadata),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/*
	 * Invalidate blocks in the data zone and its
	 * buffer zone if one is mapped.
	 */
	if (dmz_is_rnd(zone) || dmz_is_cache(zone) ||
	    chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && zone->bzone)
		ret = dmz_invalidate_blocks(zmd, zone->bzone,
					    chunk_block, nr_blocks);
	return ret;
}

/*
 * Process a BIO.
 */
static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
			   struct bio *bio)
{
	struct dmz_bioctx *bioctx =
		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *zone;
	int ret;

	dmz_lock_metadata(zmd);

	/*
	 * Get the data zone mapping the chunk. There may be no
	 * mapping for read and discard. If a mapping is obtained,
	 * the zone returned will be set to active state.
	 */
	zone = dmz_get_chunk_mapping(zmd, dmz_bio_chunk(zmd, bio),
				     bio_op(bio));
	if (IS_ERR(zone)) {
		ret = PTR_ERR(zone);
		goto out;
	}

	/* Process the BIO */
	if (zone) {
		dmz_activate_zone(zone);
		bioctx->zone = zone;
		dmz_reclaim_bio_acc(zone->dev->reclaim);
	}

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		ret = dmz_handle_read(dmz, zone, bio);
		break;
	case REQ_OP_WRITE:
		ret = dmz_handle_write(dmz, zone, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		ret = dmz_handle_discard(dmz, zone, bio);
		break;
	default:
		DMERR("(%s): Unsupported BIO operation 0x%x",
		      dmz_metadata_label(dmz->metadata), bio_op(bio));
		ret = -EIO;
	}

	/*
	 * Release the chunk mapping. This will check that the mapping
	 * is still valid, that is, that the zone used still has valid blocks.
	 */
	if (zone)
		dmz_put_chunk_mapping(zmd, zone);
out:
	dmz_bio_endio(bio, errno_to_blk_status(ret));

	dmz_unlock_metadata(zmd);
}

/*
 * Increment a chunk work reference counter.
 */
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
	refcount_inc(&cw->refcount);
}

/*
 * Decrement a chunk work reference count and
 * free it if it becomes 0.
 */
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
	if (refcount_dec_and_test(&cw->refcount)) {
		WARN_ON(!bio_list_empty(&cw->bio_list));
		radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
		kfree(cw);
	}
}

/*
 * Chunk BIO work function.
 */
static void dmz_chunk_work(struct work_struct *work)
{
	struct dm_chunk_work *cw = container_of(work, struct dm_chunk_work, work);
	struct dmz_target *dmz = cw->target;
	struct bio *bio;

	mutex_lock(&dmz->chunk_lock);

	/* Process the chunk BIOs */
	while ((bio = bio_list_pop(&cw->bio_list))) {
		mutex_unlock(&dmz->chunk_lock);
		dmz_handle_bio(dmz, cw, bio);
		mutex_lock(&dmz->chunk_lock);
		dmz_put_chunk_work(cw);
	}

	/* Queueing the work incremented the work refcount */
	dmz_put_chunk_work(cw);

	mutex_unlock(&dmz->chunk_lock);
}

/*
 * Flush work.
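 *
 * Runs periodically (every DMZ_FLUSH_PERIOD) and is also kicked by
 * dmz_map() whenever an empty REQ_OP_WRITE (flush) BIO is queued:
 * dirty metadata is flushed first, then all pending flush BIOs are
 * completed with the result of the metadata flush.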
 */
static void dmz_flush_work(struct work_struct *work)
{
	struct dmz_target *dmz = container_of(work, struct dmz_target, flush_work.work);
	struct bio *bio;
	int ret;

	/* Flush dirty metadata blocks */
	ret = dmz_flush_metadata(dmz->metadata);
	if (ret)
		DMDEBUG("(%s): Metadata flush failed, rc=%d",
			dmz_metadata_label(dmz->metadata), ret);

	/* Process queued flush requests */
	while (1) {
		spin_lock(&dmz->flush_lock);
		bio = bio_list_pop(&dmz->flush_list);
		spin_unlock(&dmz->flush_lock);

		if (!bio)
			break;

		dmz_bio_endio(bio, errno_to_blk_status(ret));
	}

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
}

/*
 * Get a chunk work and start it to process a new BIO.
 * If the BIO chunk has no work yet, create one.
 */
static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
{
	unsigned int chunk = dmz_bio_chunk(dmz->metadata, bio);
	struct dm_chunk_work *cw;
	int ret = 0;

	mutex_lock(&dmz->chunk_lock);

	/* Get the BIO chunk work. If one is not active yet, create one */
	cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
	if (cw) {
		dmz_get_chunk_work(cw);
	} else {
		/* Create a new chunk work */
		cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
		if (unlikely(!cw)) {
			ret = -ENOMEM;
			goto out;
		}

		INIT_WORK(&cw->work, dmz_chunk_work);
		refcount_set(&cw->refcount, 1);
		cw->target = dmz;
		cw->chunk = chunk;
		bio_list_init(&cw->bio_list);

		ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
		if (unlikely(ret)) {
			kfree(cw);
			goto out;
		}
	}

	bio_list_add(&cw->bio_list, bio);

	if (queue_work(dmz->chunk_wq, &cw->work))
		dmz_get_chunk_work(cw);
out:
	mutex_unlock(&dmz->chunk_lock);
	return ret;
}

/*
 * Check if the backing device is being removed. If it's on the way out,
 * start failing I/O. Reclaim and metadata components also call this
 * function to cleanly abort operation in the event of such failure.
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
	if (dmz_dev->flags & DMZ_BDEV_DYING)
		return true;

	if (dmz_dev->flags & DMZ_CHECK_BDEV)
		return !dmz_check_bdev(dmz_dev);

	if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
		dmz_dev_warn(dmz_dev, "Backing device queue dying");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
}

/*
 * Check the backing device availability. This detects such events as
 * backing device going offline due to errors, media removals, etc.
 * This check is less efficient than dmz_bdev_is_dying() and should
 * only be performed as a part of error handling.
 */
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;

	dmz_dev->flags &= ~DMZ_CHECK_BDEV;

	if (dmz_bdev_is_dying(dmz_dev))
		return false;

	disk = dmz_dev->bdev->bd_disk;
	if (disk->fops->check_events &&
	    disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
		dmz_dev_warn(dmz_dev, "Backing device offline");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return !(dmz_dev->flags & DMZ_BDEV_DYING);
}

/*
 * Process a new BIO.
 */
static int dmz_map(struct dm_target *ti, struct bio *bio)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_metadata *zmd = dmz->metadata;
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	sector_t sector = bio->bi_iter.bi_sector;
	unsigned int nr_sectors = bio_sectors(bio);
	sector_t chunk_sector;
	int ret;

	if (dmz_dev_is_dying(zmd))
		return DM_MAPIO_KILL;

	DMDEBUG("(%s): BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		bio_op(bio), (unsigned long long)sector, nr_sectors,
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(unsigned long long)dmz_chunk_block(zmd, dmz_bio_block(bio)),
		(unsigned int)dmz_bio_blocks(bio));

	if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
		return DM_MAPIO_REMAPPED;

	/* The BIO should be block aligned */
	if ((nr_sectors & DMZ_BLOCK_SECTORS_MASK) || (sector & DMZ_BLOCK_SECTORS_MASK))
		return DM_MAPIO_KILL;

	/* Initialize the BIO context */
	bioctx->dev = NULL;
	bioctx->zone = NULL;
	bioctx->bio = bio;
	refcount_set(&bioctx->ref, 1);

	/* Set the BIO pending in the flush list */
	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
		spin_lock(&dmz->flush_lock);
		bio_list_add(&dmz->flush_list, bio);
		spin_unlock(&dmz->flush_lock);
		mod_delayed_work(dmz->flush_wq, &dmz->flush_work, 0);
		return DM_MAPIO_SUBMITTED;
	}

	/* Split zone BIOs to fit entirely into a zone */
	chunk_sector = sector & (dmz_zone_nr_sectors(zmd) - 1);
	if (chunk_sector + nr_sectors > dmz_zone_nr_sectors(zmd))
		dm_accept_partial_bio(bio, dmz_zone_nr_sectors(zmd) - chunk_sector);

	/* Now ready to handle this BIO */
	ret = dmz_queue_chunk_work(dmz, bio);
	if (ret) {
		DMDEBUG("(%s): BIO op %d, can't process chunk %llu, err %i",
			dmz_metadata_label(zmd),
			bio_op(bio), (u64)dmz_bio_chunk(zmd, bio),
			ret);
		return DM_MAPIO_REQUEUE;
	}

	return DM_MAPIO_SUBMITTED;
}

/*
 * Get zoned device information.
 */
static int dmz_get_zoned_device(struct dm_target *ti, char *path,
				int idx, int nr_devs)
{
	struct dmz_target *dmz = ti->private;
	struct dm_dev *ddev;
	struct dmz_dev *dev;
	int ret;
	struct block_device *bdev;

	/* Get the target device */
	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev);
	if (ret) {
		ti->error = "Get target device failed";
		return ret;
	}

	bdev = ddev->bdev;
	if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) {
		if (nr_devs == 1) {
			ti->error = "Invalid regular device";
			goto err;
		}
		if (idx != 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		if (dmz->ddev[0]) {
			ti->error = "Too many regular devices";
			goto err;
		}
		dev = &dmz->dev[idx];
		dev->flags = DMZ_BDEV_REGULAR;
	} else {
		if (dmz->ddev[idx]) {
			ti->error = "Too many zoned devices";
			goto err;
		}
		if (nr_devs > 1 && idx == 0) {
			ti->error = "First device must be a regular device";
			goto err;
		}
		dev = &dmz->dev[idx];
	}
	dev->bdev = bdev;
	dev->dev_idx = idx;
	(void)bdevname(dev->bdev, dev->name);

	dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
	if (ti->begin) {
		ti->error = "Partial mapping is not supported";
		goto err;
	}

	dmz->ddev[idx] = ddev;

	return 0;
err:
	dm_put_device(ti, ddev);
	return -EINVAL;
}

/*
 * Cleanup zoned device information.
 */
static void dmz_put_zoned_device(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	for (i = 0; i < dmz->nr_ddevs; i++) {
		if (dmz->ddev[i]) {
			dm_put_device(ti, dmz->ddev[i]);
			dmz->ddev[i] = NULL;
		}
	}
}

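/*
 * Validate the device set and compute the per-device zone geometry:
 * each device gets a zone_offset so that the zones of all devices form
 * a single contiguous zone ID space, with the regular device first.
 */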
static int dmz_fixup_devices(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *reg_dev, *zoned_dev;
	struct request_queue *q;
	sector_t zone_nr_sectors = 0;
	int i;

	/*
	 * When we have more than one device, the first one must be a
	 * regular block device and the others zoned block devices.
	 */
	if (dmz->nr_ddevs > 1) {
		reg_dev = &dmz->dev[0];
		if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
			ti->error = "Primary disk is not a regular device";
			return -EINVAL;
		}
		for (i = 1; i < dmz->nr_ddevs; i++) {
			zoned_dev = &dmz->dev[i];
			if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
				ti->error = "Secondary disk is not a zoned device";
				return -EINVAL;
			}
			q = bdev_get_queue(zoned_dev->bdev);
			if (zone_nr_sectors &&
			    zone_nr_sectors != blk_queue_zone_sectors(q)) {
				ti->error = "Zone nr sectors mismatch";
				return -EINVAL;
			}
			zone_nr_sectors = blk_queue_zone_sectors(q);
			zoned_dev->zone_nr_sectors = zone_nr_sectors;
			zoned_dev->nr_zones =
				blkdev_nr_zones(zoned_dev->bdev->bd_disk);
		}
	} else {
		reg_dev = NULL;
		zoned_dev = &dmz->dev[0];
		if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
			ti->error = "Disk is not a zoned device";
			return -EINVAL;
		}
		q = bdev_get_queue(zoned_dev->bdev);
		zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q);
		zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk);
	}

	if (reg_dev) {
		sector_t zone_offset;

		reg_dev->zone_nr_sectors = zone_nr_sectors;
		reg_dev->nr_zones =
			DIV_ROUND_UP_SECTOR_T(reg_dev->capacity,
					      reg_dev->zone_nr_sectors);
		reg_dev->zone_offset = 0;
		zone_offset = reg_dev->nr_zones;
		for (i = 1; i < dmz->nr_ddevs; i++) {
			dmz->dev[i].zone_offset = zone_offset;
			zone_offset += dmz->dev[i].nr_zones;
		}
	}
	return 0;
}

/*
 * Setup target.
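 *
 * The table arguments are the backing device paths: either a single
 * zoned block device, or a regular block device followed by one or
 * more zoned block devices (see dmz_get_zoned_device()).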
 */
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dmz_target *dmz;
	int ret, i;

	/* Check arguments */
	if (argc < 1) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	/* Allocate and initialize the target descriptor */
	dmz = kzalloc(sizeof(struct dmz_target), GFP_KERNEL);
	if (!dmz) {
		ti->error = "Unable to allocate the zoned target descriptor";
		return -ENOMEM;
	}
	dmz->dev = kcalloc(argc, sizeof(struct dmz_dev), GFP_KERNEL);
	if (!dmz->dev) {
		ti->error = "Unable to allocate the zoned device descriptors";
		kfree(dmz);
		return -ENOMEM;
	}
	dmz->ddev = kcalloc(argc, sizeof(struct dm_dev *), GFP_KERNEL);
	if (!dmz->ddev) {
		ti->error = "Unable to allocate the dm device descriptors";
		ret = -ENOMEM;
		goto err;
	}
	dmz->nr_ddevs = argc;

	ti->private = dmz;

	/* Get the target block devices */
	for (i = 0; i < argc; i++) {
		ret = dmz_get_zoned_device(ti, argv[i], i, argc);
		if (ret)
			goto err_dev;
	}
	ret = dmz_fixup_devices(ti);
	if (ret)
		goto err_dev;

	/* Initialize metadata */
	ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata,
			       dm_table_device_name(ti->table));
	if (ret) {
		ti->error = "Metadata initialization failed";
		goto err_dev;
	}

	/* Set target (no write same support) */
	ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata);
	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_zeroes_bios = 1;
	ti->per_io_data_size = sizeof(struct dmz_bioctx);
	ti->flush_supported = true;
	ti->discards_supported = true;

	/* The exposed capacity is the number of chunks that can be mapped */
	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) <<
		dmz_zone_nr_sectors_shift(dmz->metadata);

	/* Zone BIO */
	ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
	if (ret) {
		ti->error = "Create BIO set failed";
		goto err_meta;
	}

	/* Chunk BIO work */
	mutex_init(&dmz->chunk_lock);
	INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
	dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
					dmz_metadata_label(dmz->metadata));
	if (!dmz->chunk_wq) {
		ti->error = "Create chunk workqueue failed";
		ret = -ENOMEM;
		goto err_bio;
	}

	/* Flush work */
	spin_lock_init(&dmz->flush_lock);
	bio_list_init(&dmz->flush_list);
	INIT_DELAYED_WORK(&dmz->flush_work, dmz_flush_work);
	dmz->flush_wq = alloc_ordered_workqueue("dmz_fwq_%s", WQ_MEM_RECLAIM,
						dmz_metadata_label(dmz->metadata));
	if (!dmz->flush_wq) {
		ti->error = "Create flush workqueue failed";
		ret = -ENOMEM;
		goto err_cwq;
	}
	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);

	/* Initialize reclaim */
	for (i = 0; i < dmz->nr_ddevs; i++) {
		ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
		if (ret) {
			ti->error = "Zone reclaim initialization failed";
			goto err_fwq;
		}
	}

	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
	       dmz_metadata_label(dmz->metadata),
	       (unsigned long long)ti->len,
	       (unsigned long long)dmz_sect2blk(ti->len));

	return 0;
err_fwq:
	destroy_workqueue(dmz->flush_wq);
err_cwq:
	destroy_workqueue(dmz->chunk_wq);
err_bio:
	mutex_destroy(&dmz->chunk_lock);
	bioset_exit(&dmz->bio_set);
err_meta:
	dmz_dtr_metadata(dmz->metadata);
err_dev:
	dmz_put_zoned_device(ti);
err:
	kfree(dmz->dev);
	kfree(dmz);

	return ret;
}

/*
 * Cleanup target.
 */
static void dmz_dtr(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	flush_workqueue(dmz->chunk_wq);
	destroy_workqueue(dmz->chunk_wq);

	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_dtr_reclaim(dmz->dev[i].reclaim);

	cancel_delayed_work_sync(&dmz->flush_work);
	destroy_workqueue(dmz->flush_wq);

	(void) dmz_flush_metadata(dmz->metadata);

	dmz_dtr_metadata(dmz->metadata);

	bioset_exit(&dmz->bio_set);

	dmz_put_zoned_device(ti);

	mutex_destroy(&dmz->chunk_lock);

	kfree(dmz->dev);
	kfree(dmz);
}

/*
 * Setup target request queue limits.
 */
static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dmz_target *dmz = ti->private;
	unsigned int chunk_sectors = dmz_zone_nr_sectors(dmz->metadata);

	limits->logical_block_size = DMZ_BLOCK_SIZE;
	limits->physical_block_size = DMZ_BLOCK_SIZE;

	blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
	blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);

	limits->discard_alignment = DMZ_BLOCK_SIZE;
	limits->discard_granularity = DMZ_BLOCK_SIZE;
	limits->max_discard_sectors = chunk_sectors;
	limits->max_hw_discard_sectors = chunk_sectors;
	limits->max_write_zeroes_sectors = chunk_sectors;

	/* FS hint to try to align to the device zone size */
	limits->chunk_sectors = chunk_sectors;
	limits->max_sectors = chunk_sectors;

	/* We are exposing a drive-managed zoned block device */
	limits->zoned = BLK_ZONED_NONE;
}

/*
 * Pass on ioctl to the backend device.
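 * Only the first configured device is used for ioctls.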
 */
static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *dev = &dmz->dev[0];

	if (!dmz_check_bdev(dev))
		return -EIO;

	*bdev = dev->bdev;

	return 0;
}

/*
 * Stop works on suspend.
 */
static void dmz_suspend(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	flush_workqueue(dmz->chunk_wq);
	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_suspend_reclaim(dmz->dev[i].reclaim);
	cancel_delayed_work_sync(&dmz->flush_work);
}

/*
 * Restart works on resume or if suspend failed.
 */
static void dmz_resume(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;
	int i;

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
	for (i = 0; i < dmz->nr_ddevs; i++)
		dmz_resume_reclaim(dmz->dev[i].reclaim);
}

static int dmz_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct dmz_target *dmz = ti->private;
	unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata);
	sector_t capacity;
	int i, r;

	for (i = 0; i < dmz->nr_ddevs; i++) {
		capacity = dmz->dev[i].capacity & ~(zone_nr_sectors - 1);
		r = fn(ti, dmz->ddev[i], 0, capacity, data);
		if (r)
			break;
	}
	return r;
}

static void dmz_status(struct dm_target *ti, status_type_t type,
		       unsigned int status_flags, char *result,
		       unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	ssize_t sz = 0;
	char buf[BDEVNAME_SIZE];
	struct dmz_dev *dev;
	int i;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u zones %u/%u cache",
		       dmz_nr_zones(dmz->metadata),
		       dmz_nr_unmap_cache_zones(dmz->metadata),
		       dmz_nr_cache_zones(dmz->metadata));
		for (i = 0; i < dmz->nr_ddevs; i++) {
			/*
			 * For a multi-device setup the first device
			 * contains only cache zones.
			 */
			if ((i == 0) &&
			    (dmz_nr_cache_zones(dmz->metadata) > 0))
				continue;
			DMEMIT(" %u/%u random %u/%u sequential",
			       dmz_nr_unmap_rnd_zones(dmz->metadata, i),
			       dmz_nr_rnd_zones(dmz->metadata, i),
			       dmz_nr_unmap_seq_zones(dmz->metadata, i),
			       dmz_nr_seq_zones(dmz->metadata, i));
		}
		break;
	case STATUSTYPE_TABLE:
		dev = &dmz->dev[0];
		format_dev_t(buf, dev->bdev->bd_dev);
		DMEMIT("%s", buf);
		for (i = 1; i < dmz->nr_ddevs; i++) {
			dev = &dmz->dev[i];
			format_dev_t(buf, dev->bdev->bd_dev);
			DMEMIT(" %s", buf);
		}
		break;
	}
	return;
}

static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result, unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	int r = -EINVAL;

	if (!strcasecmp(argv[0], "reclaim")) {
		int i;

		for (i = 0; i < dmz->nr_ddevs; i++)
			dmz_schedule_reclaim(dmz->dev[i].reclaim);
		r = 0;
	} else
		DMERR("unrecognized message %s", argv[0]);
	return r;
}

static struct target_type dmz_type = {
	.name		 = "zoned",
	.version	 = {2, 0, 0},
	.features	 = DM_TARGET_SINGLETON | DM_TARGET_MIXED_ZONED_MODEL,
	.module		 = THIS_MODULE,
	.ctr		 = dmz_ctr,
	.dtr		 = dmz_dtr,
	.map		 = dmz_map,
	.io_hints	 = dmz_io_hints,
	.prepare_ioctl	 = dmz_prepare_ioctl,
	.postsuspend	 = dmz_suspend,
	.resume		 = dmz_resume,
	.iterate_devices = dmz_iterate_devices,
	.status		 = dmz_status,
	.message	 = dmz_message,
};

static int __init dmz_init(void)
{
	return dm_register_target(&dmz_type);
}

static void __exit dmz_exit(void)
{
	dm_unregister_target(&dmz_type);
}

module_init(dmz_init);
module_exit(dmz_exit);

MODULE_DESCRIPTION(DM_NAME " target for zoned block devices");
MODULE_AUTHOR("Damien Le Moal <damien.lemoal@wdc.com>");
MODULE_LICENSE("GPL");