// SPDX-License-Identifier: GPL-2.0
/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>

#include "blk.h"

#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
static const char *const zone_cond_name[] = {
	ZONE_COND_NAME(NOT_WP),
	ZONE_COND_NAME(EMPTY),
	ZONE_COND_NAME(IMP_OPEN),
	ZONE_COND_NAME(EXP_OPEN),
	ZONE_COND_NAME(CLOSED),
	ZONE_COND_NAME(READONLY),
	ZONE_COND_NAME(FULL),
	ZONE_COND_NAME(OFFLINE),
};
#undef ZONE_COND_NAME

/**
 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
 * @zone_cond: BLK_ZONE_COND_XXX.
 *
 * Description: Centralized block layer function to convert BLK_ZONE_COND_XXX
 * into its string format. Useful for debugging and tracing zone conditions.
 * For an invalid BLK_ZONE_COND_XXX it returns the string "UNKNOWN".
 */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
{
	static const char *zone_cond_str = "UNKNOWN";

	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
		zone_cond_str = zone_cond_name[zone_cond];

	return zone_cond_str;
}
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
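
/*
 * Illustrative sketch (compiled out, not part of this file): a report_zones
 * callback could use blk_zone_cond_str() when tracing zone state. The
 * callback name is hypothetical; its signature matches report_zones_cb.
 */
#if 0
static int example_trace_zone_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	pr_debug("zone %u at sector %llu: %s\n",
		 idx, zone->start, blk_zone_cond_str(zone->cond));
	return 0;
}
#endif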

static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}

/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

bool blk_req_zone_write_trylock(struct request *rq)
{
	unsigned int zno = blk_rq_zone_no(rq);

	if (test_and_set_bit(zno, rq->q->seq_zones_wlock))
		return false;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;

	return true;
}
EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
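
/*
 * Illustrative sketch (compiled out): how an I/O scheduler's dispatch path
 * might serialize writes to sequential zones, in the spirit of mq-deadline.
 * example_requeue() and example_issue() are hypothetical helpers, not part
 * of any kernel API.
 */
#if 0
static void example_requeue(struct request *rq);
static void example_issue(struct request *rq);

static void example_dispatch_one(struct request *rq)
{
	/* Only writes targeting sequential zones need the zone write lock. */
	if (blk_req_needs_zone_write_lock(rq) &&
	    !blk_req_zone_write_trylock(rq)) {
		/* Another write holds the zone write lock: retry later. */
		example_requeue(rq);
		return;
	}

	/*
	 * Dispatch the request; the zone write lock is dropped on completion
	 * with blk_req_zone_write_unlock().
	 */
	example_issue(rq);
}
#endif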

/**
 * blkdev_nr_zones - Get number of zones
 * @disk:	Target gendisk
 *
 * Return the total number of zones of a zoned block device.  For a block
 * device without zone capabilities, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct gendisk *disk)
{
	sector_t zone_sectors = blk_queue_zone_sectors(disk->queue);

	if (!blk_queue_is_zoned(disk->queue))
		return 0;
	return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
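
/*
 * Illustrative sketch (compiled out): a driver sizing a per-zone array from
 * blkdev_nr_zones(), e.g. to cache one write pointer per zone. The helper
 * name is hypothetical.
 */
#if 0
static sector_t *example_alloc_wp_cache(struct gendisk *disk)
{
	unsigned int nr_zones = blkdev_nr_zones(disk);

	if (!nr_zones)
		return NULL;	/* not a zoned device */

	/* One cached write pointer per zone. */
	return kcalloc(nr_zones, sizeof(sector_t), GFP_KERNEL);
}
#endif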

/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @nr_zones:	Maximum number of zones to report
 * @cb:		Callback function called for each reported zone
 * @data:	Private data for the callback
 *
 * Description:
 *    Get zone information starting from the zone containing @sector for at most
 *    @nr_zones, and call @cb for each zone reported by the device.
 *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
 *    constant can be passed to @nr_zones.
 *    Returns the number of zones reported by the device, or a negative errno
 *    value in case of failure.
 *
 *    Note: The caller must use memalloc_noXX_save/restore() calls to control
 *    memory allocations done within this function.
 */
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = get_capacity(disk);

	if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
	    WARN_ON_ONCE(!disk->fops->report_zones))
		return -EOPNOTSUPP;

	if (!nr_zones || sector >= capacity)
		return 0;

	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
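
/*
 * Illustrative sketch (compiled out): using blkdev_report_zones() with a
 * callback to count the zones that are not empty. Names are hypothetical;
 * the callback signature matches report_zones_cb.
 */
#if 0
static int example_count_used_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	unsigned int *count = data;

	if (zone->cond != BLK_ZONE_COND_EMPTY)
		(*count)++;
	return 0;
}

static int example_count_used_zones(struct block_device *bdev,
				    unsigned int *count)
{
	*count = 0;
	/* Walk every zone, starting from sector 0. */
	return blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
				   example_count_used_cb, count);
}
#endif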

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
						sector_t sector,
						sector_t nr_sectors)
{
	if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
		return false;

	/*
	 * REQ_OP_ZONE_RESET_ALL can be executed only if the zone range to
	 * operate on covers the entire disk.
	 */
	return !sector && nr_sectors == get_capacity(bdev->bd_disk);
}

/**
 * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
 * @bdev:	Target block device
 * @op:		Operation to be performed on the zones
 * @sector:	Start sector of the first zone to operate on
 * @nr_sectors:	Number of sectors, should be at least the length of one zone and
 *		must be zone size aligned.
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Perform the specified operation on the range of zones specified by
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *    The operation to execute on each zone can be a zone reset, open, close
 *    or finish request.
 */
int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
		     sector_t sector, sector_t nr_sectors,
		     gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors = blk_queue_zone_sectors(q);
	sector_t capacity = get_capacity(bdev->bd_disk);
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio = NULL;
	int ret;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!op_is_zone_mgmt(op))
		return -EOPNOTSUPP;

	if (end_sector <= sector || end_sector > capacity)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle a possibly smaller last zone) */
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
		return -EINVAL;

	while (sector < end_sector) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio_set_dev(bio, bdev);

		/*
		 * Special case for the zone reset operation that resets all
		 * zones: this is useful for applications like mkfs.
		 */
		if (op == REQ_OP_ZONE_RESET &&
		    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
			bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
			break;
		}

		bio->bi_opf = op | REQ_SYNC;
		bio->bi_iter.bi_sector = sector;
		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
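
/*
 * Illustrative sketch (compiled out): resetting the single zone containing
 * @sector before rewriting it. Assumes @bdev is a writable zoned device and
 * the target zone is sequential. The helper name is hypothetical.
 */
#if 0
static int example_reset_one_zone(struct block_device *bdev, sector_t sector)
{
	struct request_queue *q = bdev_get_queue(bdev);

	/* Align the start sector and operate on exactly one zone. */
	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
				blk_zone_start(q, sector),
				blk_queue_zone_sectors(q), GFP_KERNEL);
}
#endif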

struct zone_report_args {
	struct blk_zone __user *zones;
};

static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
				    void *data)
{
	struct zone_report_args *args = data;

	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
		return -EFAULT;
	return 0;
}

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct request_queue *q;
	struct blk_zone_report rep;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	args.zones = argp + sizeof(struct blk_zone_report);
	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
				  blkdev_copy_zone_to_user, &args);
	if (ret < 0)
		return ret;

	rep.nr_zones = ret;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
		return -EFAULT;
	return 0;
}
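
/*
 * Illustrative userspace sketch (compiled out): issuing BLKREPORTZONE on an
 * open block device fd. The buffer is a struct blk_zone_report immediately
 * followed by rep->nr_zones struct blk_zone entries, matching the layout
 * unpacked above. Userspace would include <linux/blkzoned.h>, <sys/ioctl.h>
 * and <stdlib.h>; the function name is hypothetical.
 */
#if 0
static int example_report_zones(int fd, __u64 sector, __u32 nr_zones)
{
	struct blk_zone_report *rep;
	int ret;

	rep = calloc(1, sizeof(*rep) + nr_zones * sizeof(struct blk_zone));
	if (!rep)
		return -1;

	rep->sector = sector;
	rep->nr_zones = nr_zones;
	ret = ioctl(fd, BLKREPORTZONE, rep);
	/* On success, rep->nr_zones is the number of zones actually filled. */
	free(rep);
	return ret;
}
#endif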

static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
				      const struct blk_zone_range *zrange)
{
	loff_t start, end;

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		/* Out of range */
		return -EINVAL;

	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, start, end);
}

/*
 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;
	enum req_opf op;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	switch (cmd) {
	case BLKRESETZONE:
		op = REQ_OP_ZONE_RESET;

		/* Invalidate the page cache, including dirty pages. */
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
		if (ret)
			return ret;
		break;
	case BLKOPENZONE:
		op = REQ_OP_ZONE_OPEN;
		break;
	case BLKCLOSEZONE:
		op = REQ_OP_ZONE_CLOSE;
		break;
	case BLKFINISHZONE:
		op = REQ_OP_ZONE_FINISH;
		break;
	default:
		return -ENOTTY;
	}

	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
			       GFP_KERNEL);

	/*
	 * Invalidate the page cache again for zone reset: writes can only be
	 * direct for zoned devices so concurrent writes would not add any page
	 * to the page cache after/during reset. The page cache may be filled
	 * again due to concurrent reads though and dropping the pages for
	 * these is fine.
	 */
	if (!ret && cmd == BLKRESETZONE)
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);

	return ret;
}
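
/*
 * Illustrative userspace sketch (compiled out): resetting a zone range with
 * BLKRESETZONE. Both fields must be zone aligned, as blkdev_zone_mgmt()
 * enforces above. The function name is hypothetical; userspace would include
 * <linux/blkzoned.h> and <sys/ioctl.h>.
 */
#if 0
static int example_reset_zone_range(int fd, __u64 sector, __u64 nr_sectors)
{
	struct blk_zone_range zrange = {
		.sector		= sector,
		.nr_sectors	= nr_sectors,
	};

	return ioctl(fd, BLKRESETZONE, &zrange);
}
#endif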

static inline unsigned long *blk_alloc_zone_bitmap(int node,
						   unsigned int nr_zones)
{
	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
			    GFP_NOIO, node);
}

void blk_queue_free_zone_bitmaps(struct request_queue *q)
{
	kfree(q->conv_zones_bitmap);
	q->conv_zones_bitmap = NULL;
	kfree(q->seq_zones_wlock);
	q->seq_zones_wlock = NULL;
}

struct blk_revalidate_zone_args {
	struct gendisk	*disk;
	unsigned long	*conv_zones_bitmap;
	unsigned long	*seq_zones_wlock;
	unsigned int	nr_zones;
	sector_t	zone_sectors;
	sector_t	sector;
};

/*
 * Helper function to check the validity of zones of a zoned block device.
 */
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
				  void *data)
{
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	struct request_queue *q = disk->queue;
	sector_t capacity = get_capacity(disk);

	/*
	 * All zones must have the same size, with the possible exception of a
	 * smaller last zone.
	 */
	if (zone->start == 0) {
		if (zone->len == 0 || !is_power_of_2(zone->len)) {
			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
				disk->disk_name, zone->len);
			return -ENODEV;
		}

		args->zone_sectors = zone->len;
		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
	} else if (zone->start + args->zone_sectors < capacity) {
		if (zone->len != args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	} else {
		if (zone->len > args->zone_sectors) {
			pr_warn("%s: Invalid zoned device with larger last zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	}

	/* Check for holes in the zone report */
	if (zone->start != args->sector) {
		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
			disk->disk_name, args->sector, zone->start);
		return -ENODEV;
	}

	/* Check zone type */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!args->conv_zones_bitmap) {
			args->conv_zones_bitmap =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, args->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!args->seq_zones_wlock) {
			args->seq_zones_wlock =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->seq_zones_wlock)
				return -ENOMEM;
		}
		break;
	default:
		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
			disk->disk_name, (int)zone->type, zone->start);
		return -ENODEV;
	}

	args->sector += zone->len;
	return 0;
}

/**
 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
 * @disk:	Target disk
 * @update_driver_data:	Callback to update driver data on the frozen disk
 *
 * Helper function for low-level device drivers to (re)allocate and initialize
 * a disk request queue's zone bitmaps. This function should normally be called
 * within the disk ->revalidate method for blk-mq based drivers.  For BIO based
 * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
 * is correct.
 * If the @update_driver_data callback function is not NULL, the callback is
 * executed with the device request queue frozen after all zones have been
 * checked.
 */
int blk_revalidate_disk_zones(struct gendisk *disk,
			      void (*update_driver_data)(struct gendisk *disk))
{
	struct request_queue *q = disk->queue;
	struct blk_revalidate_zone_args args = {
		.disk		= disk,
	};
	unsigned int noio_flag;
	int ret;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return -EIO;
	if (WARN_ON_ONCE(!queue_is_mq(q)))
		return -EIO;

	if (!get_capacity(disk))
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
				       blk_revalidate_zone_cb, &args);
	memalloc_noio_restore(noio_flag);

	/*
	 * Install the new bitmaps and update nr_zones only once the queue is
	 * stopped and all I/Os are completed (i.e. a scheduler is not
	 * referencing the bitmaps).
	 */
	blk_mq_freeze_queue(q);
	if (ret >= 0) {
		blk_queue_chunk_sectors(q, args.zone_sectors);
		q->nr_zones = args.nr_zones;
		swap(q->seq_zones_wlock, args.seq_zones_wlock);
		swap(q->conv_zones_bitmap, args.conv_zones_bitmap);
		if (update_driver_data)
			update_driver_data(disk);
		ret = 0;
	} else {
		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
		blk_queue_free_zone_bitmaps(q);
	}
	blk_mq_unfreeze_queue(q);

	kfree(args.seq_zones_wlock);
	kfree(args.conv_zones_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
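
/*
 * Illustrative sketch (compiled out): a blk-mq driver revalidating zones
 * after reading the device geometry. example_update_driver_data() is a
 * hypothetical callback that runs with the queue frozen, once all zones
 * have been checked.
 */
#if 0
static void example_update_driver_data(struct gendisk *disk)
{
	/* Resize driver-private per-zone state here; the queue is frozen. */
}

static int example_revalidate_zones(struct gendisk *disk)
{
	if (!blk_queue_is_zoned(disk->queue))
		return 0;

	return blk_revalidate_disk_zones(disk, example_update_driver_data);
}
#endif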