xref: /OK3568_Linux_fs/kernel/include/linux/blk-mq.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun #ifndef BLK_MQ_H
3*4882a593Smuzhiyun #define BLK_MQ_H
4*4882a593Smuzhiyun 
5*4882a593Smuzhiyun #include <linux/blkdev.h>
6*4882a593Smuzhiyun #include <linux/sbitmap.h>
7*4882a593Smuzhiyun #include <linux/srcu.h>
8*4882a593Smuzhiyun 
/* Opaque in this header: only pointers to these types are used below. */
struct blk_mq_tags;
struct blk_flush_queue;
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun /**
13*4882a593Smuzhiyun  * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
14*4882a593Smuzhiyun  * block device
15*4882a593Smuzhiyun  */
16*4882a593Smuzhiyun struct blk_mq_hw_ctx {
17*4882a593Smuzhiyun 	struct {
18*4882a593Smuzhiyun 		/** @lock: Protects the dispatch list. */
19*4882a593Smuzhiyun 		spinlock_t		lock;
20*4882a593Smuzhiyun 		/**
21*4882a593Smuzhiyun 		 * @dispatch: Used for requests that are ready to be
22*4882a593Smuzhiyun 		 * dispatched to the hardware but for some reason (e.g. lack of
23*4882a593Smuzhiyun 		 * resources) could not be sent to the hardware. As soon as the
24*4882a593Smuzhiyun 		 * driver can send new requests, requests at this list will
25*4882a593Smuzhiyun 		 * be sent first for a fairer dispatch.
26*4882a593Smuzhiyun 		 */
27*4882a593Smuzhiyun 		struct list_head	dispatch;
28*4882a593Smuzhiyun 		 /**
29*4882a593Smuzhiyun 		  * @state: BLK_MQ_S_* flags. Defines the state of the hw
30*4882a593Smuzhiyun 		  * queue (active, scheduled to restart, stopped).
31*4882a593Smuzhiyun 		  */
32*4882a593Smuzhiyun 		unsigned long		state;
33*4882a593Smuzhiyun 	} ____cacheline_aligned_in_smp;
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun 	/**
36*4882a593Smuzhiyun 	 * @run_work: Used for scheduling a hardware queue run at a later time.
37*4882a593Smuzhiyun 	 */
38*4882a593Smuzhiyun 	struct delayed_work	run_work;
39*4882a593Smuzhiyun 	/** @cpumask: Map of available CPUs where this hctx can run. */
40*4882a593Smuzhiyun 	cpumask_var_t		cpumask;
41*4882a593Smuzhiyun 	/**
42*4882a593Smuzhiyun 	 * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
43*4882a593Smuzhiyun 	 * selection from @cpumask.
44*4882a593Smuzhiyun 	 */
45*4882a593Smuzhiyun 	int			next_cpu;
46*4882a593Smuzhiyun 	/**
47*4882a593Smuzhiyun 	 * @next_cpu_batch: Counter of how many works left in the batch before
48*4882a593Smuzhiyun 	 * changing to the next CPU.
49*4882a593Smuzhiyun 	 */
50*4882a593Smuzhiyun 	int			next_cpu_batch;
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 	/** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
53*4882a593Smuzhiyun 	unsigned long		flags;
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun 	/**
56*4882a593Smuzhiyun 	 * @sched_data: Pointer owned by the IO scheduler attached to a request
57*4882a593Smuzhiyun 	 * queue. It's up to the IO scheduler how to use this pointer.
58*4882a593Smuzhiyun 	 */
59*4882a593Smuzhiyun 	void			*sched_data;
60*4882a593Smuzhiyun 	/**
61*4882a593Smuzhiyun 	 * @queue: Pointer to the request queue that owns this hardware context.
62*4882a593Smuzhiyun 	 */
63*4882a593Smuzhiyun 	struct request_queue	*queue;
64*4882a593Smuzhiyun 	/** @fq: Queue of requests that need to perform a flush operation. */
65*4882a593Smuzhiyun 	struct blk_flush_queue	*fq;
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun 	/**
68*4882a593Smuzhiyun 	 * @driver_data: Pointer to data owned by the block driver that created
69*4882a593Smuzhiyun 	 * this hctx
70*4882a593Smuzhiyun 	 */
71*4882a593Smuzhiyun 	void			*driver_data;
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun 	/**
74*4882a593Smuzhiyun 	 * @ctx_map: Bitmap for each software queue. If bit is on, there is a
75*4882a593Smuzhiyun 	 * pending request in that software queue.
76*4882a593Smuzhiyun 	 */
77*4882a593Smuzhiyun 	struct sbitmap		ctx_map;
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun 	/**
80*4882a593Smuzhiyun 	 * @dispatch_from: Software queue to be used when no scheduler was
81*4882a593Smuzhiyun 	 * selected.
82*4882a593Smuzhiyun 	 */
83*4882a593Smuzhiyun 	struct blk_mq_ctx	*dispatch_from;
84*4882a593Smuzhiyun 	/**
85*4882a593Smuzhiyun 	 * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
86*4882a593Smuzhiyun 	 * decide if the hw_queue is busy using Exponential Weighted Moving
87*4882a593Smuzhiyun 	 * Average algorithm.
88*4882a593Smuzhiyun 	 */
89*4882a593Smuzhiyun 	unsigned int		dispatch_busy;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	/** @type: HCTX_TYPE_* flags. Type of hardware queue. */
92*4882a593Smuzhiyun 	unsigned short		type;
93*4882a593Smuzhiyun 	/** @nr_ctx: Number of software queues. */
94*4882a593Smuzhiyun 	unsigned short		nr_ctx;
95*4882a593Smuzhiyun 	/** @ctxs: Array of software queues. */
96*4882a593Smuzhiyun 	struct blk_mq_ctx	**ctxs;
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 	/** @dispatch_wait_lock: Lock for dispatch_wait queue. */
99*4882a593Smuzhiyun 	spinlock_t		dispatch_wait_lock;
100*4882a593Smuzhiyun 	/**
101*4882a593Smuzhiyun 	 * @dispatch_wait: Waitqueue to put requests when there is no tag
102*4882a593Smuzhiyun 	 * available at the moment, to wait for another try in the future.
103*4882a593Smuzhiyun 	 */
104*4882a593Smuzhiyun 	wait_queue_entry_t	dispatch_wait;
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 	/**
107*4882a593Smuzhiyun 	 * @wait_index: Index of next available dispatch_wait queue to insert
108*4882a593Smuzhiyun 	 * requests.
109*4882a593Smuzhiyun 	 */
110*4882a593Smuzhiyun 	atomic_t		wait_index;
111*4882a593Smuzhiyun 
112*4882a593Smuzhiyun 	/**
113*4882a593Smuzhiyun 	 * @tags: Tags owned by the block driver. A tag at this set is only
114*4882a593Smuzhiyun 	 * assigned when a request is dispatched from a hardware queue.
115*4882a593Smuzhiyun 	 */
116*4882a593Smuzhiyun 	struct blk_mq_tags	*tags;
117*4882a593Smuzhiyun 	/**
118*4882a593Smuzhiyun 	 * @sched_tags: Tags owned by I/O scheduler. If there is an I/O
119*4882a593Smuzhiyun 	 * scheduler associated with a request queue, a tag is assigned when
120*4882a593Smuzhiyun 	 * that request is allocated. Else, this member is not used.
121*4882a593Smuzhiyun 	 */
122*4882a593Smuzhiyun 	struct blk_mq_tags	*sched_tags;
123*4882a593Smuzhiyun 
124*4882a593Smuzhiyun 	/** @queued: Number of queued requests. */
125*4882a593Smuzhiyun 	unsigned long		queued;
126*4882a593Smuzhiyun 	/** @run: Number of dispatched requests. */
127*4882a593Smuzhiyun 	unsigned long		run;
128*4882a593Smuzhiyun #define BLK_MQ_MAX_DISPATCH_ORDER	7
129*4882a593Smuzhiyun 	/** @dispatched: Number of dispatch requests by queue. */
130*4882a593Smuzhiyun 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun 	/** @numa_node: NUMA node the storage adapter has been connected to. */
133*4882a593Smuzhiyun 	unsigned int		numa_node;
134*4882a593Smuzhiyun 	/** @queue_num: Index of this hardware queue. */
135*4882a593Smuzhiyun 	unsigned int		queue_num;
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun 	/**
138*4882a593Smuzhiyun 	 * @nr_active: Number of active requests. Only used when a tag set is
139*4882a593Smuzhiyun 	 * shared across request queues.
140*4882a593Smuzhiyun 	 */
141*4882a593Smuzhiyun 	atomic_t		nr_active;
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	/** @cpuhp_online: List to store request if CPU is going to die */
144*4882a593Smuzhiyun 	struct hlist_node	cpuhp_online;
145*4882a593Smuzhiyun 	/** @cpuhp_dead: List to store request if some CPU die. */
146*4882a593Smuzhiyun 	struct hlist_node	cpuhp_dead;
147*4882a593Smuzhiyun 	/** @kobj: Kernel object for sysfs. */
148*4882a593Smuzhiyun 	struct kobject		kobj;
149*4882a593Smuzhiyun 
150*4882a593Smuzhiyun 	/** @poll_considered: Count times blk_poll() was called. */
151*4882a593Smuzhiyun 	unsigned long		poll_considered;
152*4882a593Smuzhiyun 	/** @poll_invoked: Count how many requests blk_poll() polled. */
153*4882a593Smuzhiyun 	unsigned long		poll_invoked;
154*4882a593Smuzhiyun 	/** @poll_success: Count how many polled requests were completed. */
155*4882a593Smuzhiyun 	unsigned long		poll_success;
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun #ifdef CONFIG_BLK_DEBUG_FS
158*4882a593Smuzhiyun 	/**
159*4882a593Smuzhiyun 	 * @debugfs_dir: debugfs directory for this hardware queue. Named
160*4882a593Smuzhiyun 	 * as cpu<cpu_number>.
161*4882a593Smuzhiyun 	 */
162*4882a593Smuzhiyun 	struct dentry		*debugfs_dir;
163*4882a593Smuzhiyun 	/** @sched_debugfs_dir:	debugfs directory for the scheduler. */
164*4882a593Smuzhiyun 	struct dentry		*sched_debugfs_dir;
165*4882a593Smuzhiyun #endif
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun 	/**
168*4882a593Smuzhiyun 	 * @hctx_list: if this hctx is not in use, this is an entry in
169*4882a593Smuzhiyun 	 * q->unused_hctx_list.
170*4882a593Smuzhiyun 	 */
171*4882a593Smuzhiyun 	struct list_head	hctx_list;
172*4882a593Smuzhiyun 
173*4882a593Smuzhiyun 	/**
174*4882a593Smuzhiyun 	 * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
175*4882a593Smuzhiyun 	 * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
176*4882a593Smuzhiyun 	 * blk_mq_hw_ctx_size().
177*4882a593Smuzhiyun 	 */
178*4882a593Smuzhiyun 	struct srcu_struct	srcu[];
179*4882a593Smuzhiyun };
180*4882a593Smuzhiyun 
/**
 * struct blk_mq_queue_map - Map software queues to hardware queues
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};
196*4882a593Smuzhiyun 
/**
 * enum hctx_type - Type of hardware queue
 * @HCTX_TYPE_DEFAULT:	All I/O not otherwise accounted for.
 * @HCTX_TYPE_READ:	Just for READ I/O.
 * @HCTX_TYPE_POLL:	Polled I/O of any kind.
 * @HCTX_MAX_TYPES:	Number of types of hctx.
 */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,

	/* Must stay last: doubles as the count of the types above. */
	HCTX_MAX_TYPES,
};
211*4882a593Smuzhiyun 
212*4882a593Smuzhiyun /**
213*4882a593Smuzhiyun  * struct blk_mq_tag_set - tag set that can be shared between request queues
214*4882a593Smuzhiyun  * @map:	   One or more ctx -> hctx mappings. One map exists for each
215*4882a593Smuzhiyun  *		   hardware queue type (enum hctx_type) that the driver wishes
216*4882a593Smuzhiyun  *		   to support. There are no restrictions on maps being of the
217*4882a593Smuzhiyun  *		   same size, and it's perfectly legal to share maps between
218*4882a593Smuzhiyun  *		   types.
219*4882a593Smuzhiyun  * @nr_maps:	   Number of elements in the @map array. A number in the range
220*4882a593Smuzhiyun  *		   [1, HCTX_MAX_TYPES].
221*4882a593Smuzhiyun  * @ops:	   Pointers to functions that implement block driver behavior.
222*4882a593Smuzhiyun  * @nr_hw_queues:  Number of hardware queues supported by the block driver that
223*4882a593Smuzhiyun  *		   owns this data structure.
224*4882a593Smuzhiyun  * @queue_depth:   Number of tags per hardware queue, reserved tags included.
225*4882a593Smuzhiyun  * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
226*4882a593Smuzhiyun  *		   allocations.
227*4882a593Smuzhiyun  * @cmd_size:	   Number of additional bytes to allocate per request. The block
228*4882a593Smuzhiyun  *		   driver owns these additional bytes.
229*4882a593Smuzhiyun  * @numa_node:	   NUMA node the storage adapter has been connected to.
230*4882a593Smuzhiyun  * @timeout:	   Request processing timeout in jiffies.
231*4882a593Smuzhiyun  * @flags:	   Zero or more BLK_MQ_F_* flags.
232*4882a593Smuzhiyun  * @driver_data:   Pointer to data owned by the block driver that created this
233*4882a593Smuzhiyun  *		   tag set.
234*4882a593Smuzhiyun  * @active_queues_shared_sbitmap:
235*4882a593Smuzhiyun  * 		   number of active request queues per tag set.
236*4882a593Smuzhiyun  * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
237*4882a593Smuzhiyun  * @__breserved_tags:
238*4882a593Smuzhiyun  *		   A shared reserved tags sbitmap, used over all hctx's
239*4882a593Smuzhiyun  * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
240*4882a593Smuzhiyun  *		   elements.
241*4882a593Smuzhiyun  * @tag_list_lock: Serializes tag_list accesses.
242*4882a593Smuzhiyun  * @tag_list:	   List of the request queues that use this tag set. See also
243*4882a593Smuzhiyun  *		   request_queue.tag_set_list.
244*4882a593Smuzhiyun  */
245*4882a593Smuzhiyun struct blk_mq_tag_set {
246*4882a593Smuzhiyun 	struct blk_mq_queue_map	map[HCTX_MAX_TYPES];
247*4882a593Smuzhiyun 	unsigned int		nr_maps;
248*4882a593Smuzhiyun 	const struct blk_mq_ops	*ops;
249*4882a593Smuzhiyun 	unsigned int		nr_hw_queues;
250*4882a593Smuzhiyun 	unsigned int		queue_depth;
251*4882a593Smuzhiyun 	unsigned int		reserved_tags;
252*4882a593Smuzhiyun 	unsigned int		cmd_size;
253*4882a593Smuzhiyun 	int			numa_node;
254*4882a593Smuzhiyun 	unsigned int		timeout;
255*4882a593Smuzhiyun 	unsigned int		flags;
256*4882a593Smuzhiyun 	void			*driver_data;
257*4882a593Smuzhiyun 	atomic_t		active_queues_shared_sbitmap;
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	struct sbitmap_queue	__bitmap_tags;
260*4882a593Smuzhiyun 	struct sbitmap_queue	__breserved_tags;
261*4882a593Smuzhiyun 	struct blk_mq_tags	**tags;
262*4882a593Smuzhiyun 
263*4882a593Smuzhiyun 	struct mutex		tag_list_lock;
264*4882a593Smuzhiyun 	struct list_head	tag_list;
265*4882a593Smuzhiyun };
266*4882a593Smuzhiyun 
/**
 * struct blk_mq_queue_data - Data about a request inserted in a queue
 *
 * @rq:   Request pointer.
 * @last: If it is the last request in the queue.
 */
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};
277*4882a593Smuzhiyun 
/*
 * Iterator callback types. busy_iter_fn additionally receives the hardware
 * queue context; busy_tag_iter_fn (see blk_mq_tagset_busy_iter()) does not.
 * The void * argument is an opaque caller-supplied cookie.
 */
typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun /**
283*4882a593Smuzhiyun  * struct blk_mq_ops - Callback functions that implements block driver
284*4882a593Smuzhiyun  * behaviour.
285*4882a593Smuzhiyun  */
286*4882a593Smuzhiyun struct blk_mq_ops {
287*4882a593Smuzhiyun 	/**
288*4882a593Smuzhiyun 	 * @queue_rq: Queue a new request from block IO.
289*4882a593Smuzhiyun 	 */
290*4882a593Smuzhiyun 	blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
291*4882a593Smuzhiyun 				 const struct blk_mq_queue_data *);
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun 	/**
294*4882a593Smuzhiyun 	 * @commit_rqs: If a driver uses bd->last to judge when to submit
295*4882a593Smuzhiyun 	 * requests to hardware, it must define this function. In case of errors
296*4882a593Smuzhiyun 	 * that make us stop issuing further requests, this hook serves the
297*4882a593Smuzhiyun 	 * purpose of kicking the hardware (which the last request otherwise
298*4882a593Smuzhiyun 	 * would have done).
299*4882a593Smuzhiyun 	 */
300*4882a593Smuzhiyun 	void (*commit_rqs)(struct blk_mq_hw_ctx *);
301*4882a593Smuzhiyun 
302*4882a593Smuzhiyun 	/**
303*4882a593Smuzhiyun 	 * @get_budget: Reserve budget before queue request, once .queue_rq is
304*4882a593Smuzhiyun 	 * run, it is driver's responsibility to release the
305*4882a593Smuzhiyun 	 * reserved budget. Also we have to handle failure case
306*4882a593Smuzhiyun 	 * of .get_budget for avoiding I/O deadlock.
307*4882a593Smuzhiyun 	 */
308*4882a593Smuzhiyun 	bool (*get_budget)(struct request_queue *);
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun 	/**
311*4882a593Smuzhiyun 	 * @put_budget: Release the reserved budget.
312*4882a593Smuzhiyun 	 */
313*4882a593Smuzhiyun 	void (*put_budget)(struct request_queue *);
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	/**
316*4882a593Smuzhiyun 	 * @timeout: Called on request timeout.
317*4882a593Smuzhiyun 	 */
318*4882a593Smuzhiyun 	enum blk_eh_timer_return (*timeout)(struct request *, bool);
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun 	/**
321*4882a593Smuzhiyun 	 * @poll: Called to poll for completion of a specific tag.
322*4882a593Smuzhiyun 	 */
323*4882a593Smuzhiyun 	int (*poll)(struct blk_mq_hw_ctx *);
324*4882a593Smuzhiyun 
325*4882a593Smuzhiyun 	/**
326*4882a593Smuzhiyun 	 * @complete: Mark the request as complete.
327*4882a593Smuzhiyun 	 */
328*4882a593Smuzhiyun 	void (*complete)(struct request *);
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun 	/**
331*4882a593Smuzhiyun 	 * @init_hctx: Called when the block layer side of a hardware queue has
332*4882a593Smuzhiyun 	 * been set up, allowing the driver to allocate/init matching
333*4882a593Smuzhiyun 	 * structures.
334*4882a593Smuzhiyun 	 */
335*4882a593Smuzhiyun 	int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
336*4882a593Smuzhiyun 	/**
337*4882a593Smuzhiyun 	 * @exit_hctx: Ditto for exit/teardown.
338*4882a593Smuzhiyun 	 */
339*4882a593Smuzhiyun 	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
340*4882a593Smuzhiyun 
341*4882a593Smuzhiyun 	/**
342*4882a593Smuzhiyun 	 * @init_request: Called for every command allocated by the block layer
343*4882a593Smuzhiyun 	 * to allow the driver to set up driver specific data.
344*4882a593Smuzhiyun 	 *
345*4882a593Smuzhiyun 	 * Tag greater than or equal to queue_depth is for setting up
346*4882a593Smuzhiyun 	 * flush request.
347*4882a593Smuzhiyun 	 */
348*4882a593Smuzhiyun 	int (*init_request)(struct blk_mq_tag_set *set, struct request *,
349*4882a593Smuzhiyun 			    unsigned int, unsigned int);
350*4882a593Smuzhiyun 	/**
351*4882a593Smuzhiyun 	 * @exit_request: Ditto for exit/teardown.
352*4882a593Smuzhiyun 	 */
353*4882a593Smuzhiyun 	void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
354*4882a593Smuzhiyun 			     unsigned int);
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 	/**
357*4882a593Smuzhiyun 	 * @initialize_rq_fn: Called from inside blk_get_request().
358*4882a593Smuzhiyun 	 */
359*4882a593Smuzhiyun 	void (*initialize_rq_fn)(struct request *rq);
360*4882a593Smuzhiyun 
361*4882a593Smuzhiyun 	/**
362*4882a593Smuzhiyun 	 * @cleanup_rq: Called before freeing one request which isn't completed
363*4882a593Smuzhiyun 	 * yet, and usually for freeing the driver private data.
364*4882a593Smuzhiyun 	 */
365*4882a593Smuzhiyun 	void (*cleanup_rq)(struct request *);
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 	/**
368*4882a593Smuzhiyun 	 * @busy: If set, returns whether or not this queue currently is busy.
369*4882a593Smuzhiyun 	 */
370*4882a593Smuzhiyun 	bool (*busy)(struct request_queue *);
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	/**
373*4882a593Smuzhiyun 	 * @map_queues: This allows drivers specify their own queue mapping by
374*4882a593Smuzhiyun 	 * overriding the setup-time function that builds the mq_map.
375*4882a593Smuzhiyun 	 */
376*4882a593Smuzhiyun 	int (*map_queues)(struct blk_mq_tag_set *set);
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun #ifdef CONFIG_BLK_DEBUG_FS
379*4882a593Smuzhiyun 	/**
380*4882a593Smuzhiyun 	 * @show_rq: Used by the debugfs implementation to show driver-specific
381*4882a593Smuzhiyun 	 * information about a request.
382*4882a593Smuzhiyun 	 */
383*4882a593Smuzhiyun 	void (*show_rq)(struct seq_file *m, struct request *rq);
384*4882a593Smuzhiyun #endif
385*4882a593Smuzhiyun };
386*4882a593Smuzhiyun 
/*
 * blk-mq constants.
 *
 * BLK_MQ_F_* are flag bit masks. BLK_MQ_F_ALLOC_POLICY_START_BIT and
 * BLK_MQ_F_ALLOC_POLICY_BITS are not masks: they give the position and
 * width of the allocation-policy field that the two macros below pack
 * into and extract from a flags word.
 *
 * BLK_MQ_S_* are small consecutive integers rather than masks
 * (NOTE(review): presumably bit numbers within blk_mq_hw_ctx.state - confirm
 * against the users).
 */
enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
	/*
	 * Set when this device requires underlying blk-mq device for
	 * completing IO:
	 */
	BLK_MQ_F_STACKING	= 1 << 2,
	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
	BLK_MQ_F_BLOCKING	= 1 << 5,
	/* Do not allow an I/O scheduler to be configured. */
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	/*
	 * Select 'none' during queue registration in case of a single hwq
	 * or shared hwqs instead of 'mq-deadline'.
	 */
	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,

	/* hw queue is inactive after all its CPUs become offline */
	BLK_MQ_S_INACTIVE	= 3,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};

/*
 * Extract the allocation policy from a BLK_MQ_F_* flags word.
 * The argument is parenthesized so that compound expressions such as
 * (a | b) are shifted as a whole.
 */
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	(((flags) >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
/*
 * Encode an allocation policy into its position in a flags word. Bits of
 * @policy beyond BLK_MQ_F_ALLOC_POLICY_BITS are masked off.
 */
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	(((policy) & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
424*4882a593Smuzhiyun 
/*
 * Request queue and tag set entry points. Only the declarations live in
 * this header.
 */
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
		void *queuedata);
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q,
						  bool elevator_init);
struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
						const struct blk_mq_ops *ops,
						unsigned int queue_depth,
						unsigned int set_flags);
void blk_mq_unregister_dev(struct device *, struct request_queue *);

int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
void blk_mq_free_tag_set(struct blk_mq_tag_set *set);

void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_free_request(struct request *rq);

bool blk_mq_queue_inflight(struct request_queue *q);
445*4882a593Smuzhiyun 
446*4882a593Smuzhiyun enum {
447*4882a593Smuzhiyun 	/* return when out of requests */
448*4882a593Smuzhiyun 	BLK_MQ_REQ_NOWAIT	= (__force blk_mq_req_flags_t)(1 << 0),
449*4882a593Smuzhiyun 	/* allocate from reserved pool */
450*4882a593Smuzhiyun 	BLK_MQ_REQ_RESERVED	= (__force blk_mq_req_flags_t)(1 << 1),
451*4882a593Smuzhiyun 	/* set RQF_PM */
452*4882a593Smuzhiyun 	BLK_MQ_REQ_PM		= (__force blk_mq_req_flags_t)(1 << 2),
453*4882a593Smuzhiyun };
454*4882a593Smuzhiyun 
455*4882a593Smuzhiyun struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
456*4882a593Smuzhiyun 		blk_mq_req_flags_t flags);
457*4882a593Smuzhiyun struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
458*4882a593Smuzhiyun 		unsigned int op, blk_mq_req_flags_t flags,
459*4882a593Smuzhiyun 		unsigned int hctx_idx);
460*4882a593Smuzhiyun struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
461*4882a593Smuzhiyun 
/*
 * Layout of a 32-bit "unique tag": the low BLK_MQ_UNIQUE_TAG_BITS bits are
 * selected by BLK_MQ_UNIQUE_TAG_MASK; the bits above them hold the hardware
 * queue number (see blk_mq_unique_tag_to_hwq()/blk_mq_unique_tag_to_tag()).
 */
enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun u32 blk_mq_unique_tag(struct request *rq);
468*4882a593Smuzhiyun 
blk_mq_unique_tag_to_hwq(u32 unique_tag)469*4882a593Smuzhiyun static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
470*4882a593Smuzhiyun {
471*4882a593Smuzhiyun 	return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
472*4882a593Smuzhiyun }
473*4882a593Smuzhiyun 
blk_mq_unique_tag_to_tag(u32 unique_tag)474*4882a593Smuzhiyun static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
475*4882a593Smuzhiyun {
476*4882a593Smuzhiyun 	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
477*4882a593Smuzhiyun }
478*4882a593Smuzhiyun 
479*4882a593Smuzhiyun /**
480*4882a593Smuzhiyun  * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
481*4882a593Smuzhiyun  * @rq: target request.
482*4882a593Smuzhiyun  */
blk_mq_rq_state(struct request * rq)483*4882a593Smuzhiyun static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
484*4882a593Smuzhiyun {
485*4882a593Smuzhiyun 	return READ_ONCE(rq->state);
486*4882a593Smuzhiyun }
487*4882a593Smuzhiyun 
blk_mq_request_started(struct request * rq)488*4882a593Smuzhiyun static inline int blk_mq_request_started(struct request *rq)
489*4882a593Smuzhiyun {
490*4882a593Smuzhiyun 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
491*4882a593Smuzhiyun }
492*4882a593Smuzhiyun 
blk_mq_request_completed(struct request * rq)493*4882a593Smuzhiyun static inline int blk_mq_request_completed(struct request *rq)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun 	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
496*4882a593Smuzhiyun }
497*4882a593Smuzhiyun 
498*4882a593Smuzhiyun void blk_mq_start_request(struct request *rq);
499*4882a593Smuzhiyun void blk_mq_end_request(struct request *rq, blk_status_t error);
500*4882a593Smuzhiyun void __blk_mq_end_request(struct request *rq, blk_status_t error);
501*4882a593Smuzhiyun 
502*4882a593Smuzhiyun void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
503*4882a593Smuzhiyun void blk_mq_kick_requeue_list(struct request_queue *q);
504*4882a593Smuzhiyun void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
505*4882a593Smuzhiyun void blk_mq_complete_request(struct request *rq);
506*4882a593Smuzhiyun bool blk_mq_complete_request_remote(struct request *rq);
507*4882a593Smuzhiyun bool blk_mq_queue_stopped(struct request_queue *q);
508*4882a593Smuzhiyun void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
509*4882a593Smuzhiyun void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
510*4882a593Smuzhiyun void blk_mq_stop_hw_queues(struct request_queue *q);
511*4882a593Smuzhiyun void blk_mq_start_hw_queues(struct request_queue *q);
512*4882a593Smuzhiyun void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
513*4882a593Smuzhiyun void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
514*4882a593Smuzhiyun void blk_mq_quiesce_queue(struct request_queue *q);
515*4882a593Smuzhiyun void blk_mq_unquiesce_queue(struct request_queue *q);
516*4882a593Smuzhiyun void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
517*4882a593Smuzhiyun void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
518*4882a593Smuzhiyun void blk_mq_run_hw_queues(struct request_queue *q, bool async);
519*4882a593Smuzhiyun void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
520*4882a593Smuzhiyun void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
521*4882a593Smuzhiyun 		busy_tag_iter_fn *fn, void *priv);
522*4882a593Smuzhiyun void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
523*4882a593Smuzhiyun void blk_mq_freeze_queue(struct request_queue *q);
524*4882a593Smuzhiyun void blk_mq_unfreeze_queue(struct request_queue *q);
525*4882a593Smuzhiyun void blk_freeze_queue_start(struct request_queue *q);
526*4882a593Smuzhiyun void blk_mq_freeze_queue_wait(struct request_queue *q);
527*4882a593Smuzhiyun int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
528*4882a593Smuzhiyun 				     unsigned long timeout);
529*4882a593Smuzhiyun 
530*4882a593Smuzhiyun int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
531*4882a593Smuzhiyun void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun void blk_mq_quiesce_queue_nowait(struct request_queue *q);
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun unsigned int blk_mq_rq_cpu(struct request *rq);
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun bool __blk_should_fake_timeout(struct request_queue *q);
blk_should_fake_timeout(struct request_queue * q)538*4882a593Smuzhiyun static inline bool blk_should_fake_timeout(struct request_queue *q)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
541*4882a593Smuzhiyun 	    test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
542*4882a593Smuzhiyun 		return __blk_should_fake_timeout(q);
543*4882a593Smuzhiyun 	return false;
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun 
546*4882a593Smuzhiyun /**
547*4882a593Smuzhiyun  * blk_mq_rq_from_pdu - cast a PDU to a request
548*4882a593Smuzhiyun  * @pdu: the PDU (Protocol Data Unit) to be casted
549*4882a593Smuzhiyun  *
550*4882a593Smuzhiyun  * Return: request
551*4882a593Smuzhiyun  *
552*4882a593Smuzhiyun  * Driver command data is immediately after the request. So subtract request
553*4882a593Smuzhiyun  * size to get back to the original request.
554*4882a593Smuzhiyun  */
blk_mq_rq_from_pdu(void * pdu)555*4882a593Smuzhiyun static inline struct request *blk_mq_rq_from_pdu(void *pdu)
556*4882a593Smuzhiyun {
557*4882a593Smuzhiyun 	return pdu - sizeof(struct request);
558*4882a593Smuzhiyun }
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun /**
561*4882a593Smuzhiyun  * blk_mq_rq_to_pdu - cast a request to a PDU
562*4882a593Smuzhiyun  * @rq: the request to be casted
563*4882a593Smuzhiyun  *
564*4882a593Smuzhiyun  * Return: pointer to the PDU
565*4882a593Smuzhiyun  *
566*4882a593Smuzhiyun  * Driver command data is immediately after the request. So add request to get
567*4882a593Smuzhiyun  * the PDU.
568*4882a593Smuzhiyun  */
blk_mq_rq_to_pdu(struct request * rq)569*4882a593Smuzhiyun static inline void *blk_mq_rq_to_pdu(struct request *rq)
570*4882a593Smuzhiyun {
571*4882a593Smuzhiyun 	return rq + 1;
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun 
/*
 * Iterate over (q)->queue_hw_ctx[0 .. (q)->nr_hw_queues - 1], assigning each
 * element to @hctx with @i as the index. The statement expression always
 * yields 1, so the loop ends only when the bound is reached; @hctx is not
 * assigned on the terminating check.
 */
#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ (hctx) = (q)->queue_hw_ctx[(i)]; 1; }); (i)++)

/*
 * Iterate over (hctx)->ctxs[0 .. (hctx)->nr_ctx - 1], assigning each element
 * to @ctx with @i as the index.
 */
#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ (ctx) = (hctx)->ctxs[(i)]; 1; }); (i)++)
581*4882a593Smuzhiyun 
request_to_qc_t(struct blk_mq_hw_ctx * hctx,struct request * rq)582*4882a593Smuzhiyun static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
583*4882a593Smuzhiyun 		struct request *rq)
584*4882a593Smuzhiyun {
585*4882a593Smuzhiyun 	if (rq->tag != -1)
586*4882a593Smuzhiyun 		return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
587*4882a593Smuzhiyun 
588*4882a593Smuzhiyun 	return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
589*4882a593Smuzhiyun 			BLK_QC_T_INTERNAL;
590*4882a593Smuzhiyun }
591*4882a593Smuzhiyun 
blk_mq_cleanup_rq(struct request * rq)592*4882a593Smuzhiyun static inline void blk_mq_cleanup_rq(struct request *rq)
593*4882a593Smuzhiyun {
594*4882a593Smuzhiyun 	if (rq->q->mq_ops->cleanup_rq)
595*4882a593Smuzhiyun 		rq->q->mq_ops->cleanup_rq(rq);
596*4882a593Smuzhiyun }
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun blk_qc_t blk_mq_submit_bio(struct bio *bio);
599*4882a593Smuzhiyun 
600*4882a593Smuzhiyun #endif
601