/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_MQ_H
#define BLK_MQ_H

#include <linux/blkdev.h>
#include <linux/sbitmap.h>
#include <linux/srcu.h>

/* Forward declarations: this header only uses pointers to these types. */
struct blk_mq_tags;
struct blk_flush_queue;

12*4882a593Smuzhiyun /**
13*4882a593Smuzhiyun * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
14*4882a593Smuzhiyun * block device
15*4882a593Smuzhiyun */
16*4882a593Smuzhiyun struct blk_mq_hw_ctx {
17*4882a593Smuzhiyun struct {
18*4882a593Smuzhiyun /** @lock: Protects the dispatch list. */
19*4882a593Smuzhiyun spinlock_t lock;
20*4882a593Smuzhiyun /**
21*4882a593Smuzhiyun * @dispatch: Used for requests that are ready to be
22*4882a593Smuzhiyun * dispatched to the hardware but for some reason (e.g. lack of
23*4882a593Smuzhiyun * resources) could not be sent to the hardware. As soon as the
24*4882a593Smuzhiyun * driver can send new requests, requests at this list will
25*4882a593Smuzhiyun * be sent first for a fairer dispatch.
26*4882a593Smuzhiyun */
27*4882a593Smuzhiyun struct list_head dispatch;
28*4882a593Smuzhiyun /**
29*4882a593Smuzhiyun * @state: BLK_MQ_S_* flags. Defines the state of the hw
30*4882a593Smuzhiyun * queue (active, scheduled to restart, stopped).
31*4882a593Smuzhiyun */
32*4882a593Smuzhiyun unsigned long state;
33*4882a593Smuzhiyun } ____cacheline_aligned_in_smp;
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun /**
36*4882a593Smuzhiyun * @run_work: Used for scheduling a hardware queue run at a later time.
37*4882a593Smuzhiyun */
38*4882a593Smuzhiyun struct delayed_work run_work;
39*4882a593Smuzhiyun /** @cpumask: Map of available CPUs where this hctx can run. */
40*4882a593Smuzhiyun cpumask_var_t cpumask;
41*4882a593Smuzhiyun /**
42*4882a593Smuzhiyun * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
43*4882a593Smuzhiyun * selection from @cpumask.
44*4882a593Smuzhiyun */
45*4882a593Smuzhiyun int next_cpu;
46*4882a593Smuzhiyun /**
47*4882a593Smuzhiyun * @next_cpu_batch: Counter of how many works left in the batch before
48*4882a593Smuzhiyun * changing to the next CPU.
49*4882a593Smuzhiyun */
50*4882a593Smuzhiyun int next_cpu_batch;
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
53*4882a593Smuzhiyun unsigned long flags;
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun /**
56*4882a593Smuzhiyun * @sched_data: Pointer owned by the IO scheduler attached to a request
57*4882a593Smuzhiyun * queue. It's up to the IO scheduler how to use this pointer.
58*4882a593Smuzhiyun */
59*4882a593Smuzhiyun void *sched_data;
60*4882a593Smuzhiyun /**
61*4882a593Smuzhiyun * @queue: Pointer to the request queue that owns this hardware context.
62*4882a593Smuzhiyun */
63*4882a593Smuzhiyun struct request_queue *queue;
64*4882a593Smuzhiyun /** @fq: Queue of requests that need to perform a flush operation. */
65*4882a593Smuzhiyun struct blk_flush_queue *fq;
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun /**
68*4882a593Smuzhiyun * @driver_data: Pointer to data owned by the block driver that created
69*4882a593Smuzhiyun * this hctx
70*4882a593Smuzhiyun */
71*4882a593Smuzhiyun void *driver_data;
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun /**
74*4882a593Smuzhiyun * @ctx_map: Bitmap for each software queue. If bit is on, there is a
75*4882a593Smuzhiyun * pending request in that software queue.
76*4882a593Smuzhiyun */
77*4882a593Smuzhiyun struct sbitmap ctx_map;
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun /**
80*4882a593Smuzhiyun * @dispatch_from: Software queue to be used when no scheduler was
81*4882a593Smuzhiyun * selected.
82*4882a593Smuzhiyun */
83*4882a593Smuzhiyun struct blk_mq_ctx *dispatch_from;
84*4882a593Smuzhiyun /**
85*4882a593Smuzhiyun * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
86*4882a593Smuzhiyun * decide if the hw_queue is busy using Exponential Weighted Moving
87*4882a593Smuzhiyun * Average algorithm.
88*4882a593Smuzhiyun */
89*4882a593Smuzhiyun unsigned int dispatch_busy;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun /** @type: HCTX_TYPE_* flags. Type of hardware queue. */
92*4882a593Smuzhiyun unsigned short type;
93*4882a593Smuzhiyun /** @nr_ctx: Number of software queues. */
94*4882a593Smuzhiyun unsigned short nr_ctx;
95*4882a593Smuzhiyun /** @ctxs: Array of software queues. */
96*4882a593Smuzhiyun struct blk_mq_ctx **ctxs;
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun /** @dispatch_wait_lock: Lock for dispatch_wait queue. */
99*4882a593Smuzhiyun spinlock_t dispatch_wait_lock;
100*4882a593Smuzhiyun /**
101*4882a593Smuzhiyun * @dispatch_wait: Waitqueue to put requests when there is no tag
102*4882a593Smuzhiyun * available at the moment, to wait for another try in the future.
103*4882a593Smuzhiyun */
104*4882a593Smuzhiyun wait_queue_entry_t dispatch_wait;
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun /**
107*4882a593Smuzhiyun * @wait_index: Index of next available dispatch_wait queue to insert
108*4882a593Smuzhiyun * requests.
109*4882a593Smuzhiyun */
110*4882a593Smuzhiyun atomic_t wait_index;
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun /**
113*4882a593Smuzhiyun * @tags: Tags owned by the block driver. A tag at this set is only
114*4882a593Smuzhiyun * assigned when a request is dispatched from a hardware queue.
115*4882a593Smuzhiyun */
116*4882a593Smuzhiyun struct blk_mq_tags *tags;
117*4882a593Smuzhiyun /**
118*4882a593Smuzhiyun * @sched_tags: Tags owned by I/O scheduler. If there is an I/O
119*4882a593Smuzhiyun * scheduler associated with a request queue, a tag is assigned when
120*4882a593Smuzhiyun * that request is allocated. Else, this member is not used.
121*4882a593Smuzhiyun */
122*4882a593Smuzhiyun struct blk_mq_tags *sched_tags;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun /** @queued: Number of queued requests. */
125*4882a593Smuzhiyun unsigned long queued;
126*4882a593Smuzhiyun /** @run: Number of dispatched requests. */
127*4882a593Smuzhiyun unsigned long run;
128*4882a593Smuzhiyun #define BLK_MQ_MAX_DISPATCH_ORDER 7
129*4882a593Smuzhiyun /** @dispatched: Number of dispatch requests by queue. */
130*4882a593Smuzhiyun unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun /** @numa_node: NUMA node the storage adapter has been connected to. */
133*4882a593Smuzhiyun unsigned int numa_node;
134*4882a593Smuzhiyun /** @queue_num: Index of this hardware queue. */
135*4882a593Smuzhiyun unsigned int queue_num;
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun /**
138*4882a593Smuzhiyun * @nr_active: Number of active requests. Only used when a tag set is
139*4882a593Smuzhiyun * shared across request queues.
140*4882a593Smuzhiyun */
141*4882a593Smuzhiyun atomic_t nr_active;
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun /** @cpuhp_online: List to store request if CPU is going to die */
144*4882a593Smuzhiyun struct hlist_node cpuhp_online;
145*4882a593Smuzhiyun /** @cpuhp_dead: List to store request if some CPU die. */
146*4882a593Smuzhiyun struct hlist_node cpuhp_dead;
147*4882a593Smuzhiyun /** @kobj: Kernel object for sysfs. */
148*4882a593Smuzhiyun struct kobject kobj;
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun /** @poll_considered: Count times blk_poll() was called. */
151*4882a593Smuzhiyun unsigned long poll_considered;
152*4882a593Smuzhiyun /** @poll_invoked: Count how many requests blk_poll() polled. */
153*4882a593Smuzhiyun unsigned long poll_invoked;
154*4882a593Smuzhiyun /** @poll_success: Count how many polled requests were completed. */
155*4882a593Smuzhiyun unsigned long poll_success;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun #ifdef CONFIG_BLK_DEBUG_FS
158*4882a593Smuzhiyun /**
159*4882a593Smuzhiyun * @debugfs_dir: debugfs directory for this hardware queue. Named
160*4882a593Smuzhiyun * as cpu<cpu_number>.
161*4882a593Smuzhiyun */
162*4882a593Smuzhiyun struct dentry *debugfs_dir;
163*4882a593Smuzhiyun /** @sched_debugfs_dir: debugfs directory for the scheduler. */
164*4882a593Smuzhiyun struct dentry *sched_debugfs_dir;
165*4882a593Smuzhiyun #endif
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun /**
168*4882a593Smuzhiyun * @hctx_list: if this hctx is not in use, this is an entry in
169*4882a593Smuzhiyun * q->unused_hctx_list.
170*4882a593Smuzhiyun */
171*4882a593Smuzhiyun struct list_head hctx_list;
172*4882a593Smuzhiyun
173*4882a593Smuzhiyun /**
174*4882a593Smuzhiyun * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
175*4882a593Smuzhiyun * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
176*4882a593Smuzhiyun * blk_mq_hw_ctx_size().
177*4882a593Smuzhiyun */
178*4882a593Smuzhiyun struct srcu_struct srcu[];
179*4882a593Smuzhiyun };
180*4882a593Smuzhiyun
/**
 * struct blk_mq_queue_map - Map software queues to hardware queues
 * @mq_map:       CPU ID to hardware queue index map. This is an array
 *	with nr_cpu_ids elements. Each element has a value in the range
 *	[@queue_offset, @queue_offset + @nr_queues).
 * @nr_queues:    Number of hardware queues to map CPU IDs onto.
 * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
 *	driver to map each hardware queue type (enum hctx_type) onto a distinct
 *	set of hardware queues.
 */
struct blk_mq_queue_map {
	unsigned int *mq_map;
	unsigned int nr_queues;
	unsigned int queue_offset;
};

/**
 * enum hctx_type - Type of hardware queue
 * @HCTX_TYPE_DEFAULT:	All I/O not otherwise accounted for.
 * @HCTX_TYPE_READ:	Just for READ I/O.
 * @HCTX_TYPE_POLL:	Polled I/O of any kind.
 * @HCTX_MAX_TYPES:	Number of types of hctx.
 */
enum hctx_type {
	HCTX_TYPE_DEFAULT,
	HCTX_TYPE_READ,
	HCTX_TYPE_POLL,

	/* Must stay last: it is used as the array size of per-type maps. */
	HCTX_MAX_TYPES,
};

212*4882a593Smuzhiyun /**
213*4882a593Smuzhiyun * struct blk_mq_tag_set - tag set that can be shared between request queues
214*4882a593Smuzhiyun * @map: One or more ctx -> hctx mappings. One map exists for each
215*4882a593Smuzhiyun * hardware queue type (enum hctx_type) that the driver wishes
216*4882a593Smuzhiyun * to support. There are no restrictions on maps being of the
217*4882a593Smuzhiyun * same size, and it's perfectly legal to share maps between
218*4882a593Smuzhiyun * types.
219*4882a593Smuzhiyun * @nr_maps: Number of elements in the @map array. A number in the range
220*4882a593Smuzhiyun * [1, HCTX_MAX_TYPES].
221*4882a593Smuzhiyun * @ops: Pointers to functions that implement block driver behavior.
222*4882a593Smuzhiyun * @nr_hw_queues: Number of hardware queues supported by the block driver that
223*4882a593Smuzhiyun * owns this data structure.
224*4882a593Smuzhiyun * @queue_depth: Number of tags per hardware queue, reserved tags included.
225*4882a593Smuzhiyun * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
226*4882a593Smuzhiyun * allocations.
227*4882a593Smuzhiyun * @cmd_size: Number of additional bytes to allocate per request. The block
228*4882a593Smuzhiyun * driver owns these additional bytes.
229*4882a593Smuzhiyun * @numa_node: NUMA node the storage adapter has been connected to.
230*4882a593Smuzhiyun * @timeout: Request processing timeout in jiffies.
231*4882a593Smuzhiyun * @flags: Zero or more BLK_MQ_F_* flags.
232*4882a593Smuzhiyun * @driver_data: Pointer to data owned by the block driver that created this
233*4882a593Smuzhiyun * tag set.
234*4882a593Smuzhiyun * @active_queues_shared_sbitmap:
235*4882a593Smuzhiyun * number of active request queues per tag set.
236*4882a593Smuzhiyun * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
237*4882a593Smuzhiyun * @__breserved_tags:
238*4882a593Smuzhiyun * A shared reserved tags sbitmap, used over all hctx's
239*4882a593Smuzhiyun * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
240*4882a593Smuzhiyun * elements.
241*4882a593Smuzhiyun * @tag_list_lock: Serializes tag_list accesses.
242*4882a593Smuzhiyun * @tag_list: List of the request queues that use this tag set. See also
243*4882a593Smuzhiyun * request_queue.tag_set_list.
244*4882a593Smuzhiyun */
245*4882a593Smuzhiyun struct blk_mq_tag_set {
246*4882a593Smuzhiyun struct blk_mq_queue_map map[HCTX_MAX_TYPES];
247*4882a593Smuzhiyun unsigned int nr_maps;
248*4882a593Smuzhiyun const struct blk_mq_ops *ops;
249*4882a593Smuzhiyun unsigned int nr_hw_queues;
250*4882a593Smuzhiyun unsigned int queue_depth;
251*4882a593Smuzhiyun unsigned int reserved_tags;
252*4882a593Smuzhiyun unsigned int cmd_size;
253*4882a593Smuzhiyun int numa_node;
254*4882a593Smuzhiyun unsigned int timeout;
255*4882a593Smuzhiyun unsigned int flags;
256*4882a593Smuzhiyun void *driver_data;
257*4882a593Smuzhiyun atomic_t active_queues_shared_sbitmap;
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun struct sbitmap_queue __bitmap_tags;
260*4882a593Smuzhiyun struct sbitmap_queue __breserved_tags;
261*4882a593Smuzhiyun struct blk_mq_tags **tags;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun struct mutex tag_list_lock;
264*4882a593Smuzhiyun struct list_head tag_list;
265*4882a593Smuzhiyun };
266*4882a593Smuzhiyun
/**
 * struct blk_mq_queue_data - Data about a request inserted in a queue
 *
 * @rq:   Request pointer.
 * @last: If it is the last request in the queue.
 */
struct blk_mq_queue_data {
	struct request *rq;
	bool last;
};

/*
 * Iterator callback types. busy_tag_iter_fn is the type taken by
 * blk_mq_tagset_busy_iter(); busy_iter_fn additionally receives the
 * hardware context as its first argument.
 */
typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
		bool);
typedef bool (busy_tag_iter_fn)(struct request *, void *, bool);

282*4882a593Smuzhiyun /**
283*4882a593Smuzhiyun * struct blk_mq_ops - Callback functions that implements block driver
284*4882a593Smuzhiyun * behaviour.
285*4882a593Smuzhiyun */
286*4882a593Smuzhiyun struct blk_mq_ops {
287*4882a593Smuzhiyun /**
288*4882a593Smuzhiyun * @queue_rq: Queue a new request from block IO.
289*4882a593Smuzhiyun */
290*4882a593Smuzhiyun blk_status_t (*queue_rq)(struct blk_mq_hw_ctx *,
291*4882a593Smuzhiyun const struct blk_mq_queue_data *);
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun /**
294*4882a593Smuzhiyun * @commit_rqs: If a driver uses bd->last to judge when to submit
295*4882a593Smuzhiyun * requests to hardware, it must define this function. In case of errors
296*4882a593Smuzhiyun * that make us stop issuing further requests, this hook serves the
297*4882a593Smuzhiyun * purpose of kicking the hardware (which the last request otherwise
298*4882a593Smuzhiyun * would have done).
299*4882a593Smuzhiyun */
300*4882a593Smuzhiyun void (*commit_rqs)(struct blk_mq_hw_ctx *);
301*4882a593Smuzhiyun
302*4882a593Smuzhiyun /**
303*4882a593Smuzhiyun * @get_budget: Reserve budget before queue request, once .queue_rq is
304*4882a593Smuzhiyun * run, it is driver's responsibility to release the
305*4882a593Smuzhiyun * reserved budget. Also we have to handle failure case
306*4882a593Smuzhiyun * of .get_budget for avoiding I/O deadlock.
307*4882a593Smuzhiyun */
308*4882a593Smuzhiyun bool (*get_budget)(struct request_queue *);
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun /**
311*4882a593Smuzhiyun * @put_budget: Release the reserved budget.
312*4882a593Smuzhiyun */
313*4882a593Smuzhiyun void (*put_budget)(struct request_queue *);
314*4882a593Smuzhiyun
315*4882a593Smuzhiyun /**
316*4882a593Smuzhiyun * @timeout: Called on request timeout.
317*4882a593Smuzhiyun */
318*4882a593Smuzhiyun enum blk_eh_timer_return (*timeout)(struct request *, bool);
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun /**
321*4882a593Smuzhiyun * @poll: Called to poll for completion of a specific tag.
322*4882a593Smuzhiyun */
323*4882a593Smuzhiyun int (*poll)(struct blk_mq_hw_ctx *);
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun /**
326*4882a593Smuzhiyun * @complete: Mark the request as complete.
327*4882a593Smuzhiyun */
328*4882a593Smuzhiyun void (*complete)(struct request *);
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun /**
331*4882a593Smuzhiyun * @init_hctx: Called when the block layer side of a hardware queue has
332*4882a593Smuzhiyun * been set up, allowing the driver to allocate/init matching
333*4882a593Smuzhiyun * structures.
334*4882a593Smuzhiyun */
335*4882a593Smuzhiyun int (*init_hctx)(struct blk_mq_hw_ctx *, void *, unsigned int);
336*4882a593Smuzhiyun /**
337*4882a593Smuzhiyun * @exit_hctx: Ditto for exit/teardown.
338*4882a593Smuzhiyun */
339*4882a593Smuzhiyun void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
340*4882a593Smuzhiyun
341*4882a593Smuzhiyun /**
342*4882a593Smuzhiyun * @init_request: Called for every command allocated by the block layer
343*4882a593Smuzhiyun * to allow the driver to set up driver specific data.
344*4882a593Smuzhiyun *
345*4882a593Smuzhiyun * Tag greater than or equal to queue_depth is for setting up
346*4882a593Smuzhiyun * flush request.
347*4882a593Smuzhiyun */
348*4882a593Smuzhiyun int (*init_request)(struct blk_mq_tag_set *set, struct request *,
349*4882a593Smuzhiyun unsigned int, unsigned int);
350*4882a593Smuzhiyun /**
351*4882a593Smuzhiyun * @exit_request: Ditto for exit/teardown.
352*4882a593Smuzhiyun */
353*4882a593Smuzhiyun void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
354*4882a593Smuzhiyun unsigned int);
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun /**
357*4882a593Smuzhiyun * @initialize_rq_fn: Called from inside blk_get_request().
358*4882a593Smuzhiyun */
359*4882a593Smuzhiyun void (*initialize_rq_fn)(struct request *rq);
360*4882a593Smuzhiyun
361*4882a593Smuzhiyun /**
362*4882a593Smuzhiyun * @cleanup_rq: Called before freeing one request which isn't completed
363*4882a593Smuzhiyun * yet, and usually for freeing the driver private data.
364*4882a593Smuzhiyun */
365*4882a593Smuzhiyun void (*cleanup_rq)(struct request *);
366*4882a593Smuzhiyun
367*4882a593Smuzhiyun /**
368*4882a593Smuzhiyun * @busy: If set, returns whether or not this queue currently is busy.
369*4882a593Smuzhiyun */
370*4882a593Smuzhiyun bool (*busy)(struct request_queue *);
371*4882a593Smuzhiyun
372*4882a593Smuzhiyun /**
373*4882a593Smuzhiyun * @map_queues: This allows drivers specify their own queue mapping by
374*4882a593Smuzhiyun * overriding the setup-time function that builds the mq_map.
375*4882a593Smuzhiyun */
376*4882a593Smuzhiyun int (*map_queues)(struct blk_mq_tag_set *set);
377*4882a593Smuzhiyun
378*4882a593Smuzhiyun #ifdef CONFIG_BLK_DEBUG_FS
379*4882a593Smuzhiyun /**
380*4882a593Smuzhiyun * @show_rq: Used by the debugfs implementation to show driver-specific
381*4882a593Smuzhiyun * information about a request.
382*4882a593Smuzhiyun */
383*4882a593Smuzhiyun void (*show_rq)(struct seq_file *m, struct request *rq);
384*4882a593Smuzhiyun #endif
385*4882a593Smuzhiyun };
386*4882a593Smuzhiyun
/*
 * BLK_MQ_F_*: flag bits ("@flags: BLK_MQ_F_* flags" in the structures above).
 * BLK_MQ_S_*: hardware queue state values; they are small consecutive
 * integers rather than masks (presumably bit numbers for hctx->state -
 * confirm against the users).
 */
enum {
	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
	/*
	 * Set when this device requires underlying blk-mq device for
	 * completing IO:
	 */
	BLK_MQ_F_STACKING	= 1 << 2,
	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
	/* Bit 4 is not assigned in this header. */
	BLK_MQ_F_BLOCKING	= 1 << 5,
	/* Do not allow an I/O scheduler to be configured. */
	BLK_MQ_F_NO_SCHED	= 1 << 6,
	/*
	 * Select 'none' during queue registration in case of a single hwq
	 * or shared hwqs instead of 'mq-deadline'.
	 */
	BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7,
	/* Position and width of the allocation policy field inside flags. */
	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
	BLK_MQ_F_ALLOC_POLICY_BITS = 1,

	BLK_MQ_S_STOPPED	= 0,
	BLK_MQ_S_TAG_ACTIVE	= 1,
	BLK_MQ_S_SCHED_RESTART	= 2,

	/* hw queue is inactive after all its CPUs become offline */
	BLK_MQ_S_INACTIVE	= 3,

	BLK_MQ_MAX_DEPTH	= 10240,

	BLK_MQ_CPU_WORK_BATCH	= 8,
};
/*
 * Extract the allocation policy field from a BLK_MQ_F_* flags word, and
 * build the flags-word encoding of an allocation policy. The macro
 * arguments are parenthesised so that compound expressions such as
 * "a | b" are evaluated as a whole before shifting/masking.
 */
#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
	(((flags) >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
		((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1))
#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \
	(((policy) & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)

425*4882a593Smuzhiyun struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
426*4882a593Smuzhiyun struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
427*4882a593Smuzhiyun void *queuedata);
428*4882a593Smuzhiyun struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
429*4882a593Smuzhiyun struct request_queue *q,
430*4882a593Smuzhiyun bool elevator_init);
431*4882a593Smuzhiyun struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
432*4882a593Smuzhiyun const struct blk_mq_ops *ops,
433*4882a593Smuzhiyun unsigned int queue_depth,
434*4882a593Smuzhiyun unsigned int set_flags);
435*4882a593Smuzhiyun void blk_mq_unregister_dev(struct device *, struct request_queue *);
436*4882a593Smuzhiyun
437*4882a593Smuzhiyun int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
438*4882a593Smuzhiyun void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
439*4882a593Smuzhiyun
440*4882a593Smuzhiyun void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
441*4882a593Smuzhiyun
442*4882a593Smuzhiyun void blk_mq_free_request(struct request *rq);
443*4882a593Smuzhiyun
444*4882a593Smuzhiyun bool blk_mq_queue_inflight(struct request_queue *q);
445*4882a593Smuzhiyun
446*4882a593Smuzhiyun enum {
447*4882a593Smuzhiyun /* return when out of requests */
448*4882a593Smuzhiyun BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0),
449*4882a593Smuzhiyun /* allocate from reserved pool */
450*4882a593Smuzhiyun BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1),
451*4882a593Smuzhiyun /* set RQF_PM */
452*4882a593Smuzhiyun BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2),
453*4882a593Smuzhiyun };
454*4882a593Smuzhiyun
455*4882a593Smuzhiyun struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
456*4882a593Smuzhiyun blk_mq_req_flags_t flags);
457*4882a593Smuzhiyun struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
458*4882a593Smuzhiyun unsigned int op, blk_mq_req_flags_t flags,
459*4882a593Smuzhiyun unsigned int hctx_idx);
460*4882a593Smuzhiyun struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
461*4882a593Smuzhiyun
/*
 * Layout of the 32-bit "unique tag": the low BLK_MQ_UNIQUE_TAG_BITS bits
 * are selected by BLK_MQ_UNIQUE_TAG_MASK, the bits above them are what
 * blk_mq_unique_tag_to_hwq() returns.
 */
enum {
	BLK_MQ_UNIQUE_TAG_BITS = 16,
	BLK_MQ_UNIQUE_TAG_MASK = (1 << BLK_MQ_UNIQUE_TAG_BITS) - 1,
};

467*4882a593Smuzhiyun u32 blk_mq_unique_tag(struct request *rq);
468*4882a593Smuzhiyun
blk_mq_unique_tag_to_hwq(u32 unique_tag)469*4882a593Smuzhiyun static inline u16 blk_mq_unique_tag_to_hwq(u32 unique_tag)
470*4882a593Smuzhiyun {
471*4882a593Smuzhiyun return unique_tag >> BLK_MQ_UNIQUE_TAG_BITS;
472*4882a593Smuzhiyun }
473*4882a593Smuzhiyun
blk_mq_unique_tag_to_tag(u32 unique_tag)474*4882a593Smuzhiyun static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
475*4882a593Smuzhiyun {
476*4882a593Smuzhiyun return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
477*4882a593Smuzhiyun }
478*4882a593Smuzhiyun
479*4882a593Smuzhiyun /**
480*4882a593Smuzhiyun * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
481*4882a593Smuzhiyun * @rq: target request.
482*4882a593Smuzhiyun */
blk_mq_rq_state(struct request * rq)483*4882a593Smuzhiyun static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
484*4882a593Smuzhiyun {
485*4882a593Smuzhiyun return READ_ONCE(rq->state);
486*4882a593Smuzhiyun }
487*4882a593Smuzhiyun
blk_mq_request_started(struct request * rq)488*4882a593Smuzhiyun static inline int blk_mq_request_started(struct request *rq)
489*4882a593Smuzhiyun {
490*4882a593Smuzhiyun return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
491*4882a593Smuzhiyun }
492*4882a593Smuzhiyun
blk_mq_request_completed(struct request * rq)493*4882a593Smuzhiyun static inline int blk_mq_request_completed(struct request *rq)
494*4882a593Smuzhiyun {
495*4882a593Smuzhiyun return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
496*4882a593Smuzhiyun }
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun void blk_mq_start_request(struct request *rq);
499*4882a593Smuzhiyun void blk_mq_end_request(struct request *rq, blk_status_t error);
500*4882a593Smuzhiyun void __blk_mq_end_request(struct request *rq, blk_status_t error);
501*4882a593Smuzhiyun
502*4882a593Smuzhiyun void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
503*4882a593Smuzhiyun void blk_mq_kick_requeue_list(struct request_queue *q);
504*4882a593Smuzhiyun void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
505*4882a593Smuzhiyun void blk_mq_complete_request(struct request *rq);
506*4882a593Smuzhiyun bool blk_mq_complete_request_remote(struct request *rq);
507*4882a593Smuzhiyun bool blk_mq_queue_stopped(struct request_queue *q);
508*4882a593Smuzhiyun void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
509*4882a593Smuzhiyun void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
510*4882a593Smuzhiyun void blk_mq_stop_hw_queues(struct request_queue *q);
511*4882a593Smuzhiyun void blk_mq_start_hw_queues(struct request_queue *q);
512*4882a593Smuzhiyun void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
513*4882a593Smuzhiyun void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
514*4882a593Smuzhiyun void blk_mq_quiesce_queue(struct request_queue *q);
515*4882a593Smuzhiyun void blk_mq_unquiesce_queue(struct request_queue *q);
516*4882a593Smuzhiyun void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
517*4882a593Smuzhiyun void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
518*4882a593Smuzhiyun void blk_mq_run_hw_queues(struct request_queue *q, bool async);
519*4882a593Smuzhiyun void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs);
520*4882a593Smuzhiyun void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
521*4882a593Smuzhiyun busy_tag_iter_fn *fn, void *priv);
522*4882a593Smuzhiyun void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
523*4882a593Smuzhiyun void blk_mq_freeze_queue(struct request_queue *q);
524*4882a593Smuzhiyun void blk_mq_unfreeze_queue(struct request_queue *q);
525*4882a593Smuzhiyun void blk_freeze_queue_start(struct request_queue *q);
526*4882a593Smuzhiyun void blk_mq_freeze_queue_wait(struct request_queue *q);
527*4882a593Smuzhiyun int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
528*4882a593Smuzhiyun unsigned long timeout);
529*4882a593Smuzhiyun
530*4882a593Smuzhiyun int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
531*4882a593Smuzhiyun void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
532*4882a593Smuzhiyun
533*4882a593Smuzhiyun void blk_mq_quiesce_queue_nowait(struct request_queue *q);
534*4882a593Smuzhiyun
535*4882a593Smuzhiyun unsigned int blk_mq_rq_cpu(struct request *rq);
536*4882a593Smuzhiyun
537*4882a593Smuzhiyun bool __blk_should_fake_timeout(struct request_queue *q);
blk_should_fake_timeout(struct request_queue * q)538*4882a593Smuzhiyun static inline bool blk_should_fake_timeout(struct request_queue *q)
539*4882a593Smuzhiyun {
540*4882a593Smuzhiyun if (IS_ENABLED(CONFIG_FAIL_IO_TIMEOUT) &&
541*4882a593Smuzhiyun test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
542*4882a593Smuzhiyun return __blk_should_fake_timeout(q);
543*4882a593Smuzhiyun return false;
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun /**
547*4882a593Smuzhiyun * blk_mq_rq_from_pdu - cast a PDU to a request
548*4882a593Smuzhiyun * @pdu: the PDU (Protocol Data Unit) to be casted
549*4882a593Smuzhiyun *
550*4882a593Smuzhiyun * Return: request
551*4882a593Smuzhiyun *
552*4882a593Smuzhiyun * Driver command data is immediately after the request. So subtract request
553*4882a593Smuzhiyun * size to get back to the original request.
554*4882a593Smuzhiyun */
blk_mq_rq_from_pdu(void * pdu)555*4882a593Smuzhiyun static inline struct request *blk_mq_rq_from_pdu(void *pdu)
556*4882a593Smuzhiyun {
557*4882a593Smuzhiyun return pdu - sizeof(struct request);
558*4882a593Smuzhiyun }
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun /**
561*4882a593Smuzhiyun * blk_mq_rq_to_pdu - cast a request to a PDU
562*4882a593Smuzhiyun * @rq: the request to be casted
563*4882a593Smuzhiyun *
564*4882a593Smuzhiyun * Return: pointer to the PDU
565*4882a593Smuzhiyun *
566*4882a593Smuzhiyun * Driver command data is immediately after the request. So add request to get
567*4882a593Smuzhiyun * the PDU.
568*4882a593Smuzhiyun */
blk_mq_rq_to_pdu(struct request * rq)569*4882a593Smuzhiyun static inline void *blk_mq_rq_to_pdu(struct request *rq)
570*4882a593Smuzhiyun {
571*4882a593Smuzhiyun return rq + 1;
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun
/*
 * Iterate over the hardware contexts of request queue @q: @i runs from 0 to
 * (q)->nr_hw_queues - 1 and @hctx is set to (q)->queue_hw_ctx[i] before each
 * iteration of the loop body. The statement expression always yields 1, so
 * only the bound check on @i ends the loop.
 */
#define queue_for_each_hw_ctx(q, hctx, i)				\
	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
	     ({ (hctx) = (q)->queue_hw_ctx[(i)]; 1; }); (i)++)

/*
 * Iterate over the software queues of hardware context @hctx: @i runs from 0
 * to (hctx)->nr_ctx - 1 and @ctx is set to (hctx)->ctxs[i] before each
 * iteration of the loop body.
 */
#define hctx_for_each_ctx(hctx, ctx, i)					\
	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
	     ({ (ctx) = (hctx)->ctxs[(i)]; 1; }); (i)++)

request_to_qc_t(struct blk_mq_hw_ctx * hctx,struct request * rq)582*4882a593Smuzhiyun static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
583*4882a593Smuzhiyun struct request *rq)
584*4882a593Smuzhiyun {
585*4882a593Smuzhiyun if (rq->tag != -1)
586*4882a593Smuzhiyun return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
587*4882a593Smuzhiyun
588*4882a593Smuzhiyun return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
589*4882a593Smuzhiyun BLK_QC_T_INTERNAL;
590*4882a593Smuzhiyun }
591*4882a593Smuzhiyun
blk_mq_cleanup_rq(struct request * rq)592*4882a593Smuzhiyun static inline void blk_mq_cleanup_rq(struct request *rq)
593*4882a593Smuzhiyun {
594*4882a593Smuzhiyun if (rq->q->mq_ops->cleanup_rq)
595*4882a593Smuzhiyun rq->q->mq_ops->cleanup_rq(rq);
596*4882a593Smuzhiyun }
597*4882a593Smuzhiyun
598*4882a593Smuzhiyun blk_qc_t blk_mq_submit_bio(struct bio *bio);
599*4882a593Smuzhiyun
#endif /* BLK_MQ_H */