1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun #ifndef INT_BLK_MQ_H
3*4882a593Smuzhiyun #define INT_BLK_MQ_H
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun #include "blk-stat.h"
6*4882a593Smuzhiyun #include "blk-mq-tag.h"
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun struct blk_mq_tag_set;
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun struct blk_mq_ctxs {
11*4882a593Smuzhiyun struct kobject kobj;
12*4882a593Smuzhiyun struct blk_mq_ctx __percpu *queue_ctx;
13*4882a593Smuzhiyun };
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun /**
16*4882a593Smuzhiyun * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
17*4882a593Smuzhiyun */
18*4882a593Smuzhiyun struct blk_mq_ctx {
19*4882a593Smuzhiyun struct {
20*4882a593Smuzhiyun spinlock_t lock;
21*4882a593Smuzhiyun struct list_head rq_lists[HCTX_MAX_TYPES];
22*4882a593Smuzhiyun } ____cacheline_aligned_in_smp;
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun unsigned int cpu;
25*4882a593Smuzhiyun unsigned short index_hw[HCTX_MAX_TYPES];
26*4882a593Smuzhiyun struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES];
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun /* incremented at dispatch time */
29*4882a593Smuzhiyun unsigned long rq_dispatched[2];
30*4882a593Smuzhiyun unsigned long rq_merged;
31*4882a593Smuzhiyun
32*4882a593Smuzhiyun /* incremented at completion time */
33*4882a593Smuzhiyun unsigned long ____cacheline_aligned_in_smp rq_completed[2];
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun struct request_queue *queue;
36*4882a593Smuzhiyun struct blk_mq_ctxs *ctxs;
37*4882a593Smuzhiyun struct kobject kobj;
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun ANDROID_OEM_DATA_ARRAY(1, 2);
40*4882a593Smuzhiyun } ____cacheline_aligned_in_smp;
41*4882a593Smuzhiyun
/*
 * Declarations only; the definitions live outside this header.
 */
void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx,
			     struct list_head *list,
			     unsigned int nr_budgets);
void blk_mq_add_to_requeue_list(struct request *rq,
				bool at_head,
				bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx,
			    struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
					struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);
53*4882a593Smuzhiyun
/*
 * Internal helpers for allocating and freeing the request map.
 */
void blk_mq_free_rqs(struct blk_mq_tag_set *set,
		     struct blk_mq_tags *tags,
		     unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags);
struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
					unsigned int hctx_idx,
					unsigned int nr_tags,
					unsigned int reserved_tags,
					unsigned int flags);
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
		     struct blk_mq_tags *tags,
		     unsigned int hctx_idx,
		     unsigned int depth);
67*4882a593Smuzhiyun
/*
 * Internal helpers for inserting requests into the software queues.
 */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
			     struct request *rq,
			     bool at_head);
void blk_mq_request_bypass_insert(struct request *rq,
				  bool at_head,
				  bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
			    struct blk_mq_ctx *ctx,
			    struct list_head *list);
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun /* Used by blk_insert_cloned_request() to issue request directly */
79*4882a593Smuzhiyun blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
80*4882a593Smuzhiyun void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
81*4882a593Smuzhiyun struct list_head *list);
82*4882a593Smuzhiyun
/*
 * CPU -> queue mappings
 */
int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index);
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun /*
89*4882a593Smuzhiyun * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
90*4882a593Smuzhiyun * @q: request queue
91*4882a593Smuzhiyun * @type: the hctx type index
92*4882a593Smuzhiyun * @cpu: CPU
93*4882a593Smuzhiyun */
blk_mq_map_queue_type(struct request_queue * q,enum hctx_type type,unsigned int cpu)94*4882a593Smuzhiyun static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
95*4882a593Smuzhiyun enum hctx_type type,
96*4882a593Smuzhiyun unsigned int cpu)
97*4882a593Smuzhiyun {
98*4882a593Smuzhiyun return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun /*
102*4882a593Smuzhiyun * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
103*4882a593Smuzhiyun * @q: request queue
104*4882a593Smuzhiyun * @flags: request command flags
105*4882a593Smuzhiyun * @cpu: cpu ctx
106*4882a593Smuzhiyun */
blk_mq_map_queue(struct request_queue * q,unsigned int flags,struct blk_mq_ctx * ctx)107*4882a593Smuzhiyun static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
108*4882a593Smuzhiyun unsigned int flags,
109*4882a593Smuzhiyun struct blk_mq_ctx *ctx)
110*4882a593Smuzhiyun {
111*4882a593Smuzhiyun enum hctx_type type = HCTX_TYPE_DEFAULT;
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun /*
114*4882a593Smuzhiyun * The caller ensure that if REQ_HIPRI, poll must be enabled.
115*4882a593Smuzhiyun */
116*4882a593Smuzhiyun if (flags & REQ_HIPRI)
117*4882a593Smuzhiyun type = HCTX_TYPE_POLL;
118*4882a593Smuzhiyun else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
119*4882a593Smuzhiyun type = HCTX_TYPE_READ;
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun return ctx->hctxs[type];
122*4882a593Smuzhiyun }
123*4882a593Smuzhiyun
/*
 * sysfs helpers
 */
void blk_mq_sysfs_init(struct request_queue *q);
void blk_mq_sysfs_deinit(struct request_queue *q);
int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
int blk_mq_sysfs_register(struct request_queue *q);
void blk_mq_sysfs_unregister(struct request_queue *q);
void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

void blk_mq_release(struct request_queue *q);
135*4882a593Smuzhiyun
__blk_mq_get_ctx(struct request_queue * q,unsigned int cpu)136*4882a593Smuzhiyun static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
137*4882a593Smuzhiyun unsigned int cpu)
138*4882a593Smuzhiyun {
139*4882a593Smuzhiyun return per_cpu_ptr(q->queue_ctx, cpu);
140*4882a593Smuzhiyun }
141*4882a593Smuzhiyun
/*
 * Software queues are assumed to be per-CPU here. They could just as well
 * be per-node, but for now this is hardcoded. Preemption is not a concern
 * because the ctx's are persistent; the flip side is that the returned ctx
 * is not guaranteed to match the CPU the caller ends up running on.
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	unsigned int cpu = raw_smp_processor_id();

	return __blk_mq_get_ctx(q, cpu);
}
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun struct blk_mq_alloc_data {
154*4882a593Smuzhiyun /* input parameter */
155*4882a593Smuzhiyun struct request_queue *q;
156*4882a593Smuzhiyun blk_mq_req_flags_t flags;
157*4882a593Smuzhiyun unsigned int shallow_depth;
158*4882a593Smuzhiyun unsigned int cmd_flags;
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun /* input & output parameter */
161*4882a593Smuzhiyun struct blk_mq_ctx *ctx;
162*4882a593Smuzhiyun struct blk_mq_hw_ctx *hctx;
163*4882a593Smuzhiyun };
164*4882a593Smuzhiyun
blk_mq_is_sbitmap_shared(unsigned int flags)165*4882a593Smuzhiyun static inline bool blk_mq_is_sbitmap_shared(unsigned int flags)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun return flags & BLK_MQ_F_TAG_HCTX_SHARED;
168*4882a593Smuzhiyun }
169*4882a593Smuzhiyun
blk_mq_tags_from_data(struct blk_mq_alloc_data * data)170*4882a593Smuzhiyun static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
171*4882a593Smuzhiyun {
172*4882a593Smuzhiyun if (data->q->elevator)
173*4882a593Smuzhiyun return data->hctx->sched_tags;
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun return data->hctx->tags;
176*4882a593Smuzhiyun }
177*4882a593Smuzhiyun
blk_mq_hctx_stopped(struct blk_mq_hw_ctx * hctx)178*4882a593Smuzhiyun static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
179*4882a593Smuzhiyun {
180*4882a593Smuzhiyun return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun
blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx * hctx)183*4882a593Smuzhiyun static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
184*4882a593Smuzhiyun {
185*4882a593Smuzhiyun return hctx->nr_ctx && hctx->tags;
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun
/* In-flight accounting for a partition; defined outside this header. */
unsigned int blk_mq_in_flight(struct request_queue *q,
			      struct hd_struct *part);
void blk_mq_in_flight_rw(struct request_queue *q,
			 struct hd_struct *part,
			 unsigned int inflight[2]);
191*4882a593Smuzhiyun
blk_mq_put_dispatch_budget(struct request_queue * q)192*4882a593Smuzhiyun static inline void blk_mq_put_dispatch_budget(struct request_queue *q)
193*4882a593Smuzhiyun {
194*4882a593Smuzhiyun if (q->mq_ops->put_budget)
195*4882a593Smuzhiyun q->mq_ops->put_budget(q);
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun
blk_mq_get_dispatch_budget(struct request_queue * q)198*4882a593Smuzhiyun static inline bool blk_mq_get_dispatch_budget(struct request_queue *q)
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun if (q->mq_ops->get_budget)
201*4882a593Smuzhiyun return q->mq_ops->get_budget(q);
202*4882a593Smuzhiyun return true;
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun
__blk_mq_inc_active_requests(struct blk_mq_hw_ctx * hctx)205*4882a593Smuzhiyun static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun if (blk_mq_is_sbitmap_shared(hctx->flags))
208*4882a593Smuzhiyun atomic_inc(&hctx->queue->nr_active_requests_shared_sbitmap);
209*4882a593Smuzhiyun else
210*4882a593Smuzhiyun atomic_inc(&hctx->nr_active);
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
__blk_mq_dec_active_requests(struct blk_mq_hw_ctx * hctx)213*4882a593Smuzhiyun static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun if (blk_mq_is_sbitmap_shared(hctx->flags))
216*4882a593Smuzhiyun atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap);
217*4882a593Smuzhiyun else
218*4882a593Smuzhiyun atomic_dec(&hctx->nr_active);
219*4882a593Smuzhiyun }
220*4882a593Smuzhiyun
__blk_mq_active_requests(struct blk_mq_hw_ctx * hctx)221*4882a593Smuzhiyun static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
222*4882a593Smuzhiyun {
223*4882a593Smuzhiyun if (blk_mq_is_sbitmap_shared(hctx->flags))
224*4882a593Smuzhiyun return atomic_read(&hctx->queue->nr_active_requests_shared_sbitmap);
225*4882a593Smuzhiyun return atomic_read(&hctx->nr_active);
226*4882a593Smuzhiyun }
__blk_mq_put_driver_tag(struct blk_mq_hw_ctx * hctx,struct request * rq)227*4882a593Smuzhiyun static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
228*4882a593Smuzhiyun struct request *rq)
229*4882a593Smuzhiyun {
230*4882a593Smuzhiyun blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
231*4882a593Smuzhiyun rq->tag = BLK_MQ_NO_TAG;
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun if (rq->rq_flags & RQF_MQ_INFLIGHT) {
234*4882a593Smuzhiyun rq->rq_flags &= ~RQF_MQ_INFLIGHT;
235*4882a593Smuzhiyun __blk_mq_dec_active_requests(hctx);
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun }
238*4882a593Smuzhiyun
blk_mq_put_driver_tag(struct request * rq)239*4882a593Smuzhiyun static inline void blk_mq_put_driver_tag(struct request *rq)
240*4882a593Smuzhiyun {
241*4882a593Smuzhiyun if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
242*4882a593Smuzhiyun return;
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun __blk_mq_put_driver_tag(rq->mq_hctx, rq);
245*4882a593Smuzhiyun }
246*4882a593Smuzhiyun
blk_mq_clear_mq_map(struct blk_mq_queue_map * qmap)247*4882a593Smuzhiyun static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
248*4882a593Smuzhiyun {
249*4882a593Smuzhiyun int cpu;
250*4882a593Smuzhiyun
251*4882a593Smuzhiyun for_each_possible_cpu(cpu)
252*4882a593Smuzhiyun qmap->mq_map[cpu] = 0;
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun /*
256*4882a593Smuzhiyun * blk_mq_plug() - Get caller context plug
257*4882a593Smuzhiyun * @q: request queue
258*4882a593Smuzhiyun * @bio : the bio being submitted by the caller context
259*4882a593Smuzhiyun *
260*4882a593Smuzhiyun * Plugging, by design, may delay the insertion of BIOs into the elevator in
261*4882a593Smuzhiyun * order to increase BIO merging opportunities. This however can cause BIO
262*4882a593Smuzhiyun * insertion order to change from the order in which submit_bio() is being
263*4882a593Smuzhiyun * executed in the case of multiple contexts concurrently issuing BIOs to a
264*4882a593Smuzhiyun * device, even if these context are synchronized to tightly control BIO issuing
265*4882a593Smuzhiyun * order. While this is not a problem with regular block devices, this ordering
266*4882a593Smuzhiyun * change can cause write BIO failures with zoned block devices as these
267*4882a593Smuzhiyun * require sequential write patterns to zones. Prevent this from happening by
268*4882a593Smuzhiyun * ignoring the plug state of a BIO issuing context if the target request queue
269*4882a593Smuzhiyun * is for a zoned block device and the BIO to plug is a write operation.
270*4882a593Smuzhiyun *
271*4882a593Smuzhiyun * Return current->plug if the bio can be plugged and NULL otherwise
272*4882a593Smuzhiyun */
blk_mq_plug(struct request_queue * q,struct bio * bio)273*4882a593Smuzhiyun static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
274*4882a593Smuzhiyun struct bio *bio)
275*4882a593Smuzhiyun {
276*4882a593Smuzhiyun /*
277*4882a593Smuzhiyun * For regular block devices or read operations, use the context plug
278*4882a593Smuzhiyun * which may be NULL if blk_start_plug() was not executed.
279*4882a593Smuzhiyun */
280*4882a593Smuzhiyun if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
281*4882a593Smuzhiyun return current->plug;
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun /* Zoned block device write operation case: do not plug the BIO */
284*4882a593Smuzhiyun return NULL;
285*4882a593Smuzhiyun }
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun /*
288*4882a593Smuzhiyun * For shared tag users, we track the number of currently active users
289*4882a593Smuzhiyun * and attempt to provide a fair share of the tag depth for each of them.
290*4882a593Smuzhiyun */
hctx_may_queue(struct blk_mq_hw_ctx * hctx,struct sbitmap_queue * bt)291*4882a593Smuzhiyun static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
292*4882a593Smuzhiyun struct sbitmap_queue *bt)
293*4882a593Smuzhiyun {
294*4882a593Smuzhiyun unsigned int depth, users;
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
297*4882a593Smuzhiyun return true;
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun /*
300*4882a593Smuzhiyun * Don't try dividing an ant
301*4882a593Smuzhiyun */
302*4882a593Smuzhiyun if (bt->sb.depth == 1)
303*4882a593Smuzhiyun return true;
304*4882a593Smuzhiyun
305*4882a593Smuzhiyun if (blk_mq_is_sbitmap_shared(hctx->flags)) {
306*4882a593Smuzhiyun struct request_queue *q = hctx->queue;
307*4882a593Smuzhiyun struct blk_mq_tag_set *set = q->tag_set;
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
310*4882a593Smuzhiyun return true;
311*4882a593Smuzhiyun users = atomic_read(&set->active_queues_shared_sbitmap);
312*4882a593Smuzhiyun } else {
313*4882a593Smuzhiyun if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
314*4882a593Smuzhiyun return true;
315*4882a593Smuzhiyun users = atomic_read(&hctx->tags->active_queues);
316*4882a593Smuzhiyun }
317*4882a593Smuzhiyun
318*4882a593Smuzhiyun if (!users)
319*4882a593Smuzhiyun return true;
320*4882a593Smuzhiyun
321*4882a593Smuzhiyun /*
322*4882a593Smuzhiyun * Allow at least some tags
323*4882a593Smuzhiyun */
324*4882a593Smuzhiyun depth = max((bt->sb.depth + users - 1) / users, 4U);
325*4882a593Smuzhiyun return __blk_mq_active_requests(hctx) < depth;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun
329*4882a593Smuzhiyun #endif
330