// SPDX-License-Identifier: GPL-2.0

#include "blk-rq-qos.h"

/*
 * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
 * false if 'v' + 1 would be bigger than 'below'.
 */
static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
	unsigned int cur = atomic_read(v);

	for (;;) {
		unsigned int old;

		if (cur >= below)
			return false;
		old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			break;
		cur = old;
	}

	return true;
}

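/*
 * Take one inflight slot from @rq_wait, unless the count has already
 * reached @limit.
 */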
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
{
	return atomic_inc_below(&rq_wait->inflight, limit);
}

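/*
 * The __rq_qos_*() helpers below walk the queue's rq_qos chain and invoke
 * the matching callback on every policy that implements it. Callers normally
 * go through the rq_qos_*() wrappers in blk-rq-qos.h, which check that a
 * chain is registered before calling in here.
 */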
void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->cleanup)
			rqos->ops->cleanup(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->done)
			rqos->ops->done(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_issue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->issue)
			rqos->ops->issue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq)
{
	do {
		if (rqos->ops->requeue)
			rqos->ops->requeue(rqos, rq);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->throttle)
			rqos->ops->throttle(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->track)
			rqos->ops->track(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	do {
		if (rqos->ops->merge)
			rqos->ops->merge(rqos, rq, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	do {
		if (rqos->ops->done_bio)
			rqos->ops->done_bio(rqos, bio);
		rqos = rqos->next;
	} while (rqos);
}

void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
{
	do {
		if (rqos->ops->queue_depth_changed)
			rqos->ops->queue_depth_changed(rqos);
		rqos = rqos->next;
	} while (rqos);
}

/*
 * Return true if we can't increase the depth further by scaling.
 */
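/*
 * Worked example (illustrative numbers, not from the source): with
 * default_depth = 64 and queue_depth = 128, scale_step = 2 yields a depth
 * of 1 + ((64 - 1) >> 2) = 16, while scale_step = -1 yields
 * 1 + ((64 - 1) << 1) = 127, which is clamped to 3 * 128 / 4 = 96 and
 * makes the function return true.
 */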
bool rq_depth_calc_max_depth(struct rq_depth *rqd)
{
	unsigned int depth;
	bool ret = false;

	/*
	 * For QD=1 devices, this is a special case. It's important for those
	 * to have one request ready when one completes, so force a depth of
	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
	 * since the device can't have more than that in flight. If we're
	 * scaling down, then keep a setting of 1/1/1.
	 */
	if (rqd->queue_depth == 1) {
		if (rqd->scale_step > 0)
			rqd->max_depth = 1;
		else {
			rqd->max_depth = 2;
			ret = true;
		}
	} else {
		/*
		 * scale_step == 0 is our default state. If we have suffered
		 * latency spikes, step will be > 0, and we shrink the
		 * allowed write depths. If step is < 0, we're only doing
		 * writes, and we allow a temporarily higher depth to
		 * increase performance.
		 */
		depth = min_t(unsigned int, rqd->default_depth,
			      rqd->queue_depth);
		if (rqd->scale_step > 0)
			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
		else if (rqd->scale_step < 0) {
			unsigned int maxd = 3 * rqd->queue_depth / 4;

			depth = 1 + ((depth - 1) << -rqd->scale_step);
			if (depth > maxd) {
				depth = maxd;
				ret = true;
			}
		}

		rqd->max_depth = depth;
	}

	return ret;
}

/* Returns true on success and false if scaling up wasn't possible */
bool rq_depth_scale_up(struct rq_depth *rqd)
{
	/*
	 * Hit max in previous round, stop here
	 */
	if (rqd->scaled_max)
		return false;

	rqd->scale_step--;

	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
	return true;
}

/*
 * Scale the queue depth down. If 'hard_throttle' is set, do it quicker,
 * since we had a latency violation. Returns true on success and false if
 * scaling down wasn't possible.
 */
bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
	/*
	 * Stop scaling down when we've hit the limit. This also prevents
	 * ->scale_step from going to crazy values, if the device can't
	 * keep up.
	 */
	if (rqd->max_depth == 1)
		return false;

	if (rqd->scale_step < 0 && hard_throttle)
		rqd->scale_step = 0;
	else
		rqd->scale_step++;

	rqd->scaled_max = false;
	rq_depth_calc_max_depth(rqd);
	return true;
}

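/*
 * Per-waiter state for rq_qos_wait(): the wait queue entry, the sleeping
 * task, the rq_wait being throttled on, the inflight-acquire callback and
 * its private data, and whether a waker has already handed us a token.
 */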
struct rq_qos_wait_data {
	struct wait_queue_entry wq;
	struct task_struct *task;
	struct rq_wait *rqw;
	acquire_inflight_cb_t *cb;
	void *private_data;
	bool got_token;
};

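/*
 * Wake function used by rq_qos_wait(): it runs in the waker's context and
 * tries to grab an inflight slot on behalf of the sleeper, only waking the
 * task when that succeeds, so waiters are released as budget frees up.
 */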
static int rq_qos_wake_function(struct wait_queue_entry *curr,
				unsigned int mode, int wake_flags, void *key)
{
	struct rq_qos_wait_data *data = container_of(curr,
						     struct rq_qos_wait_data,
						     wq);

	/*
	 * If we fail to get a budget, return -1 to interrupt the wake up loop
	 * in __wake_up_common.
	 */
	if (!data->cb(data->rqw, data->private_data))
		return -1;

	data->got_token = true;
	/*
	 * Make the got_token store visible before we drop off the wait list
	 * and wake the task; pairs with the smp_rmb() in rq_qos_wait().
	 */
	smp_wmb();
	list_del_init(&curr->entry);
	wake_up_process(data->task);
	return 1;
}

/**
 * rq_qos_wait - throttle on a rqw if we need to
 * @rqw: rqw to throttle on
 * @private_data: caller provided specific data
 * @acquire_inflight_cb: inc the rqw->inflight counter if we can
 * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
 * waking up based on the resources available. The acquire_inflight_cb should
 * inc the rqw->inflight if we have the ability to do so, or return false if
 * not, and then we will sleep until room becomes available.
 *
 * cleanup_cb is there in case we race with a waker and need to adjust the
 * inflight count accordingly.
 */
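/*
 * Typical use (illustrative sketch, not taken from this file): a policy
 * supplies callbacks that try to take an inflight slot and give one back
 * on a lost race, e.g.
 *
 *	static bool my_inflight_cb(struct rq_wait *rqw, void *private_data)
 *	{
 *		return rq_wait_inc_below(rqw, my_limit(private_data));
 *	}
 *
 *	static void my_cleanup_cb(struct rq_wait *rqw, void *private_data)
 *	{
 *		atomic_dec(&rqw->inflight);
 *	}
 *
 *	rq_qos_wait(rqw, private_data, my_inflight_cb, my_cleanup_cb);
 *
 * my_limit() is a made-up helper standing in for the policy's depth limit.
 */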
void rq_qos_wait(struct rq_wait *rqw, void *private_data,
		 acquire_inflight_cb_t *acquire_inflight_cb,
		 cleanup_cb_t *cleanup_cb)
{
	struct rq_qos_wait_data data = {
		.wq = {
			.func = rq_qos_wake_function,
			.entry = LIST_HEAD_INIT(data.wq.entry),
		},
		.task = current,
		.rqw = rqw,
		.cb = acquire_inflight_cb,
		.private_data = private_data,
	};
	bool has_sleeper;

	has_sleeper = wq_has_sleeper(&rqw->wait);
	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
		return;

	has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
						 TASK_UNINTERRUPTIBLE);
	do {
		/* The memory barrier in set_current_state saves us here. */
		if (data.got_token)
			break;
		if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);

			/*
			 * We raced with rq_qos_wake_function() getting a
			 * token, which means we now have two. Put our local
			 * token and wake anyone else potentially waiting for
			 * one.
			 */
			smp_rmb();
			if (data.got_token)
				cleanup_cb(rqw, private_data);
			break;
		}
		io_schedule();
		has_sleeper = true;
		set_current_state(TASK_UNINTERRUPTIBLE);
	} while (1);
	finish_wait(&rqw->wait, &data.wq);
}

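/*
 * Tear down every rq_qos policy on @q: unregister the debugfs entries and
 * pop each policy off the chain, calling its ->exit() callback.
 */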
void rq_qos_exit(struct request_queue *q)
{
	blk_mq_debugfs_unregister_queue_rqos(q);

	while (q->rq_qos) {
		struct rq_qos *rqos = q->rq_qos;
		q->rq_qos = rqos->next;
		rqos->ops->exit(rqos);
	}
}