// SPDX-License-Identifier: GPL-2.0
/*
 * buffered writeback throttling. loosely based on CoDel. We can't drop
 * packets for IO scheduling, so the logic is something like this:
 *
 * - Monitor latencies in a defined window of time.
 * - If the minimum latency in the above window exceeds some target, increment
 *   scaling step and scale down queue depth by a factor of 2x. The monitoring
 *   window is then shrunk to 100 / sqrt(scaling step + 1).
 * - For any window where we don't have solid data on what the latencies
 *   look like, retain status quo.
 * - If latencies look good, decrement scaling step.
 * - If we're only doing writes, allow the scaling step to go negative. This
 *   will temporarily boost write performance, snapping back to a stable
 *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
 *   positive scaling steps where we shrink the monitoring window, a negative
 *   scaling step retains the default step==0 window size.
 *
 * Copyright (C) 2016 Jens Axboe
 *
 */
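/*
 * For example, with the default 100msec window the shrink rule above yields
 * roughly 70msec at scaling step 1 (100 / sqrt(2)), 57msec at step 2, and
 * 50msec at step 3; see rwb_arm_timer() below.
 */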
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>

#include "blk-wbt.h"
#include "blk-rq-qos.h"

#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>

static inline void wbt_clear_state(struct request *rq)
{
	rq->wbt_flags = 0;
}

static inline enum wbt_flags wbt_flags(struct request *rq)
{
	return rq->wbt_flags;
}

static inline bool wbt_is_tracked(struct request *rq)
{
	return rq->wbt_flags & WBT_TRACKED;
}

static inline bool wbt_is_read(struct request *rq)
{
	return rq->wbt_flags & WBT_READ;
}

enum {
	/*
	 * Default setting; we'll scale up (to 75% of QD max) or down (min 1)
	 * from here depending on device stats.
	 */
	RWB_DEF_DEPTH	= 16,

	/*
	 * 100msec window
	 */
	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,

	/*
	 * Disregard stats if we don't meet this minimum
	 */
	RWB_MIN_WRITE_SAMPLES	= 3,

	/*
	 * If we have this number of consecutive windows with not enough
	 * information to scale up or down, scale up.
	 */
	RWB_UNKNOWN_BUMP	= 5,
};

static inline bool rwb_enabled(struct rq_wb *rwb)
{
	return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT &&
		      rwb->wb_normal != 0;
}

static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
{
	if (rwb_enabled(rwb)) {
		const unsigned long cur = jiffies;

		if (cur != *var)
			*var = cur;
	}
}

/*
 * If a task was rate throttled in balance_dirty_pages() within the last
 * second or so, use that to indicate a higher cleaning rate.
 */
static bool wb_recent_wait(struct rq_wb *rwb)
{
	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;

	return time_before(jiffies, wb->dirty_sleep + HZ);
}

static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
					  enum wbt_flags wb_acct)
{
	if (wb_acct & WBT_KSWAPD)
		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
	else if (wb_acct & WBT_DISCARD)
		return &rwb->rq_wait[WBT_RWQ_DISCARD];

	return &rwb->rq_wait[WBT_RWQ_BG];
}

static void rwb_wake_all(struct rq_wb *rwb)
{
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++) {
		struct rq_wait *rqw = &rwb->rq_wait[i];

		if (wq_has_sleeper(&rqw->wait))
			wake_up_all(&rqw->wait);
	}
}

static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
			 enum wbt_flags wb_acct)
{
	int inflight, limit;

	inflight = atomic_dec_return(&rqw->inflight);

	/*
	 * wbt got disabled with IO in flight. Wake up any potential
	 * waiters, we don't have to do more than that.
	 */
	if (unlikely(!rwb_enabled(rwb))) {
		rwb_wake_all(rwb);
		return;
	}

	/*
	 * For discards, our limit is always the background. For writes, if
	 * the device does write back caching, drop further down before we
	 * wake people up.
	 */
	if (wb_acct & WBT_DISCARD)
		limit = rwb->wb_background;
	else if (rwb->wc && !wb_recent_wait(rwb))
		limit = 0;
	else
		limit = rwb->wb_normal;

	/*
	 * Don't wake anyone up if we are above the normal limit.
	 */
	if (inflight && inflight >= limit)
		return;

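	/*
	 * Illustrative numbers: with wb_normal == 8 and wb_background == 4
	 * (max depth 16), a waiter is woken once the queue fully drains or
	 * once at least wb_background / 2 == 2 slots have opened up below
	 * the limit.
	 */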
	if (wq_has_sleeper(&rqw->wait)) {
		int diff = limit - inflight;

		if (!inflight || diff >= rwb->wb_background / 2)
			wake_up_all(&rqw->wait);
	}
}

static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct rq_wait *rqw;

	if (!(wb_acct & WBT_TRACKED))
		return;

	rqw = get_rq_wait(rwb, wb_acct);
	wbt_rqw_done(rwb, rqw, wb_acct);
}

/*
 * Called on completion of a request. Note that it's also called when
 * a request is merged, at which point the request gets freed.
 */
static void wbt_done(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!wbt_is_tracked(rq)) {
		if (rwb->sync_cookie == rq) {
			rwb->sync_issue = 0;
			rwb->sync_cookie = NULL;
		}

		if (wbt_is_read(rq))
			wb_timestamp(rwb, &rwb->last_comp);
	} else {
		WARN_ON_ONCE(rq == rwb->sync_cookie);
		__wbt_done(rqos, wbt_flags(rq));
	}
	wbt_clear_state(rq);
}

static inline bool stat_sample_valid(struct blk_rq_stat *stat)
{
	/*
	 * We need at least one read sample, and a minimum of
	 * RWB_MIN_WRITE_SAMPLES. We require some write samples to know
	 * that it's writes impacting us, and not just some sole read on
	 * a device that is in a lower power state.
	 */
	return (stat[READ].nr_samples >= 1 &&
		stat[WRITE].nr_samples >= RWB_MIN_WRITE_SAMPLES);
}

static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
{
	u64 now, issue = READ_ONCE(rwb->sync_issue);

	if (!issue || !rwb->sync_cookie)
		return 0;

	now = ktime_to_ns(ktime_get());
	return now - issue;
}

enum {
	LAT_OK = 1,
	LAT_UNKNOWN,
	LAT_UNKNOWN_WRITES,
	LAT_EXCEEDED,
};

static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;
	u64 thislat;

	/*
	 * If our stored sync issue exceeds the window size, or it
	 * exceeds our min target AND we haven't logged any entries,
	 * flag the latency as exceeded. wbt works off completion latencies,
	 * but for a flooded device, a single sync IO can take a long time
	 * to complete after being issued. If this time exceeds our
	 * monitoring window AND we didn't see any other completions in that
	 * window, then count that sync IO as a violation of the latency.
	 */
	thislat = rwb_sync_issue_lat(rwb);
	if (thislat > rwb->cur_win_nsec ||
	    (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) {
		trace_wbt_lat(bdi, thislat);
		return LAT_EXCEEDED;
	}

	/*
	 * No read/write mix, if stat isn't valid
	 */
	if (!stat_sample_valid(stat)) {
		/*
		 * If we had writes in this stat window and the window is
		 * current, we're only doing writes. If a task recently
		 * waited or still has writes in flight, consider us doing
		 * just writes as well.
		 */
		if (stat[WRITE].nr_samples || wb_recent_wait(rwb) ||
		    wbt_inflight(rwb))
			return LAT_UNKNOWN_WRITES;
		return LAT_UNKNOWN;
	}

	/*
	 * If the 'min' latency exceeds our target, step down.
	 */
	if (stat[READ].min > rwb->min_lat_nsec) {
		trace_wbt_lat(bdi, stat[READ].min);
		trace_wbt_stat(bdi, stat);
		return LAT_EXCEEDED;
	}

	if (rqd->scale_step)
		trace_wbt_stat(bdi, stat);

	return LAT_OK;
}

static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
	struct rq_depth *rqd = &rwb->rq_depth;

	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
}

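/*
 * Derive the wake-up limits from the current max queue depth. For example,
 * at the default depth of 16 (RWB_DEF_DEPTH) this works out to
 * wb_normal == 8 and wb_background == 4.
 */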
static void calc_wb_limits(struct rq_wb *rwb)
{
	if (rwb->min_lat_nsec == 0) {
		rwb->wb_normal = rwb->wb_background = 0;
	} else if (rwb->rq_depth.max_depth <= 2) {
		rwb->wb_normal = rwb->rq_depth.max_depth;
		rwb->wb_background = 1;
	} else {
		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
	}
}

static void scale_up(struct rq_wb *rwb)
{
	if (!rq_depth_scale_up(&rwb->rq_depth))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_wake_all(rwb);
	rwb_trace_step(rwb, tracepoint_string("scale up"));
}

static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
	if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
		return;
	calc_wb_limits(rwb);
	rwb->unknown_cnt = 0;
	rwb_trace_step(rwb, tracepoint_string("scale down"));
}

static void rwb_arm_timer(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	if (rqd->scale_step > 0) {
		/*
		 * We should speed this up, using some variant of a fast
		 * integer inverse square root calculation. Since we only do
		 * this for every window expiration, it's not a huge deal,
		 * though.
		 */
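		/*
		 * (win_nsec << 4) / int_sqrt((step + 1) << 8) is a fixed
		 * point form of win_nsec / sqrt(step + 1), since both the
		 * numerator and int_sqrt((step + 1) << 8) carry a factor
		 * of 16.
		 */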
		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
					int_sqrt((rqd->scale_step + 1) << 8));
	} else {
		/*
		 * For step < 0, we don't want to increase/decrease the
		 * window size.
		 */
		rwb->cur_win_nsec = rwb->win_nsec;
	}

	blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec);
}

static void wb_timer_fn(struct blk_stat_callback *cb)
{
	struct rq_wb *rwb = cb->data;
	struct rq_depth *rqd = &rwb->rq_depth;
	unsigned int inflight = wbt_inflight(rwb);
	int status;

	status = latency_exceeded(rwb, cb->stat);

	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
			inflight);

	/*
	 * If we exceeded the latency target, step down. If we did not,
	 * step one level up. If we don't know enough to say either exceeded
	 * or ok, then don't do anything.
	 */
	switch (status) {
	case LAT_EXCEEDED:
		scale_down(rwb, true);
		break;
	case LAT_OK:
		scale_up(rwb);
		break;
	case LAT_UNKNOWN_WRITES:
		/*
		 * We started at the center step, but don't have a valid
		 * read/write sample, though we do have writes going on.
		 * Allow the step to go negative, to increase write perf.
		 */
		scale_up(rwb);
		break;
	case LAT_UNKNOWN:
		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
			break;
		/*
		 * We get here when we previously scaled the depth, and we
		 * currently don't have a valid read/write sample. For that
		 * case, slowly return to center state (step == 0).
		 */
		if (rqd->scale_step > 0)
			scale_up(rwb);
		else if (rqd->scale_step < 0)
			scale_down(rwb, false);
		break;
	default:
		break;
	}

	/*
	 * Re-arm timer, if we have IO in flight
	 */
	if (rqd->scale_step || inflight)
		rwb_arm_timer(rwb);
}

static void wbt_update_limits(struct rq_wb *rwb)
{
	struct rq_depth *rqd = &rwb->rq_depth;

	rqd->scale_step = 0;
	rqd->scaled_max = false;

	rq_depth_calc_max_depth(rqd);
	calc_wb_limits(rwb);

	rwb_wake_all(rwb);
}

u64 wbt_get_min_lat(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return 0;
	return RQWB(rqos)->min_lat_nsec;
}

void wbt_set_min_lat(struct request_queue *q, u64 val)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (!rqos)
		return;
	RQWB(rqos)->min_lat_nsec = val;
	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
	wbt_update_limits(RQWB(rqos));
}


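/*
 * True if unrelated (read) IO was issued or completed within the last
 * 100msec (HZ / 10). Used by get_limit() to detect competing IO, in which
 * case buffered writes are held to the lower wb_background limit.
 */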
static bool close_io(struct rq_wb *rwb)
{
	const unsigned long now = jiffies;

	return time_before(now, rwb->last_issue + HZ / 10) ||
		time_before(now, rwb->last_comp + HZ / 10);
}

#define REQ_HIPRIO	(REQ_SYNC | REQ_META | REQ_PRIO)

static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
{
	unsigned int limit;

	/*
	 * If we got disabled, just return UINT_MAX. This ensures that
	 * we'll properly inc a new IO, and dec+wakeup at the end.
	 */
	if (!rwb_enabled(rwb))
		return UINT_MAX;

	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
		return rwb->wb_background;

	/*
	 * At this point we know it's a buffered write. If this is
	 * kswapd trying to free memory, or REQ_SYNC is set, then
	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
	 * that. If the write is marked as a background write, then use
	 * the idle limit, or go to normal if we haven't had competing
	 * IO for a bit.
	 */
	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
		limit = rwb->rq_depth.max_depth;
	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
		/*
		 * If less than 100ms since we completed unrelated IO,
		 * limit us to half the depth for background writeback.
		 */
		limit = rwb->wb_background;
	} else
		limit = rwb->wb_normal;

	return limit;
}

struct wbt_wait_data {
	struct rq_wb *rwb;
	enum wbt_flags wb_acct;
	unsigned long rw;
};

static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
}

static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	struct wbt_wait_data *data = private_data;
	wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}

/*
 * Block if we will exceed our limit, or if we are currently waiting for
 * the timer to kick off queuing again.
 */
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
		       unsigned long rw)
{
	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
	struct wbt_wait_data data = {
		.rwb = rwb,
		.wb_acct = wb_acct,
		.rw = rw,
	};

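	/*
	 * rq_qos_wait() first tries wbt_inflight_cb() to claim an inflight
	 * slot below the current limit. If that fails it sleeps on rqw->wait
	 * until wbt_rqw_done() wakes it up; wbt_cleanup_cb() gives a slot
	 * back if we raced with the wakeup path and ended up holding two.
	 */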
	rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}

static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
{
	switch (bio_op(bio)) {
	case REQ_OP_WRITE:
		/*
		 * Don't throttle WRITE_ODIRECT
		 */
		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
		    (REQ_SYNC | REQ_IDLE))
			return false;
		fallthrough;
	case REQ_OP_DISCARD:
		return true;
	default:
		return false;
	}
}

static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
{
	enum wbt_flags flags = 0;

	if (!rwb_enabled(rwb))
		return 0;

	if (bio_op(bio) == REQ_OP_READ) {
		flags = WBT_READ;
	} else if (wbt_should_throttle(rwb, bio)) {
		if (current_is_kswapd())
			flags |= WBT_KSWAPD;
		if (bio_op(bio) == REQ_OP_DISCARD)
			flags |= WBT_DISCARD;
		flags |= WBT_TRACKED;
	}
	return flags;
}

static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags = bio_to_wbt_flags(rwb, bio);
	__wbt_done(rqos, flags);
}

/*
 * May sleep, if we have exceeded the writeback limits. Called from the bio
 * submission path without any spinlocks held, since we may need to sleep.
 */
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	enum wbt_flags flags;

	flags = bio_to_wbt_flags(rwb, bio);
	if (!(flags & WBT_TRACKED)) {
		if (flags & WBT_READ)
			wb_timestamp(rwb, &rwb->last_issue);
		return;
	}

	__wbt_wait(rwb, flags, bio->bi_opf);

	if (!blk_stat_is_active(rwb->cb))
		rwb_arm_timer(rwb);
}

static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
{
	struct rq_wb *rwb = RQWB(rqos);
	rq->wbt_flags |= bio_to_wbt_flags(rwb, bio);
}

static void wbt_issue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);

	if (!rwb_enabled(rwb))
		return;

	/*
	 * Track the issue time of this sync IO, so we can react more quickly
	 * if it takes a long time to complete. Note that this is just a
	 * hint. The request can go away when it completes, so it's important
	 * we never dereference it. We only use the address to compare with,
	 * which is why we store the sync_issue time locally.
	 */
	if (wbt_is_read(rq) && !rwb->sync_issue) {
		rwb->sync_cookie = rq;
		rwb->sync_issue = rq->io_start_time_ns;
	}
}

static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
{
	struct rq_wb *rwb = RQWB(rqos);
	if (!rwb_enabled(rwb))
		return;
	if (rq == rwb->sync_cookie) {
		rwb->sync_issue = 0;
		rwb->sync_cookie = NULL;
	}
}

void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	if (rqos)
		RQWB(rqos)->wc = write_cache_on;
}

/*
 * Enable wbt if defaults are configured that way
 */
void wbt_enable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);

	/* Throttling already enabled? */
	if (rqos) {
		if (RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
			RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
		return;
	}

	/* Queue not registered? Maybe shutting down... */
	if (!blk_queue_registered(q))
		return;

	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
		wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);

u64 wbt_default_latency_nsec(struct request_queue *q)
{
	/*
	 * We default to 2msec for non-rotational storage, and 75msec
	 * for rotational storage.
	 */
	if (blk_queue_nonrot(q))
		return 2000000ULL;
	else
		return 75000000ULL;
}

static int wbt_data_dir(const struct request *rq)
{
	const int op = req_op(rq);

	if (op == REQ_OP_READ)
		return READ;
	else if (op_is_write(op))
		return WRITE;

	/* don't account */
	return -1;
}

static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
	RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
	wbt_update_limits(RQWB(rqos));
}

static void wbt_exit(struct rq_qos *rqos)
{
	struct rq_wb *rwb = RQWB(rqos);
	struct request_queue *q = rqos->q;

	blk_stat_remove_callback(q, rwb->cb);
	blk_stat_free_callback(rwb->cb);
	kfree(rwb);
}

/*
 * Disable wbt, if enabled by default.
 */
void wbt_disable_default(struct request_queue *q)
{
	struct rq_qos *rqos = wbt_rq_qos(q);
	struct rq_wb *rwb;
	if (!rqos)
		return;
	rwb = RQWB(rqos);
	if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
		blk_stat_deactivate(rwb->cb);
		rwb->enable_state = WBT_STATE_OFF_DEFAULT;
	}
}
EXPORT_SYMBOL_GPL(wbt_disable_default);

#ifdef CONFIG_BLK_DEBUG_FS
static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%llu\n", rwb->cur_win_nsec);
	return 0;
}

static int wbt_enabled_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%d\n", rwb->enable_state);
	return 0;
}

static int wbt_id_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;

	seq_printf(m, "%u\n", rqos->id);
	return 0;
}

static int wbt_inflight_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);
	int i;

	for (i = 0; i < WBT_NUM_RWQ; i++)
		seq_printf(m, "%d: inflight %d\n", i,
			   atomic_read(&rwb->rq_wait[i].inflight));
	return 0;
}

static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%lu\n", rwb->min_lat_nsec);
	return 0;
}

static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->unknown_cnt);
	return 0;
}

static int wbt_normal_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_normal);
	return 0;
}

static int wbt_background_show(void *data, struct seq_file *m)
{
	struct rq_qos *rqos = data;
	struct rq_wb *rwb = RQWB(rqos);

	seq_printf(m, "%u\n", rwb->wb_background);
	return 0;
}

static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
	{"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
	{"enabled", 0400, wbt_enabled_show},
	{"id", 0400, wbt_id_show},
	{"inflight", 0400, wbt_inflight_show},
	{"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
	{"unknown_cnt", 0400, wbt_unknown_cnt_show},
	{"wb_normal", 0400, wbt_normal_show},
	{"wb_background", 0400, wbt_background_show},
	{},
};
#endif

static struct rq_qos_ops wbt_rqos_ops = {
	.throttle = wbt_wait,
	.issue = wbt_issue,
	.track = wbt_track,
	.requeue = wbt_requeue,
	.done = wbt_done,
	.cleanup = wbt_cleanup,
	.queue_depth_changed = wbt_queue_depth_changed,
	.exit = wbt_exit,
#ifdef CONFIG_BLK_DEBUG_FS
	.debugfs_attrs = wbt_debugfs_attrs,
#endif
};

int wbt_init(struct request_queue *q)
{
	struct rq_wb *rwb;
	int i;

	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
	if (!rwb)
		return -ENOMEM;

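	/*
	 * Two stat buckets (READ and WRITE), as classified by wbt_data_dir();
	 * wb_timer_fn() runs each time the monitoring window expires.
	 */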
	rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
	if (!rwb->cb) {
		kfree(rwb);
		return -ENOMEM;
	}

	for (i = 0; i < WBT_NUM_RWQ; i++)
		rq_wait_init(&rwb->rq_wait[i]);

	rwb->rqos.id = RQ_QOS_WBT;
	rwb->rqos.ops = &wbt_rqos_ops;
	rwb->rqos.q = q;
	rwb->last_comp = rwb->last_issue = jiffies;
	rwb->win_nsec = RWB_WINDOW_NSEC;
	rwb->enable_state = WBT_STATE_ON_DEFAULT;
	rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags);
	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
	rwb->min_lat_nsec = wbt_default_latency_nsec(q);

	wbt_queue_depth_changed(&rwb->rqos);

	/*
	 * Assign rwb and add the stats callback.
	 */
	rq_qos_add(q, &rwb->rqos);
	blk_stat_add_callback(q, rwb->cb);

	return 0;
}