1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos policy for assigning an I/O priority class to requests.
 *
 * Using an rq-qos policy for assigning I/O priority class has two advantages
 * over using the ioprio_set() system call:
 *
 * - This policy is cgroup based so it has all the advantages of cgroups.
 * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
 *   controller affects page cache writeback I/O for filesystems that support
 *   associating a cgroup with writeback I/O. See also
 *   Documentation/admin-guide/cgroup-v2.rst.
 */
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include <linux/blk-cgroup.h>
16*4882a593Smuzhiyun #include <linux/blk-mq.h>
17*4882a593Smuzhiyun #include <linux/blk_types.h>
18*4882a593Smuzhiyun #include <linux/kernel.h>
19*4882a593Smuzhiyun #include <linux/module.h>
20*4882a593Smuzhiyun #include "blk-ioprio.h"
21*4882a593Smuzhiyun #include "blk-rq-qos.h"
22*4882a593Smuzhiyun
/**
 * enum prio_policy - I/O priority class policy.
 * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
 * @POLICY_NONE_TO_RT: modify IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
 * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_BE.
 * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
 *
 * The numerical values are significant: the rq-qos callback in this file
 * passes the policy value as the class argument of IOPRIO_PRIO_VALUE(), so
 * the constants must not be renumbered.
 *
 * See also <linux/ioprio.h>.
 */
enum prio_policy {
	POLICY_NO_CHANGE	= 0,
	POLICY_NONE_TO_RT	= 1,
	POLICY_RESTRICT_TO_BE	= 2,
	POLICY_ALL_TO_IDLE	= 3,
};
39*4882a593Smuzhiyun
40*4882a593Smuzhiyun static const char *policy_name[] = {
41*4882a593Smuzhiyun [POLICY_NO_CHANGE] = "no-change",
42*4882a593Smuzhiyun [POLICY_NONE_TO_RT] = "none-to-rt",
43*4882a593Smuzhiyun [POLICY_RESTRICT_TO_BE] = "restrict-to-be",
44*4882a593Smuzhiyun [POLICY_ALL_TO_IDLE] = "idle",
45*4882a593Smuzhiyun };
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun static struct blkcg_policy ioprio_policy;
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun /**
50*4882a593Smuzhiyun * struct ioprio_blkg - Per (cgroup, request queue) data.
51*4882a593Smuzhiyun * @pd: blkg_policy_data structure.
52*4882a593Smuzhiyun */
53*4882a593Smuzhiyun struct ioprio_blkg {
54*4882a593Smuzhiyun struct blkg_policy_data pd;
55*4882a593Smuzhiyun };
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun /**
58*4882a593Smuzhiyun * struct ioprio_blkcg - Per cgroup data.
59*4882a593Smuzhiyun * @cpd: blkcg_policy_data structure.
60*4882a593Smuzhiyun * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
61*4882a593Smuzhiyun */
62*4882a593Smuzhiyun struct ioprio_blkcg {
63*4882a593Smuzhiyun struct blkcg_policy_data cpd;
64*4882a593Smuzhiyun enum prio_policy prio_policy;
65*4882a593Smuzhiyun };
66*4882a593Smuzhiyun
pd_to_ioprio(struct blkg_policy_data * pd)67*4882a593Smuzhiyun static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
68*4882a593Smuzhiyun {
69*4882a593Smuzhiyun return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
70*4882a593Smuzhiyun }
71*4882a593Smuzhiyun
blkcg_to_ioprio_blkcg(struct blkcg * blkcg)72*4882a593Smuzhiyun static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
75*4882a593Smuzhiyun struct ioprio_blkcg, cpd);
76*4882a593Smuzhiyun }
77*4882a593Smuzhiyun
/*
 * Map a cgroup subsystem state onto the ioprio per-cgroup data by first
 * converting @css into its blkcg.
 */
static struct ioprio_blkcg *
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	return blkcg_to_ioprio_blkcg(blkcg);
}
83*4882a593Smuzhiyun
ioprio_blkcg_from_bio(struct bio * bio)84*4882a593Smuzhiyun static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun if (!pd)
89*4882a593Smuzhiyun return NULL;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
ioprio_show_prio_policy(struct seq_file * sf,void * v)94*4882a593Smuzhiyun static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]);
99*4882a593Smuzhiyun return 0;
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun
ioprio_set_prio_policy(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)102*4882a593Smuzhiyun static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
103*4882a593Smuzhiyun size_t nbytes, loff_t off)
104*4882a593Smuzhiyun {
105*4882a593Smuzhiyun struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of));
106*4882a593Smuzhiyun int ret;
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun if (off != 0)
109*4882a593Smuzhiyun return -EIO;
110*4882a593Smuzhiyun /* kernfs_fop_write_iter() terminates 'buf' with '\0'. */
111*4882a593Smuzhiyun ret = sysfs_match_string(policy_name, buf);
112*4882a593Smuzhiyun if (ret < 0)
113*4882a593Smuzhiyun return ret;
114*4882a593Smuzhiyun blkcg->prio_policy = ret;
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun return nbytes;
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun static struct blkg_policy_data *
ioprio_alloc_pd(gfp_t gfp,struct request_queue * q,struct blkcg * blkcg)120*4882a593Smuzhiyun ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
121*4882a593Smuzhiyun {
122*4882a593Smuzhiyun struct ioprio_blkg *ioprio_blkg;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
125*4882a593Smuzhiyun if (!ioprio_blkg)
126*4882a593Smuzhiyun return NULL;
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun return &ioprio_blkg->pd;
129*4882a593Smuzhiyun }
130*4882a593Smuzhiyun
/* pd_free_fn callback: free the ioprio_blkg that embeds @pd. */
static void ioprio_free_pd(struct blkg_policy_data *pd)
{
	kfree(pd_to_ioprio(pd));
}
137*4882a593Smuzhiyun
/*
 * cpd_alloc_fn callback: allocate zeroed per-cgroup data whose policy is
 * POLICY_NO_CHANGE. Returns the embedded policy data, or NULL if the
 * allocation failed.
 */
static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
{
	struct ioprio_blkcg *ioprio_cg = kzalloc(sizeof(*ioprio_cg), gfp);

	if (ioprio_cg) {
		ioprio_cg->prio_policy = POLICY_NO_CHANGE;
		return &ioprio_cg->cpd;
	}

	return NULL;
}
148*4882a593Smuzhiyun
ioprio_free_cpd(struct blkcg_policy_data * cpd)149*4882a593Smuzhiyun static void ioprio_free_cpd(struct blkcg_policy_data *cpd)
150*4882a593Smuzhiyun {
151*4882a593Smuzhiyun struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd);
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun kfree(blkcg);
154*4882a593Smuzhiyun }
155*4882a593Smuzhiyun
/*
 * Initializer list for a cftype array: the "prio.class" file followed by the
 * empty terminating entry. Used for both the cgroup v1 and the cgroup v2
 * file tables.
 */
#define IOPRIO_ATTRS						\
	{							\
		.name		= "prio.class",			\
		.seq_show	= ioprio_show_prio_policy,	\
		.write		= ioprio_set_prio_policy,	\
	},							\
	{ } /* sentinel */
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun /* cgroup v2 attributes */
165*4882a593Smuzhiyun static struct cftype ioprio_files[] = {
166*4882a593Smuzhiyun IOPRIO_ATTRS
167*4882a593Smuzhiyun };
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun /* cgroup v1 attributes */
170*4882a593Smuzhiyun static struct cftype ioprio_legacy_files[] = {
171*4882a593Smuzhiyun IOPRIO_ATTRS
172*4882a593Smuzhiyun };
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun static struct blkcg_policy ioprio_policy = {
175*4882a593Smuzhiyun .dfl_cftypes = ioprio_files,
176*4882a593Smuzhiyun .legacy_cftypes = ioprio_legacy_files,
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun .cpd_alloc_fn = ioprio_alloc_cpd,
179*4882a593Smuzhiyun .cpd_free_fn = ioprio_free_cpd,
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun .pd_alloc_fn = ioprio_alloc_pd,
182*4882a593Smuzhiyun .pd_free_fn = ioprio_free_pd,
183*4882a593Smuzhiyun };
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun struct blk_ioprio {
186*4882a593Smuzhiyun struct rq_qos rqos;
187*4882a593Smuzhiyun };
188*4882a593Smuzhiyun
blkcg_ioprio_track(struct rq_qos * rqos,struct request * rq,struct bio * bio)189*4882a593Smuzhiyun static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
190*4882a593Smuzhiyun struct bio *bio)
191*4882a593Smuzhiyun {
192*4882a593Smuzhiyun struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
193*4882a593Smuzhiyun
194*4882a593Smuzhiyun /*
195*4882a593Smuzhiyun * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
196*4882a593Smuzhiyun * correspond to a lower priority. Hence, the max_t() below selects
197*4882a593Smuzhiyun * the lower priority of bi_ioprio and the cgroup I/O priority class.
198*4882a593Smuzhiyun * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the
199*4882a593Smuzhiyun * bio I/O priority is not modified. If the bio I/O priority equals
200*4882a593Smuzhiyun * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
201*4882a593Smuzhiyun */
202*4882a593Smuzhiyun bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
203*4882a593Smuzhiyun IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
204*4882a593Smuzhiyun }
205*4882a593Smuzhiyun
blkcg_ioprio_exit(struct rq_qos * rqos)206*4882a593Smuzhiyun static void blkcg_ioprio_exit(struct rq_qos *rqos)
207*4882a593Smuzhiyun {
208*4882a593Smuzhiyun struct blk_ioprio *blkioprio_blkg =
209*4882a593Smuzhiyun container_of(rqos, typeof(*blkioprio_blkg), rqos);
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun blkcg_deactivate_policy(rqos->q, &ioprio_policy);
212*4882a593Smuzhiyun kfree(blkioprio_blkg);
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun static struct rq_qos_ops blkcg_ioprio_ops = {
216*4882a593Smuzhiyun .track = blkcg_ioprio_track,
217*4882a593Smuzhiyun .exit = blkcg_ioprio_exit,
218*4882a593Smuzhiyun };
219*4882a593Smuzhiyun
blk_ioprio_init(struct request_queue * q)220*4882a593Smuzhiyun int blk_ioprio_init(struct request_queue *q)
221*4882a593Smuzhiyun {
222*4882a593Smuzhiyun struct blk_ioprio *blkioprio_blkg;
223*4882a593Smuzhiyun struct rq_qos *rqos;
224*4882a593Smuzhiyun int ret;
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL);
227*4882a593Smuzhiyun if (!blkioprio_blkg)
228*4882a593Smuzhiyun return -ENOMEM;
229*4882a593Smuzhiyun
230*4882a593Smuzhiyun ret = blkcg_activate_policy(q, &ioprio_policy);
231*4882a593Smuzhiyun if (ret) {
232*4882a593Smuzhiyun kfree(blkioprio_blkg);
233*4882a593Smuzhiyun return ret;
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun rqos = &blkioprio_blkg->rqos;
237*4882a593Smuzhiyun rqos->id = RQ_QOS_IOPRIO;
238*4882a593Smuzhiyun rqos->ops = &blkcg_ioprio_ops;
239*4882a593Smuzhiyun rqos->q = q;
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun /*
242*4882a593Smuzhiyun * Registering the rq-qos policy after activating the blk-cgroup
243*4882a593Smuzhiyun * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the
244*4882a593Smuzhiyun * rq-qos callbacks.
245*4882a593Smuzhiyun */
246*4882a593Smuzhiyun rq_qos_add(q, rqos);
247*4882a593Smuzhiyun
248*4882a593Smuzhiyun return 0;
249*4882a593Smuzhiyun }
250*4882a593Smuzhiyun
ioprio_init(void)251*4882a593Smuzhiyun static int __init ioprio_init(void)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun return blkcg_policy_register(&ioprio_policy);
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun
ioprio_exit(void)256*4882a593Smuzhiyun static void __exit ioprio_exit(void)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun blkcg_policy_unregister(&ioprio_policy);
259*4882a593Smuzhiyun }
260*4882a593Smuzhiyun
/* Hook the policy registration into module load and unload. */
module_init(ioprio_init);
module_exit(ioprio_exit);
263