xref: /OK3568_Linux_fs/kernel/block/ioprio.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * block/ioprio.c
 *
 * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
 *
 * Helper functions for setting/querying io priorities of processes. The
 * system calls closely mimic getpriority/setpriority, see the man page for
 * those. The prio argument is a composite of prio class and prio data, where
 * the data argument has meaning within that class. The standard scheduling
 * classes have 8 distinct prio levels, with 0 being the highest prio and 7
 * being the lowest.
 *
 * IOW, setting BE scheduling class with prio 2 is done ala:
 *
 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
 *
 * ioprio_set(PRIO_PROCESS, pid, prio);
 *
 * See also Documentation/block/ioprio.rst
 *
 */
23*4882a593Smuzhiyun #include <linux/gfp.h>
24*4882a593Smuzhiyun #include <linux/kernel.h>
25*4882a593Smuzhiyun #include <linux/export.h>
26*4882a593Smuzhiyun #include <linux/ioprio.h>
27*4882a593Smuzhiyun #include <linux/cred.h>
28*4882a593Smuzhiyun #include <linux/blkdev.h>
29*4882a593Smuzhiyun #include <linux/capability.h>
30*4882a593Smuzhiyun #include <linux/sched/user.h>
31*4882a593Smuzhiyun #include <linux/sched/task.h>
32*4882a593Smuzhiyun #include <linux/syscalls.h>
33*4882a593Smuzhiyun #include <linux/security.h>
34*4882a593Smuzhiyun #include <linux/pid_namespace.h>
35*4882a593Smuzhiyun 
set_task_ioprio(struct task_struct * task,int ioprio)36*4882a593Smuzhiyun int set_task_ioprio(struct task_struct *task, int ioprio)
37*4882a593Smuzhiyun {
38*4882a593Smuzhiyun 	int err;
39*4882a593Smuzhiyun 	struct io_context *ioc;
40*4882a593Smuzhiyun 	const struct cred *cred = current_cred(), *tcred;
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun 	rcu_read_lock();
43*4882a593Smuzhiyun 	tcred = __task_cred(task);
44*4882a593Smuzhiyun 	if (!uid_eq(tcred->uid, cred->euid) &&
45*4882a593Smuzhiyun 	    !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
46*4882a593Smuzhiyun 		rcu_read_unlock();
47*4882a593Smuzhiyun 		return -EPERM;
48*4882a593Smuzhiyun 	}
49*4882a593Smuzhiyun 	rcu_read_unlock();
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun 	err = security_task_setioprio(task, ioprio);
52*4882a593Smuzhiyun 	if (err)
53*4882a593Smuzhiyun 		return err;
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
56*4882a593Smuzhiyun 	if (ioc) {
57*4882a593Smuzhiyun 		ioc->ioprio = ioprio;
58*4882a593Smuzhiyun 		put_io_context(ioc);
59*4882a593Smuzhiyun 	}
60*4882a593Smuzhiyun 
61*4882a593Smuzhiyun 	return err;
62*4882a593Smuzhiyun }
63*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(set_task_ioprio);
64*4882a593Smuzhiyun 
ioprio_check_cap(int ioprio)65*4882a593Smuzhiyun int ioprio_check_cap(int ioprio)
66*4882a593Smuzhiyun {
67*4882a593Smuzhiyun 	int class = IOPRIO_PRIO_CLASS(ioprio);
68*4882a593Smuzhiyun 	int data = IOPRIO_PRIO_DATA(ioprio);
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun 	switch (class) {
71*4882a593Smuzhiyun 		case IOPRIO_CLASS_RT:
72*4882a593Smuzhiyun 			/*
73*4882a593Smuzhiyun 			 * Originally this only checked for CAP_SYS_ADMIN,
74*4882a593Smuzhiyun 			 * which was implicitly allowed for pid 0 by security
75*4882a593Smuzhiyun 			 * modules such as SELinux. Make sure we check
76*4882a593Smuzhiyun 			 * CAP_SYS_ADMIN first to avoid a denial/avc for
77*4882a593Smuzhiyun 			 * possibly missing CAP_SYS_NICE permission.
78*4882a593Smuzhiyun 			 */
79*4882a593Smuzhiyun 			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
80*4882a593Smuzhiyun 				return -EPERM;
81*4882a593Smuzhiyun 			fallthrough;
82*4882a593Smuzhiyun 			/* rt has prio field too */
83*4882a593Smuzhiyun 		case IOPRIO_CLASS_BE:
84*4882a593Smuzhiyun 			if (data >= IOPRIO_BE_NR || data < 0)
85*4882a593Smuzhiyun 				return -EINVAL;
86*4882a593Smuzhiyun 
87*4882a593Smuzhiyun 			break;
88*4882a593Smuzhiyun 		case IOPRIO_CLASS_IDLE:
89*4882a593Smuzhiyun 			break;
90*4882a593Smuzhiyun 		case IOPRIO_CLASS_NONE:
91*4882a593Smuzhiyun 			if (data)
92*4882a593Smuzhiyun 				return -EINVAL;
93*4882a593Smuzhiyun 			break;
94*4882a593Smuzhiyun 		default:
95*4882a593Smuzhiyun 			return -EINVAL;
96*4882a593Smuzhiyun 	}
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun 	return 0;
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun 
/*
 * ioprio_set - set the I/O priority of a process, process group or user
 * @which:  IOPRIO_WHO_PROCESS, IOPRIO_WHO_PGRP or IOPRIO_WHO_USER
 * @who:    pid, process group id or uid, interpreted according to @which;
 *          0 selects the caller (its own task, process group or user)
 * @ioprio: composite priority value, validated by ioprio_check_cap()
 *
 * Applies set_task_ioprio() to every matching task and stops at the first
 * failure.
 *
 * Returns 0 on success, the error from ioprio_check_cap() or
 * set_task_ioprio() on failure, -ESRCH when no task matched, and -EINVAL
 * for an unknown @which.
 */
SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
{
	struct task_struct *p, *g;
	struct user_struct *user;
	struct pid *pgrp;
	kuid_t uid;
	int ret;

	ret = ioprio_check_cap(ioprio);
	if (ret)
		return ret;

	ret = -ESRCH;
	rcu_read_lock();
	switch (which) {
	case IOPRIO_WHO_PROCESS:
		if (!who)
			p = current;
		else
			p = find_task_by_vpid(who);
		if (p)
			ret = set_task_ioprio(p, ioprio);
		break;
	case IOPRIO_WHO_PGRP:
		if (!who)
			pgrp = task_pgrp(current);
		else
			pgrp = find_vpid(who);

		/*
		 * Hold tasklist_lock across the walk, as ioprio_get() does
		 * for the same iteration.
		 */
		read_lock(&tasklist_lock);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			ret = set_task_ioprio(p, ioprio);
			if (ret) {
				/*
				 * do_each_pid_thread() is a nested loop, so a
				 * plain 'break' would only leave the inner
				 * thread loop and a later success could
				 * overwrite this error. Jump out instead.
				 */
				read_unlock(&tasklist_lock);
				goto out;
			}
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		read_unlock(&tasklist_lock);
		break;
	case IOPRIO_WHO_USER:
		uid = make_kuid(current_user_ns(), who);
		if (!uid_valid(uid))
			break;
		if (!who)
			user = current_user();
		else
			user = find_user(uid);

		if (!user)
			break;

		for_each_process_thread(g, p) {
			if (!uid_eq(task_uid(p), uid) ||
			    !task_pid_vnr(p))
				continue;
			ret = set_task_ioprio(p, ioprio);
			if (ret)
				goto free_uid;
		}
free_uid:
		/* Only the find_user() result is released here. */
		if (who)
			free_uid(user);
		break;
	default:
		ret = -EINVAL;
	}

out:
	rcu_read_unlock();
	return ret;
}
166*4882a593Smuzhiyun 
get_task_ioprio(struct task_struct * p)167*4882a593Smuzhiyun static int get_task_ioprio(struct task_struct *p)
168*4882a593Smuzhiyun {
169*4882a593Smuzhiyun 	int ret;
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 	ret = security_task_getioprio(p);
172*4882a593Smuzhiyun 	if (ret)
173*4882a593Smuzhiyun 		goto out;
174*4882a593Smuzhiyun 	ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM);
175*4882a593Smuzhiyun 	task_lock(p);
176*4882a593Smuzhiyun 	if (p->io_context)
177*4882a593Smuzhiyun 		ret = p->io_context->ioprio;
178*4882a593Smuzhiyun 	task_unlock(p);
179*4882a593Smuzhiyun out:
180*4882a593Smuzhiyun 	return ret;
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun 
ioprio_best(unsigned short aprio,unsigned short bprio)183*4882a593Smuzhiyun int ioprio_best(unsigned short aprio, unsigned short bprio)
184*4882a593Smuzhiyun {
185*4882a593Smuzhiyun 	if (!ioprio_valid(aprio))
186*4882a593Smuzhiyun 		aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
187*4882a593Smuzhiyun 	if (!ioprio_valid(bprio))
188*4882a593Smuzhiyun 		bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	return min(aprio, bprio);
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun 
SYSCALL_DEFINE2(ioprio_get,int,which,int,who)193*4882a593Smuzhiyun SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun 	struct task_struct *g, *p;
196*4882a593Smuzhiyun 	struct user_struct *user;
197*4882a593Smuzhiyun 	struct pid *pgrp;
198*4882a593Smuzhiyun 	kuid_t uid;
199*4882a593Smuzhiyun 	int ret = -ESRCH;
200*4882a593Smuzhiyun 	int tmpio;
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun 	rcu_read_lock();
203*4882a593Smuzhiyun 	switch (which) {
204*4882a593Smuzhiyun 		case IOPRIO_WHO_PROCESS:
205*4882a593Smuzhiyun 			if (!who)
206*4882a593Smuzhiyun 				p = current;
207*4882a593Smuzhiyun 			else
208*4882a593Smuzhiyun 				p = find_task_by_vpid(who);
209*4882a593Smuzhiyun 			if (p)
210*4882a593Smuzhiyun 				ret = get_task_ioprio(p);
211*4882a593Smuzhiyun 			break;
212*4882a593Smuzhiyun 		case IOPRIO_WHO_PGRP:
213*4882a593Smuzhiyun 			if (!who)
214*4882a593Smuzhiyun 				pgrp = task_pgrp(current);
215*4882a593Smuzhiyun 			else
216*4882a593Smuzhiyun 				pgrp = find_vpid(who);
217*4882a593Smuzhiyun 			read_lock(&tasklist_lock);
218*4882a593Smuzhiyun 			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
219*4882a593Smuzhiyun 				tmpio = get_task_ioprio(p);
220*4882a593Smuzhiyun 				if (tmpio < 0)
221*4882a593Smuzhiyun 					continue;
222*4882a593Smuzhiyun 				if (ret == -ESRCH)
223*4882a593Smuzhiyun 					ret = tmpio;
224*4882a593Smuzhiyun 				else
225*4882a593Smuzhiyun 					ret = ioprio_best(ret, tmpio);
226*4882a593Smuzhiyun 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
227*4882a593Smuzhiyun 			read_unlock(&tasklist_lock);
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun 			break;
230*4882a593Smuzhiyun 		case IOPRIO_WHO_USER:
231*4882a593Smuzhiyun 			uid = make_kuid(current_user_ns(), who);
232*4882a593Smuzhiyun 			if (!who)
233*4882a593Smuzhiyun 				user = current_user();
234*4882a593Smuzhiyun 			else
235*4882a593Smuzhiyun 				user = find_user(uid);
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 			if (!user)
238*4882a593Smuzhiyun 				break;
239*4882a593Smuzhiyun 
240*4882a593Smuzhiyun 			for_each_process_thread(g, p) {
241*4882a593Smuzhiyun 				if (!uid_eq(task_uid(p), user->uid) ||
242*4882a593Smuzhiyun 				    !task_pid_vnr(p))
243*4882a593Smuzhiyun 					continue;
244*4882a593Smuzhiyun 				tmpio = get_task_ioprio(p);
245*4882a593Smuzhiyun 				if (tmpio < 0)
246*4882a593Smuzhiyun 					continue;
247*4882a593Smuzhiyun 				if (ret == -ESRCH)
248*4882a593Smuzhiyun 					ret = tmpio;
249*4882a593Smuzhiyun 				else
250*4882a593Smuzhiyun 					ret = ioprio_best(ret, tmpio);
251*4882a593Smuzhiyun 			}
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 			if (who)
254*4882a593Smuzhiyun 				free_uid(user);
255*4882a593Smuzhiyun 			break;
256*4882a593Smuzhiyun 		default:
257*4882a593Smuzhiyun 			ret = -EINVAL;
258*4882a593Smuzhiyun 	}
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun 	rcu_read_unlock();
261*4882a593Smuzhiyun 	return ret;
262*4882a593Smuzhiyun }
263