xref: /OK3568_Linux_fs/kernel/Documentation/block/ioprio.rst (revision 4882a59341e53eb6f0b4789bf948001014eff981)
===================
Block io priorities
===================
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun
Intro
-----
8*4882a593Smuzhiyun
With the introduction of cfq v3 (aka cfq-ts or time-sliced cfq), basic io
priorities are supported for reads on files.  This enables users to "io nice"
processes or process groups, similar to what has been possible with cpu
scheduling for ages.  This document mainly details the current possibilities
with cfq; other io schedulers do not support io priorities thus far.
14*4882a593Smuzhiyun
Scheduling classes
------------------
17*4882a593Smuzhiyun
CFQ implements three generic scheduling classes that determine how io is
served for a process.
20*4882a593Smuzhiyun
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. It therefore needs to be used with
some care, because a single io RT process can starve the entire system. Within
the RT class, there are 8 levels of class data that determine exactly how much
time this process needs the disk for on each service. In the future this might
change to be more directly mappable to performance, by passing in a wanted
data rate instead.
29*4882a593Smuzhiyun
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get; it maps directly onto
the cpu nice levels, just more coarsely. 0 is the highest BE prio level, 7 is
the lowest. The mapping between cpu nice level and io nice level is determined
as: io_nice = (cpu_nice + 20) / 5, using integer division, so cpu nice levels
-20..19 map to io nice levels 0..7.
36*4882a593Smuzhiyun
IOPRIO_CLASS_IDLE: This is the idle scheduling class. Processes running at
this level only get io time when no one else needs the disk. The idle class
has no class data, since it doesn't really apply here.
40*4882a593Smuzhiyun
Tools
-----
43*4882a593Smuzhiyun
See below for a sample ionice tool. Usage::

	# ionice -c<class> -n<level> -p<pid>
47*4882a593Smuzhiyun
If pid isn't given, the current process is assumed. IO priority settings
are inherited on fork, so you can use ionice to start the process at a given
level::

	# ionice -c2 -n0 /bin/ls
53*4882a593Smuzhiyun
will run ls at the best-effort scheduling class at the highest priority.
For a running process, you can give the pid instead::

	# ionice -c1 -n2 -p100
58*4882a593Smuzhiyun
will change pid 100 to run at the realtime scheduling class, at priority 2.
60*4882a593Smuzhiyun
ionice.c tool::
62*4882a593Smuzhiyun
  #include <stdio.h>
  #include <stdlib.h>
  #include <errno.h>
  #include <limits.h>
  #include <getopt.h>
  #include <unistd.h>
  #include <sys/ptrace.h>
  #include <asm/unistd.h>
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun  extern int sys_ioprio_set(int, int, int);
72*4882a593Smuzhiyun  extern int sys_ioprio_get(int, int);
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun  #if defined(__i386__)
75*4882a593Smuzhiyun  #define __NR_ioprio_set		289
76*4882a593Smuzhiyun  #define __NR_ioprio_get		290
77*4882a593Smuzhiyun  #elif defined(__ppc__)
78*4882a593Smuzhiyun  #define __NR_ioprio_set		273
79*4882a593Smuzhiyun  #define __NR_ioprio_get		274
80*4882a593Smuzhiyun  #elif defined(__x86_64__)
81*4882a593Smuzhiyun  #define __NR_ioprio_set		251
82*4882a593Smuzhiyun  #define __NR_ioprio_get		252
83*4882a593Smuzhiyun  #elif defined(__ia64__)
84*4882a593Smuzhiyun  #define __NR_ioprio_set		1274
85*4882a593Smuzhiyun  #define __NR_ioprio_get		1275
86*4882a593Smuzhiyun  #else
87*4882a593Smuzhiyun  #error "Unsupported arch"
88*4882a593Smuzhiyun  #endif
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun  static inline int ioprio_set(int which, int who, int ioprio)
91*4882a593Smuzhiyun  {
92*4882a593Smuzhiyun	return syscall(__NR_ioprio_set, which, who, ioprio);
93*4882a593Smuzhiyun  }
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun  static inline int ioprio_get(int which, int who)
96*4882a593Smuzhiyun  {
97*4882a593Smuzhiyun	return syscall(__NR_ioprio_get, which, who);
98*4882a593Smuzhiyun  }
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun  enum {
101*4882a593Smuzhiyun	IOPRIO_CLASS_NONE,
102*4882a593Smuzhiyun	IOPRIO_CLASS_RT,
103*4882a593Smuzhiyun	IOPRIO_CLASS_BE,
104*4882a593Smuzhiyun	IOPRIO_CLASS_IDLE,
105*4882a593Smuzhiyun  };
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun  enum {
108*4882a593Smuzhiyun	IOPRIO_WHO_PROCESS = 1,
109*4882a593Smuzhiyun	IOPRIO_WHO_PGRP,
110*4882a593Smuzhiyun	IOPRIO_WHO_USER,
111*4882a593Smuzhiyun  };
112*4882a593Smuzhiyun
  /*
   * Bit offset of the scheduling class inside an ioprio value: main()
   * encodes a priority as data | (class << IOPRIO_CLASS_SHIFT) and recovers
   * the class with a right shift by the same amount.
   */
  #define IOPRIO_CLASS_SHIFT	13
114*4882a593Smuzhiyun
  /* Printable class names, indexed by IOPRIO_CLASS_* value; used by main(). */
  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun  int main(int argc, char *argv[])
118*4882a593Smuzhiyun  {
119*4882a593Smuzhiyun	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
120*4882a593Smuzhiyun	int c, pid = 0;
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
123*4882a593Smuzhiyun		switch (c) {
124*4882a593Smuzhiyun		case 'n':
125*4882a593Smuzhiyun			ioprio = strtol(optarg, NULL, 10);
126*4882a593Smuzhiyun			set = 1;
127*4882a593Smuzhiyun			break;
128*4882a593Smuzhiyun		case 'c':
129*4882a593Smuzhiyun			ioprio_class = strtol(optarg, NULL, 10);
130*4882a593Smuzhiyun			set = 1;
131*4882a593Smuzhiyun			break;
132*4882a593Smuzhiyun		case 'p':
133*4882a593Smuzhiyun			pid = strtol(optarg, NULL, 10);
134*4882a593Smuzhiyun			break;
135*4882a593Smuzhiyun		}
136*4882a593Smuzhiyun	}
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun	switch (ioprio_class) {
139*4882a593Smuzhiyun		case IOPRIO_CLASS_NONE:
140*4882a593Smuzhiyun			ioprio_class = IOPRIO_CLASS_BE;
141*4882a593Smuzhiyun			break;
142*4882a593Smuzhiyun		case IOPRIO_CLASS_RT:
143*4882a593Smuzhiyun		case IOPRIO_CLASS_BE:
144*4882a593Smuzhiyun			break;
145*4882a593Smuzhiyun		case IOPRIO_CLASS_IDLE:
146*4882a593Smuzhiyun			ioprio = 7;
147*4882a593Smuzhiyun			break;
148*4882a593Smuzhiyun		default:
149*4882a593Smuzhiyun			printf("bad prio class %d\n", ioprio_class);
150*4882a593Smuzhiyun			return 1;
151*4882a593Smuzhiyun	}
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun	if (!set) {
154*4882a593Smuzhiyun		if (!pid && argv[optind])
155*4882a593Smuzhiyun			pid = strtol(argv[optind], NULL, 10);
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun		printf("pid=%d, %d\n", pid, ioprio);
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun		if (ioprio == -1)
162*4882a593Smuzhiyun			perror("ioprio_get");
163*4882a593Smuzhiyun		else {
164*4882a593Smuzhiyun			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
165*4882a593Smuzhiyun			ioprio = ioprio & 0xff;
166*4882a593Smuzhiyun			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
167*4882a593Smuzhiyun		}
168*4882a593Smuzhiyun	} else {
169*4882a593Smuzhiyun		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
170*4882a593Smuzhiyun			perror("ioprio_set");
171*4882a593Smuzhiyun			return 1;
172*4882a593Smuzhiyun		}
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun		if (argv[optind])
175*4882a593Smuzhiyun			execvp(argv[optind], &argv[optind]);
176*4882a593Smuzhiyun	}
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun	return 0;
179*4882a593Smuzhiyun  }
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun
March 11 2005, Jens Axboe <jens.axboe@oracle.com>
183