========
CPU load
========

Linux exports various bits of information via ``/proc/stat`` and
``/proc/uptime`` that userland tools, such as top(1), use to calculate
the average time the system spent in a particular state, for example::

    $ iostat
    Linux 2.6.18.3-exp (linmac)     02/20/2007

    avg-cpu:  %user   %nice %system %iowait  %steal   %idle
              10.01    0.00    2.92    5.44    0.00   81.63

    ...

Here the system thinks that over the default sampling period the
system spent 10.01% of the time doing work in user space, 2.92% in the
kernel, and was overall 81.63% of the time idle.

In most cases the ``/proc/stat`` information reflects reality quite
closely; however, due to the nature of how and when the kernel collects
this data, sometimes it cannot be trusted at all.

So how is this information collected? Whenever a timer interrupt is
signalled, the kernel looks at what kind of task was running at that
moment and increments the counter that corresponds to this task's
kind/state. The problem with this is that the system could have
switched between various states multiple times between two timer
interrupts, yet the counter is incremented only for the last state.


Example
-------

Imagine a system with one task that periodically burns cycles in the
following manner::

     time line between two timer interrupts
    |--------------------------------------|
     ^                                    ^
     |_ something begins working          |
                                          |_ something goes to sleep
                                         (only to be awakened quite soon)

In the above situation the system will be 0% loaded according to
``/proc/stat`` (since the timer interrupt will always happen when the
system is executing the idle handler), but in reality the load is
closer to 99%.

One can imagine many more situations where this behavior of the kernel
will lead to quite erratic information inside ``/proc/stat``. The
following program demonstrates the effect::


    /* gcc -o hog smallhog.c */
    #include <time.h>
    #include <limits.h>
    #include <signal.h>
    #include <sys/time.h>
    #define HIST 10

    static volatile sig_atomic_t stop;

    static void sighandler(int signr)
    {
        (void) signr;
        stop = 1;
    }

    static unsigned long hog (unsigned long niters)
    {
        stop = 0;
        while (!stop && --niters);
        return niters;
    }

    int main (void)
    {
        int i;
        struct itimerval it = {
            .it_interval = { .tv_sec = 0, .tv_usec = 1 },
            .it_value    = { .tv_sec = 0, .tv_usec = 1 } };
        sigset_t set;
        unsigned long v[HIST];
        double tmp = 0.0;
        unsigned long n;
        signal(SIGALRM, &sighandler);
        setitimer(ITIMER_REAL, &it, NULL);

        hog (ULONG_MAX);
        for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog(ULONG_MAX);
        for (i = 0; i < HIST; ++i) tmp += v[i];
        tmp /= HIST;
        n = tmp - (tmp / 3.0);

        sigemptyset(&set);
        sigaddset(&set, SIGALRM);

        for (;;) {
            hog(n);
            sigwait(&set, &i);
        }
        return 0;
    }


References
----------

- http://lkml.org/lkml/2007/2/12/6
- Documentation/filesystems/proc.rst (1.8)


Thanks
------

Con Kolivas, Pavel Machek