// SPDX-License-Identifier: GPL-2.0
/* Manage affinity to optimize IPIs inside the kernel perf API. */
#define _GNU_SOURCE 1
#include <sched.h>
#include <stdlib.h>
#include <linux/bitmap.h>
#include <linux/zalloc.h>
#include "perf.h"
#include "cpumap.h"
#include "affinity.h"

/* Return the affinity mask size in bytes, rounding the bit count up. */
static int get_cpu_set_size(void)
{
	int sz = cpu__max_cpu() + 8 - 1;
	/*
	 * sched_getaffinity doesn't like masks smaller than the kernel.
	 * Hopefully that's big enough.
	 */
	if (sz < 4096)
		sz = 4096;
	return sz / 8;
}

int affinity__setup(struct affinity *a)
{
	int cpu_set_size = get_cpu_set_size();

	a->orig_cpus = bitmap_alloc(cpu_set_size * 8);
	if (!a->orig_cpus)
		return -1;
	/* Save the original mask so affinity__cleanup() can restore it. */
	sched_getaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
	a->sched_cpus = bitmap_alloc(cpu_set_size * 8);
	if (!a->sched_cpus) {
		zfree(&a->orig_cpus);
		return -1;
	}
	bitmap_zero((unsigned long *)a->sched_cpus, cpu_set_size);
	a->changed = false;
	return 0;
}

/*
 * perf_event_open does an IPI internally to the target CPU.
 * It is more efficient to change perf's affinity to the target
 * CPU and then set up all events on that CPU, so we amortize
 * CPU communication.
 */
void affinity__set(struct affinity *a, int cpu)
{
	int cpu_set_size = get_cpu_set_size();

	if (cpu == -1)
		return;
	a->changed = true;
	set_bit(cpu, a->sched_cpus);
	/*
	 * We ignore errors because affinity is just an optimization.
	 * This could happen for example with isolated CPUs or cpusets.
	 * In this case the IPIs inside the kernel's perf API still work.
	 */
	sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
	/* Clear the bit again so the next call pins to a single CPU only. */
	clear_bit(cpu, a->sched_cpus);
}

void affinity__cleanup(struct affinity *a)
{
	int cpu_set_size = get_cpu_set_size();

	if (a->changed)
		sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->orig_cpus);
	zfree(&a->sched_cpus);
	zfree(&a->orig_cpus);
}
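
/*
 * Illustrative usage sketch (not part of the original file): assuming a
 * caller that opens events CPU by CPU, the intended pattern is to set up
 * the affinity state once, migrate to each target CPU before opening its
 * events, and restore the original mask at the end. 'ncpus' and
 * open_events_on_cpu() are hypothetical; only affinity__setup(),
 * affinity__set() and affinity__cleanup() are defined above.
 *
 *	struct affinity affinity;
 *	int cpu;
 *
 *	if (affinity__setup(&affinity) < 0)
 *		return -1;
 *	for (cpu = 0; cpu < ncpus; cpu++) {
 *		affinity__set(&affinity, cpu);	// migrate to 'cpu' (best effort)
 *		open_events_on_cpu(cpu);	// e.g. perf_event_open() calls
 *	}
 *	affinity__cleanup(&affinity);		// restore the original mask
 */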