// SPDX-License-Identifier: GPL-2.0-only
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities. This can be seen as a reverse-map of
 * CPU affinity. However, we do not assume that the object affinities
 * cover all CPUs in the system. For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
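
/*
 * Typical usage, as a minimal sketch (the "my_*" names are hypothetical
 * and error handling is omitted; the lookup helpers are declared in
 * <linux/cpu_rmap.h>):
 *
 *	rmap = alloc_cpu_rmap(n_objs, GFP_KERNEL);
 *	for (i = 0; i < n_objs; i++)
 *		cpu_rmap_add(rmap, &my_objs[i]);
 *	...
 *	cpu_rmap_update(rmap, obj_index, new_affinity_mask);
 *	...
 *	obj = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());
 *	...
 *	cpu_rmap_put(rmap);
 */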

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are. Use infinite distance, so
	 * any object with known distance is preferable. Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);

	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object. Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_sibling_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
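
/*
 * Illustrative example of the distance levels used above, assuming a
 * hypothetical SMT system: after cpu_rmap_update(rmap, 2, mask) where
 * @mask contains only CPU 4, CPU 4 maps to object 2 at distance 0, CPU
 * 4's SMT sibling inherits it at distance 1, the other CPUs sharing its
 * core/package cpumask at distance 2, and the remaining CPUs of the same
 * NUMA node at distance 3. A CPU already mapped to another object at an
 * equal or smaller distance is left unchanged.
 */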

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;
	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);
	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
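
/*
 * Typical driver usage of the IRQ glue, as a minimal sketch (names other
 * than the cpu_rmap API are hypothetical, error handling is omitted, and
 * alloc_irq_cpu_rmap() is the GFP_KERNEL wrapper from <linux/cpu_rmap.h>):
 *
 *	rmap = alloc_irq_cpu_rmap(n_irqs);
 *	for (i = 0; i < n_irqs; i++)
 *		irq_cpu_rmap_add(rmap, my_irq_vector[i]);
 *
 * A network driver using accelerated RFS would then typically point
 * netdev->rx_cpu_rmap at this map. Before freeing the IRQs, the driver
 * calls free_irq_cpu_rmap(rmap), which removes the notifiers and drops
 * the map's reference.
 */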