// SPDX-License-Identifier: GPL-2.0-only
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
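
/*
 * Example (illustrative, not used by the code below): once a reverse-map
 * has been populated, consumers look up the nearest object for a CPU
 * through the accessors declared in <linux/cpu_rmap.h>:
 *
 *	unsigned int cpu = raw_smp_processor_id();
 *	u16 index = cpu_rmap_lookup_index(rmap, cpu);	// nearest object's index
 *	void *obj = cpu_rmap_lookup_obj(rmap, cpu);	// nearest object itself
 *
 * Accelerated RFS, for instance, uses this kind of lookup to pick the
 * RX queue whose IRQ affinity is closest to the CPU handling a flow.
 */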

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
	struct cpu_rmap *rmap;
	unsigned int cpu;
	size_t obj_offset;

	/* This is a silly number of objects, and we use u16 indices. */
	if (size > 0xffff)
		return NULL;

	/* Offset of object pointer array from base structure */
	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
			   sizeof(void *));

	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
	if (!rmap)
		return NULL;

	kref_init(&rmap->refcount);
	rmap->obj = (void **)((char *)rmap + obj_offset);

	/* Initially assign CPUs to objects on a rota, since we have
	 * no idea where the objects are.  Use infinite distance, so
	 * any object with known distance is preferable.  Include the
	 * CPUs that are not present/online, since we definitely want
	 * any newly-hotplugged CPUs to have some object assigned.
	 */
	for_each_possible_cpu(cpu) {
		rmap->near[cpu].index = cpu % size;
		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
	}

	rmap->size = size;
	return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
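
/*
 * Example (illustrative sketch; the queue count and error handling are
 * hypothetical): a driver tracking four RX queues could allocate a map
 * and later drop the allocation reference with cpu_rmap_put():
 *
 *	struct cpu_rmap *rmap = alloc_cpu_rmap(4, GFP_KERNEL);
 *
 *	if (!rmap)
 *		return -ENOMEM;
 *	// ... cpu_rmap_add()/cpu_rmap_update(), or irq_cpu_rmap_add() ...
 *	cpu_rmap_put(rmap);
 *
 * For the IRQ case, <linux/cpu_rmap.h> also provides
 * alloc_irq_cpu_rmap(size), a thin wrapper around
 * alloc_cpu_rmap(size, GFP_KERNEL).
 */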

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
	kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
	kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
	return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);
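
/*
 * Reference counting summary (illustrative): alloc_cpu_rmap() returns a
 * map with one reference held by the caller; every additional holder
 * (such as the irq_glue structures below) takes its own reference with
 * cpu_rmap_get() and drops it with cpu_rmap_put().  The map is freed
 * only when the last reference goes away:
 *
 *	rmap = alloc_cpu_rmap(n, GFP_KERNEL);	// refcount == 1
 *	cpu_rmap_get(rmap);			// refcount == 2
 *	cpu_rmap_put(rmap);			// refcount == 1, still live
 *	cpu_rmap_put(rmap);			// refcount == 0, kfree()d
 */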

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
				const struct cpumask *mask, u16 dist)
{
	int neigh;

	for_each_cpu(neigh, mask) {
		if (rmap->near[cpu].dist > dist &&
		    rmap->near[neigh].dist <= dist) {
			rmap->near[cpu].index = rmap->near[neigh].index;
			rmap->near[cpu].dist = dist;
			return true;
		}
	}
	return false;
}
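
/*
 * Worked example (illustrative): suppose object 3's affinity is CPU 0,
 * so near[0] = { .index = 3, .dist = 0 }.  When CPU 1, an SMT sibling
 * of CPU 0, is re-evaluated with dist == 1 over
 * topology_sibling_cpumask(1), near[1].dist (CPU_RMAP_DIST_INF, say) is
 * greater than 1 and near[0].dist (0) is <= 1, so CPU 1 inherits
 * index 3 at distance 1.  Later passes at dist == 2 or 3 leave it
 * alone, because near[1].dist is no longer greater than dist.
 */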

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned index;
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		index = rmap->near[cpu].index;
		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, index, rmap->near[cpu].dist);
	}
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
	u16 index;

	BUG_ON(rmap->used >= rmap->size);
	index = rmap->used++;
	rmap->obj[index] = obj;
	return index;
}
EXPORT_SYMBOL(cpu_rmap_add);
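
/*
 * Example (illustrative sketch; the queue array is hypothetical):
 * objects are added once, in order, and the returned index identifies
 * the object in later cpu_rmap_update() calls:
 *
 *	for (i = 0; i < n_queues; i++)
 *		queue[i].rmap_index = cpu_rmap_add(rmap, &queue[i]);
 *
 * Indices are assigned sequentially from 0.  Adding more objects than
 * the @size passed to alloc_cpu_rmap() trips the BUG_ON() above.
 */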

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
		    const struct cpumask *affinity)
{
	cpumask_var_t update_mask;
	unsigned int cpu;

	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
		return -ENOMEM;

	/* Invalidate distance for all CPUs for which this used to be
	 * the nearest object.  Mark those CPUs for update.
	 */
	for_each_online_cpu(cpu) {
		if (rmap->near[cpu].index == index) {
			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
			cpumask_set_cpu(cpu, update_mask);
		}
	}

	debug_print_rmap(rmap, "after invalidating old distances");

	/* Set distance to 0 for all CPUs in the new affinity mask.
	 * Mark all CPUs within their NUMA nodes for update.
	 */
	for_each_cpu(cpu, affinity) {
		rmap->near[cpu].index = index;
		rmap->near[cpu].dist = 0;
		cpumask_or(update_mask, update_mask,
			   cpumask_of_node(cpu_to_node(cpu)));
	}

	debug_print_rmap(rmap, "after updating neighbours");

	/* Update distances based on topology */
	for_each_cpu(cpu, update_mask) {
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_sibling_cpumask(cpu), 1))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					topology_core_cpumask(cpu), 2))
			continue;
		if (cpu_rmap_copy_neigh(rmap, cpu,
					cpumask_of_node(cpu_to_node(cpu)), 3))
			continue;
		/* We could continue into NUMA node distances, but for now
		 * we give up.
		 */
	}

	debug_print_rmap(rmap, "after copying neighbours");

	free_cpumask_var(update_mask);
	return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
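
/*
 * Example (illustrative): if the object registered as index 2 becomes
 * affine to CPU 5, the caller reports it like this; CPUs in CPU 5's
 * NUMA node are then re-evaluated, inheriting the new index at
 * distance 1 (SMT sibling), 2 (same core/package) or 3 (same node)
 * whenever that beats what they already had:
 *
 *	rc = cpu_rmap_update(rmap, 2, cpumask_of(5));
 *	// rc is 0 on success, or -ENOMEM if the temporary cpumask
 *	// cannot be allocated
 *
 * When the IRQ glue below is used, this call is made automatically from
 * irq_cpu_rmap_notify() whenever the IRQ's affinity changes.
 */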

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
	struct irq_affinity_notify notify;
	struct cpu_rmap *rmap;
	u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
	struct irq_glue *glue;
	u16 index;

	if (!rmap)
		return;

	for (index = 0; index < rmap->used; index++) {
		glue = rmap->obj[index];
		irq_set_affinity_notifier(glue->notify.irq, NULL);
	}

	cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
	struct irq_glue *glue =
		container_of(notify, struct irq_glue, notify);
	int rc;

	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
	if (rc)
		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
	struct irq_glue *glue =
		container_of(ref, struct irq_glue, notify.kref);

	cpu_rmap_put(glue->rmap);
	kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
	int rc;

	if (!glue)
		return -ENOMEM;
	glue->notify.notify = irq_cpu_rmap_notify;
	glue->notify.release = irq_cpu_rmap_release;
	glue->rmap = rmap;
	cpu_rmap_get(rmap);
	glue->index = cpu_rmap_add(rmap, glue);
	rc = irq_set_affinity_notifier(irq, &glue->notify);
	if (rc) {
		cpu_rmap_put(glue->rmap);
		kfree(glue);
	}
	return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
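
/*
 * Example (illustrative sketch of a typical driver sequence; queue_irq[]
 * and the error label are hypothetical):
 *
 *	rmap = alloc_irq_cpu_rmap(n_queues);
 *	if (!rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < n_queues; i++) {
 *		rc = irq_cpu_rmap_add(rmap, queue_irq[i]);
 *		if (rc)
 *			goto fail;
 *	}
 *	// ... request_irq() for each queue, steer flows via the rmap ...
 *
 *	// teardown: detach the notifiers before freeing the IRQs,
 *	// then drop the caller's reference
 *	free_irq_cpu_rmap(rmap);
 *
 * free_irq_cpu_rmap() clears each affinity notifier and drops the
 * reference taken by alloc_cpu_rmap(); each irq_glue's own reference is
 * dropped later, from irq_cpu_rmap_release(), once its notifier kref is
 * released.
 */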