xref: /OK3568_Linux_fs/external/rkwifibt/drivers/infineon/dhd_linux_lb.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
3  * Basically selected code segments from usb-cdc.c and usb-rndis.c
4  *
5  * Portions of this code are copyright (c) 2021 Cypress Semiconductor Corporation
6  *
7  * Copyright (C) 1999-2017, Broadcom Corporation
8  *
9  *      Unless you and Broadcom execute a separate written software license
10  * agreement governing use of this software, this software is licensed to you
11  * under the terms of the GNU General Public License version 2 (the "GPL"),
12  * available at http://www.broadcom.com/licenses/GPLv2.php, with the
13  * following added to such license:
14  *
15  *      As a special exception, the copyright holders of this software give you
16  * permission to link this software with independent modules, and to copy and
17  * distribute the resulting executable under terms of your choice, provided that
18  * you also meet, for each linked independent module, the terms and conditions of
19  * the license of that module.  An independent module is a module which is not
20  * derived from this software.  The special exception does not apply to any
21  * modifications of the software.
22  *
23  *      Notwithstanding the above, under no circumstances may you combine this
24  * software in any way with any other Broadcom software provided under a license
25  * other than the GPL, without Broadcom's express prior written consent.
26  *
27  *
28  * <<Broadcom-WL-IPTag/Open:>>
29  *
30  * $Id$
31  */
32 
33 #include <dhd_linux_priv.h>
34 
35 extern dhd_pub_t* g_dhd_pub;
36 
37 #if defined(DHD_LB)
38 
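/*
 * Set the boot-time default CPU assignment for each load-balance job:
 * NAPI rx on CPU1, rx/tx completion and tx processing on CPU2, and the
 * network-stack tx record on CPU0. These are refined later by
 * dhd_select_cpu_candidacy() as CPUs come and go.
 */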
39 void
40 dhd_lb_set_default_cpus(dhd_info_t *dhd)
41 {
42 	/* Default CPU allocation for the jobs */
43 	atomic_set(&dhd->rx_napi_cpu, 1);
44 	atomic_set(&dhd->rx_compl_cpu, 2);
45 	atomic_set(&dhd->tx_compl_cpu, 2);
46 	atomic_set(&dhd->tx_cpu, 2);
47 	atomic_set(&dhd->net_tx_cpu, 0);
48 }
49 
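/* Free the cpumask_var_t members allocated by dhd_cpumasks_init(). */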
50 void
51 dhd_cpumasks_deinit(dhd_info_t *dhd)
52 {
53 	free_cpumask_var(dhd->cpumask_curr_avail);
54 	free_cpumask_var(dhd->cpumask_primary);
55 	free_cpumask_var(dhd->cpumask_primary_new);
56 	free_cpumask_var(dhd->cpumask_secondary);
57 	free_cpumask_var(dhd->cpumask_secondary_new);
58 }
59 
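/*
 * Allocate the driver cpumasks, seed cpumask_curr_avail from the online
 * CPU mask and translate the DHD_LB_PRIMARY_CPUS/DHD_LB_SECONDARY_CPUS
 * bit masks into cpumask_primary/cpumask_secondary. For example (purely
 * illustrative values), a primary mask of 0xF0 would place CPUs 4-7 in
 * cpumask_primary.
 */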
60 int
61 dhd_cpumasks_init(dhd_info_t *dhd)
62 {
63 	int id;
64 	uint32 cpus, num_cpus = num_possible_cpus();
65 	int ret = 0;
66 
67 	DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n", __FUNCTION__,
68 		DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
69 
70 	if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
71 	    !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
72 	    !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
73 	    !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
74 	    !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
75 		DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
76 		ret = -ENOMEM;
77 		goto fail;
78 	}
79 
80 	cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
81 	cpumask_clear(dhd->cpumask_primary);
82 	cpumask_clear(dhd->cpumask_secondary);
83 
84 	if (num_cpus > 32) {
85 		DHD_ERROR(("%s max supported CPUs is 32, %d is too many\n", __FUNCTION__, num_cpus));
86 		ASSERT(0);
87 	}
88 
89 	cpus = DHD_LB_PRIMARY_CPUS;
90 	for (id = 0; id < num_cpus; id++) {
91 		if (isset(&cpus, id))
92 			cpumask_set_cpu(id, dhd->cpumask_primary);
93 	}
94 
95 	cpus = DHD_LB_SECONDARY_CPUS;
96 	for (id = 0; id < num_cpus; id++) {
97 		if (isset(&cpus, id))
98 			cpumask_set_cpu(id, dhd->cpumask_secondary);
99 	}
100 
101 	return ret;
102 fail:
103 	dhd_cpumasks_deinit(dhd);
104 	return ret;
105 }
106 
107 /*
108  * The CPU Candidacy Algorithm
109  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
110  * The available CPUs for selection are divided into two groups
111  *  Primary Set - A CPU mask that carries the First Choice CPUs
112  *  Secondary Set - A CPU mask that carries the Second Choice CPUs.
113  *
114  * There are two types of jobs that need to be assigned to the CPUs,
115  * picked from one of the above mentioned CPU groups. The jobs are
116  * 1) Rx Packet Processing - napi_cpu
117  * 2) Completion Processing (Tx, Rx) - compl_cpu
118  *
119  * To begin with, both napi_cpu and compl_cpu are on CPU0. Whenever a CPU goes
120  * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
121  * algorithm tries to pick the first available non-boot CPU (not CPU0) for
122  * napi_cpu. If there are more processors free, it assigns one to compl_cpu.
123  * It also tries to ensure, as far as possible, that napi_cpu and compl_cpu
124  * are not on the same CPU.
125  *
126  * By design, both Tx and Rx completion jobs run on the same CPU core, as this
127  * allows Tx completion skbs to be released into a local free pool from which
128  * the rx buffer posts could have been serviced. It is important to note
129  * that a Tx packet may not have a large enough buffer for rx posting.
130  */
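/*
 * Illustrative walk-through (assuming, for the sake of example, that
 * cpumask_primary covers CPUs 4-7 and all of them are online): napi_cpu
 * becomes the first primary CPU (4), tx_cpu the next one (5) and compl_cpu
 * the one after that (6). If neither the primary nor the secondary set has
 * an online CPU, the hard-coded fallback below is napi_cpu=1, compl_cpu=0,
 * tx_cpu=2.
 */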
131 void dhd_select_cpu_candidacy(dhd_info_t *dhd)
132 {
133 	uint32 primary_available_cpus; /* count of primary available cpus */
134 	uint32 secondary_available_cpus; /* count of secondary available cpus */
135 	uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
136 	uint32 compl_cpu = 0; /* cpu selected for completion jobs */
137 	uint32 tx_cpu = 0; /* cpu selected for tx processing job */
138 
139 	cpumask_clear(dhd->cpumask_primary_new);
140 	cpumask_clear(dhd->cpumask_secondary_new);
141 
142 	/*
143 	 * Now select from the primary mask. Even if a Job is
144 	 * already running on a CPU in secondary group, we still move
145 	 * to primary CPU. So no conditional checks.
146 	 */
147 	cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
148 		dhd->cpumask_curr_avail);
149 
150 	cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
151 		dhd->cpumask_curr_avail);
152 
153 	primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
154 
155 	if (primary_available_cpus > 0) {
156 		napi_cpu = cpumask_first(dhd->cpumask_primary_new);
157 
158 		/* If no further CPU is available,
159 		 * cpumask_next returns >= nr_cpu_ids
160 		 */
161 		tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
162 		if (tx_cpu >= nr_cpu_ids)
163 			tx_cpu = 0;
164 
165 		/* In case there are no more CPUs, do completions & Tx in same CPU */
166 		compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_primary_new);
167 		if (compl_cpu >= nr_cpu_ids)
168 			compl_cpu = tx_cpu;
169 	}
170 
171 	DHD_INFO(("%s After primary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
172 		__FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
173 
174 	/* -- Now check for the CPUs from the secondary mask -- */
175 	secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
176 
177 	DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n",
178 		__FUNCTION__, secondary_available_cpus, nr_cpu_ids));
179 
180 	if (secondary_available_cpus > 0) {
181 		/* At this point if napi_cpu is unassigned it means no CPU
182 		 * is online from Primary Group
183 		 */
184 		if (napi_cpu == 0) {
185 			napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
186 			tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
187 			compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
188 		} else if (tx_cpu == 0) {
189 			tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
190 			compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
191 		} else if (compl_cpu == 0) {
192 			compl_cpu = cpumask_first(dhd->cpumask_secondary_new);
193 		}
194 
195 		/* If no CPU was available for tx processing, choose CPU 0 */
196 		if (tx_cpu >= nr_cpu_ids)
197 			tx_cpu = 0;
198 
199 		/* If no CPU was available for completion, choose CPU 0 */
200 		if (compl_cpu >= nr_cpu_ids)
201 			compl_cpu = 0;
202 	}
203 	if ((primary_available_cpus == 0) &&
204 		(secondary_available_cpus == 0)) {
205 		/* No CPUs available from primary or secondary mask */
206 		napi_cpu = 1;
207 		compl_cpu = 0;
208 		tx_cpu = 2;
209 	}
210 
211 	DHD_INFO(("%s After secondary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
212 		__FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
213 
214 	ASSERT(napi_cpu < nr_cpu_ids);
215 	ASSERT(compl_cpu < nr_cpu_ids);
216 	ASSERT(tx_cpu < nr_cpu_ids);
217 
218 	atomic_set(&dhd->rx_napi_cpu, napi_cpu);
219 	atomic_set(&dhd->tx_compl_cpu, compl_cpu);
220 	atomic_set(&dhd->rx_compl_cpu, compl_cpu);
221 	atomic_set(&dhd->tx_cpu, tx_cpu);
222 
223 	return;
224 }
225 
226 /*
227  * Function to handle CPU Hotplug notifications.
228  * One of the task it does is to trigger the CPU Candidacy algorithm
229  * for load balancing.
230  */
231 
232 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
233 
234 int dhd_cpu_startup_callback(unsigned int cpu)
235 {
236 	dhd_info_t *dhd = g_dhd_pub->info;
237 
238 	DHD_INFO(("%s(): cpu:%d\n", __FUNCTION__, cpu));
239 	DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
240 	cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
241 	dhd_select_cpu_candidacy(dhd);
242 
243 	return 0;
244 }
245 
246 int dhd_cpu_teardown_callback(unsigned int cpu)
247 {
248 	dhd_info_t *dhd = g_dhd_pub->info;
249 
250 	DHD_INFO(("%s(): cpu:%d\n", __FUNCTION__, cpu));
251 	DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
252 	cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
253 	dhd_select_cpu_candidacy(dhd);
254 
255 	return 0;
256 }
257 #else
258 int
259 dhd_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
260 {
261 	unsigned long int cpu = (unsigned long int)hcpu;
262 
263 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
264 #pragma GCC diagnostic push
265 #pragma GCC diagnostic ignored "-Wcast-qual"
266 #endif // endif
267 	dhd_info_t *dhd = container_of(nfb, dhd_info_t, cpu_notifier);
268 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
269 #pragma GCC diagnostic pop
270 #endif // endif
271 
272 	if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
273 		DHD_INFO(("%s(): LB data is not initialized yet.\n",
274 			__FUNCTION__));
275 		return NOTIFY_BAD;
276 	}
277 
278 	switch (action)
279 	{
280 		case CPU_ONLINE:
281 		case CPU_ONLINE_FROZEN:
282 			DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
283 			cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
284 			dhd_select_cpu_candidacy(dhd);
285 			break;
286 
287 		case CPU_DOWN_PREPARE:
288 		case CPU_DOWN_PREPARE_FROZEN:
289 			DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
290 			cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
291 			dhd_select_cpu_candidacy(dhd);
292 			break;
293 		default:
294 			break;
295 	}
296 
297 	return NOTIFY_OK;
298 }
299 #endif /* LINUX_VERSION_CODE < 4.10.0 */
300 
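/*
 * Register for CPU hotplug events so that the candidacy algorithm is rerun
 * whenever a CPU comes online or goes offline: the cpuhp state machine is
 * used on kernels >= 4.10, the legacy hotcpu notifier otherwise.
 */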
301 int dhd_register_cpuhp_callback(dhd_info_t *dhd)
302 {
303 	int cpuhp_ret = 0;
304 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
305 	cpuhp_ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd",
306 		dhd_cpu_startup_callback, dhd_cpu_teardown_callback);
307 
308 	if (cpuhp_ret < 0) {
309 		DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
310 			__FUNCTION__, cpuhp_ret));
311 	}
312 #else
313 	/*
314 	 * If we are able to initialize CPU masks, lets register to the
315 	 * CPU Hotplug framework to change the CPU for each job dynamically
316 	 * using candidacy algorithm.
317 	 */
318 	dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
319 	register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
320 #endif /* LINUX_VERSION_CODE < 4.10.0 */
321 	return cpuhp_ret;
322 }
323 
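/* Unregister the CPU hotplug callback registered by dhd_register_cpuhp_callback(). */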
324 int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
325 {
326 	int ret = 0;
327 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
328 	/* Don't want to call tear down while unregistering */
329 	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
330 #else
331 	if (dhd->cpu_notifier.notifier_call != NULL) {
332 		unregister_cpu_notifier(&dhd->cpu_notifier);
333 	}
334 #endif // endif
335 	return ret;
336 }
337 
338 #if defined(DHD_LB_STATS)
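/*
 * Allocate and zero the per-CPU scheduling counters and the per-CPU
 * histogram bins used by the load-balance statistics. On allocation
 * failure the function returns early, leaving the remaining arrays
 * unallocated.
 */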
339 void dhd_lb_stats_init(dhd_pub_t *dhdp)
340 {
341 	dhd_info_t *dhd;
342 	int i, j, num_cpus = num_possible_cpus();
343 	int alloc_size = sizeof(uint32) * num_cpus;
344 
345 	if (dhdp == NULL) {
346 		DHD_ERROR(("%s(): Invalid argument, dhd pub pointer is NULL\n",
347 			__FUNCTION__));
348 		return;
349 	}
350 
351 	dhd = dhdp->info;
352 	if (dhd == NULL) {
353 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
354 		return;
355 	}
356 
357 	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
358 	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
359 
360 	dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
361 	if (!dhd->napi_percpu_run_cnt) {
362 		DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n",
363 			__FUNCTION__));
364 		return;
365 	}
366 	for (i = 0; i < num_cpus; i++)
367 		DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
368 
369 	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
370 
371 	dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
372 	if (!dhd->rxc_percpu_run_cnt) {
373 		DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n",
374 			__FUNCTION__));
375 		return;
376 	}
377 	for (i = 0; i < num_cpus; i++)
378 		DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
379 
380 	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
381 
382 	dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
383 	if (!dhd->txc_percpu_run_cnt) {
384 		DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n",
385 			__FUNCTION__));
386 		return;
387 	}
388 	for (i = 0; i < num_cpus; i++)
389 		DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
390 
391 	dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
392 	if (!dhd->cpu_online_cnt) {
393 		DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n",
394 			__FUNCTION__));
395 		return;
396 	}
397 	for (i = 0; i < num_cpus; i++)
398 		DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
399 
400 	dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
401 	if (!dhd->cpu_offline_cnt) {
402 		DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n",
403 			__FUNCTION__));
404 		return;
405 	}
406 	for (i = 0; i < num_cpus; i++)
407 		DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
408 
409 	dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
410 	if (!dhd->txp_percpu_run_cnt) {
411 		DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n",
412 			__FUNCTION__));
413 		return;
414 	}
415 	for (i = 0; i < num_cpus; i++)
416 		DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
417 
418 	dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
419 	if (!dhd->tx_start_percpu_run_cnt) {
420 		DHD_ERROR(("%s(): tx_start_percpu_run_cnt malloc failed \n",
421 			__FUNCTION__));
422 		return;
423 	}
424 	for (i = 0; i < num_cpus; i++)
425 		DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
426 
427 	for (j = 0; j < HIST_BIN_SIZE; j++) {
428 		dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
429 		if (!dhd->napi_rx_hist[j]) {
430 			DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
431 				__FUNCTION__, j));
432 			return;
433 		}
434 		for (i = 0; i < num_cpus; i++) {
435 			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
436 		}
437 	}
438 #ifdef DHD_LB_TXC
439 	for (j = 0; j < HIST_BIN_SIZE; j++) {
440 		dhd->txc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
441 		if (!dhd->txc_hist[j]) {
442 			DHD_ERROR(("%s(): dhd->txc_hist[%d] malloc failed \n",
443 			         __FUNCTION__, j));
444 			return;
445 		}
446 		for (i = 0; i < num_cpus; i++) {
447 			DHD_LB_STATS_CLR(dhd->txc_hist[j][i]);
448 		}
449 	}
450 #endif /* DHD_LB_TXC */
451 #ifdef DHD_LB_RXC
452 	for (j = 0; j < HIST_BIN_SIZE; j++) {
453 		dhd->rxc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
454 		if (!dhd->rxc_hist[j]) {
455 			DHD_ERROR(("%s(): dhd->rxc_hist[%d] malloc failed \n",
456 				__FUNCTION__, j));
457 			return;
458 		}
459 		for (i = 0; i < num_cpus; i++) {
460 			DHD_LB_STATS_CLR(dhd->rxc_hist[j][i]);
461 		}
462 	}
463 #endif /* DHD_LB_RXC */
464 	return;
465 }
466 
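/* Free every counter array and histogram allocated by dhd_lb_stats_init(). */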
467 void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
468 {
469 	dhd_info_t *dhd;
470 	int j, num_cpus = num_possible_cpus();
471 	int alloc_size = sizeof(uint32) * num_cpus;
472 
473 	if (dhdp == NULL) {
474 		DHD_ERROR(("%s(): Invalid argument, dhd pub pointer is NULL\n",
475 			__FUNCTION__));
476 		return;
477 	}
478 
479 	dhd = dhdp->info;
480 	if (dhd == NULL) {
481 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
482 		return;
483 	}
484 
485 	if (dhd->napi_percpu_run_cnt) {
486 		MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
487 		dhd->napi_percpu_run_cnt = NULL;
488 	}
489 	if (dhd->rxc_percpu_run_cnt) {
490 		MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
491 		dhd->rxc_percpu_run_cnt = NULL;
492 	}
493 	if (dhd->txc_percpu_run_cnt) {
494 		MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
495 		dhd->txc_percpu_run_cnt = NULL;
496 	}
497 	if (dhd->cpu_online_cnt) {
498 		MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
499 		dhd->cpu_online_cnt = NULL;
500 	}
501 	if (dhd->cpu_offline_cnt) {
502 		MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
503 		dhd->cpu_offline_cnt = NULL;
504 	}
505 
506 	if (dhd->txp_percpu_run_cnt) {
507 		MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
508 		dhd->txp_percpu_run_cnt = NULL;
509 	}
510 	if (dhd->tx_start_percpu_run_cnt) {
511 		MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
512 		dhd->tx_start_percpu_run_cnt = NULL;
513 	}
514 
515 	for (j = 0; j < HIST_BIN_SIZE; j++) {
516 		if (dhd->napi_rx_hist[j]) {
517 			MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
518 			dhd->napi_rx_hist[j] = NULL;
519 		}
520 #ifdef DHD_LB_TXC
521 		if (dhd->txc_hist[j]) {
522 			MFREE(dhdp->osh, dhd->txc_hist[j], alloc_size);
523 			dhd->txc_hist[j] = NULL;
524 		}
525 #endif /* DHD_LB_TXC */
526 #ifdef DHD_LB_RXC
527 		if (dhd->rxc_hist[j]) {
528 			MFREE(dhdp->osh, dhd->rxc_hist[j], alloc_size);
529 			dhd->rxc_hist[j] = NULL;
530 		}
531 #endif /* DHD_LB_RXC */
532 	}
533 
534 	return;
535 }
536 
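/*
 * Print one histogram as a table: one column per CPU and one row per bin,
 * where row j is labelled 2^j. The per-CPU total is reconstructed as
 * the sum over j of hist[j][cpu] * 2^j, followed by the grand total.
 */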
537 void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp,
538 	struct bcmstrbuf *strbuf, uint32 **hist)
539 {
540 	int i, j;
541 	uint32 *per_cpu_total;
542 	uint32 total = 0;
543 	uint32 num_cpus = num_possible_cpus();
544 
545 	per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
546 	if (!per_cpu_total) {
547 		DHD_ERROR(("%s(): per_cpu_total malloc failed\n", __FUNCTION__));
548 		return;
549 	}
550 	bzero(per_cpu_total, sizeof(uint32) * num_cpus);
551 
552 	bcm_bprintf(strbuf, "CPU: \t\t");
553 	for (i = 0; i < num_cpus; i++)
554 		bcm_bprintf(strbuf, "%d\t", i);
555 	bcm_bprintf(strbuf, "\nBin\n");
556 
557 	for (i = 0; i < HIST_BIN_SIZE; i++) {
558 		bcm_bprintf(strbuf, "%d:\t\t", 1<<i);
559 		for (j = 0; j < num_cpus; j++) {
560 			bcm_bprintf(strbuf, "%d\t", hist[i][j]);
561 		}
562 		bcm_bprintf(strbuf, "\n");
563 	}
564 	bcm_bprintf(strbuf, "Per CPU Total \t");
565 	total = 0;
566 	for (i = 0; i < num_cpus; i++) {
567 		for (j = 0; j < HIST_BIN_SIZE; j++) {
568 			per_cpu_total[i] += (hist[j][i] * (1<<j));
569 		}
570 		bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
571 		total += per_cpu_total[i];
572 	}
573 	bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
574 
575 	if (per_cpu_total) {
576 		MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
577 		per_cpu_total = NULL;
578 	}
579 	return;
580 }
581 
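/* Print a per-CPU counter array as two rows: CPU ids and their values. */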
582 void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
583 {
584 	int i, num_cpus = num_possible_cpus();
585 
586 	bcm_bprintf(strbuf, "CPU: \t");
587 	for (i = 0; i < num_cpus; i++)
588 		bcm_bprintf(strbuf, "%d\t", i);
589 	bcm_bprintf(strbuf, "\n");
590 
591 	bcm_bprintf(strbuf, "Val: \t");
592 	for (i = 0; i < num_cpus; i++)
593 		bcm_bprintf(strbuf, "%u\t", *(p+i));
594 	bcm_bprintf(strbuf, "\n");
595 	return;
596 }
597 
598 void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
599 {
600 	dhd_info_t *dhd;
601 
602 	if (dhdp == NULL || strbuf == NULL) {
603 		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
604 			__FUNCTION__, dhdp, strbuf));
605 		return;
606 	}
607 
608 	dhd = dhdp->info;
609 	if (dhd == NULL) {
610 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
611 		return;
612 	}
613 
614 	bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
615 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
616 
617 	bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
618 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
619 
620 	bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
621 		dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
622 		dhd->txc_sched_cnt);
623 
624 #ifdef DHD_LB_RXP
625 	bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
626 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
627 	bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
628 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
629 #endif /* DHD_LB_RXP */
630 
631 #ifdef DHD_LB_RXC
632 	bcm_bprintf(strbuf, "\nrxc_percpu_run_cnt:\n");
633 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->rxc_percpu_run_cnt);
634 	bcm_bprintf(strbuf, "\nRX Completions (Buffer Post) Histogram:\n");
635 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->rxc_hist);
636 #endif /* DHD_LB_RXC */
637 
638 #ifdef DHD_LB_TXC
639 	bcm_bprintf(strbuf, "\ntxc_percpu_run_cnt:\n");
640 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txc_percpu_run_cnt);
641 	bcm_bprintf(strbuf, "\nTX Completions (Buffer Free) Histogram:\n");
642 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->txc_hist);
643 #endif /* DHD_LB_TXC */
644 
645 #ifdef DHD_LB_TXP
646 	bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
647 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
648 
649 	bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
650 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
651 #endif /* DHD_LB_TXP */
652 }
653 
654 /* Given a number 'n', returns the smallest power of 2 that is >= 'n' */
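/* e.g. next_larger_power2(100) == 128 and next_larger_power2(128) == 128 */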
655 static inline uint32 next_larger_power2(uint32 num)
656 {
657 	num--;
658 	num |= (num >> 1);
659 	num |= (num >> 2);
660 	num |= (num >> 4);
661 	num |= (num >> 8);
662 	num |= (num >> 16);
663 
664 	return (num + 1);
665 }
666 
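/*
 * Credit one event of size 'count' to the histogram column of the given CPU:
 * the bin index is the power of two that 'count' rounds up to (bin 0 for 1,
 * bin 1 for 2, ... bin 7 for 128) and bin 8 collects everything larger.
 */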
667 void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
668 {
669 	uint32 bin_power;
670 	uint32 *p;
671 	bin_power = next_larger_power2(count);
672 
673 	switch (bin_power) {
674 		case   1: p = bin[0] + cpu; break;
675 		case   2: p = bin[1] + cpu; break;
676 		case   4: p = bin[2] + cpu; break;
677 		case   8: p = bin[3] + cpu; break;
678 		case  16: p = bin[4] + cpu; break;
679 		case  32: p = bin[5] + cpu; break;
680 		case  64: p = bin[6] + cpu; break;
681 		case 128: p = bin[7] + cpu; break;
682 		default : p = bin[8] + cpu; break;
683 	}
684 
685 	*p = *p + 1;
686 	return;
687 }
688 
689 void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
690 {
691 	int cpu;
692 	dhd_info_t *dhd = dhdp->info;
693 
694 	cpu = get_cpu();
695 	put_cpu();
696 	dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
697 
698 	return;
699 }
700 
701 void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
702 {
703 	int cpu;
704 	dhd_info_t *dhd = dhdp->info;
705 
706 	cpu = get_cpu();
707 	put_cpu();
708 	dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
709 
710 	return;
711 }
712 
713 void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
714 {
715 	int cpu;
716 	dhd_info_t *dhd = dhdp->info;
717 
718 	cpu = get_cpu();
719 	put_cpu();
720 	dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
721 
722 	return;
723 }
724 
725 void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
726 {
727 	dhd_info_t *dhd = dhdp->info;
728 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
729 }
730 
731 void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
732 {
733 	dhd_info_t *dhd = dhdp->info;
734 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
735 }
736 #endif /* DHD_LB_STATS */
737 
738 #endif /* DHD_LB */
739 #if defined(DHD_LB)
740 /**
741  * dhd_tasklet_schedule - Function that runs in IPI context of the destination
742  * CPU and schedules a tasklet.
743  * @tasklet: opaque pointer to the tasklet
744  */
745 INLINE void
746 dhd_tasklet_schedule(void *tasklet)
747 {
748 	tasklet_schedule((struct tasklet_struct *)tasklet);
749 }
750 /**
751  * dhd_tasklet_schedule_on - Schedules the passed tasklet on a given CPU
752  * @tasklet: tasklet to be scheduled
753  * @on_cpu: cpu core id
754  *
755  * If the requested cpu is online, then an IPI is sent to this cpu via the
756  * smp_call_function_single with no wait and the tasklet_schedule function
757  * will be invoked to schedule the specified tasklet on the requested CPU.
758  */
759 INLINE void
760 dhd_tasklet_schedule_on(struct tasklet_struct *tasklet, int on_cpu)
761 {
762 	const int wait = 0;
763 	smp_call_function_single(on_cpu,
764 		dhd_tasklet_schedule, (void *)tasklet, wait);
765 }
766 
767 /**
768  * dhd_work_schedule_on - Schedules the passed work item on a given CPU
769  * @work: work to be scheduled
770  * @on_cpu: cpu core id
771  *
772  * The work item is queued on the requested CPU via schedule_work_on(),
773  * and its work function will then be executed by that CPU's system
774  * workqueue worker.
775  */
776 
777 INLINE void
778 dhd_work_schedule_on(struct work_struct *work, int on_cpu)
779 {
780 	schedule_work_on(on_cpu, work);
781 }
782 
783 #if defined(DHD_LB_TXC)
784 /**
785  * dhd_lb_tx_compl_dispatch - load balance by dispatching the tx_compl_tasklet
786  * on another cpu. The tx_compl_tasklet will take care of DMA unmapping and
787  * freeing the packets placed in the tx_compl workq
788  */
789 void
790 dhd_lb_tx_compl_dispatch(dhd_pub_t *dhdp)
791 {
792 	dhd_info_t *dhd = dhdp->info;
793 	int curr_cpu, on_cpu;
794 
795 	if (dhd->rx_napi_netdev == NULL) {
796 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
797 		return;
798 	}
799 
800 	DHD_LB_STATS_INCR(dhd->txc_sched_cnt);
801 	/*
802 	 * If the destination CPU is NOT online or is same as current CPU
803 	 * no need to schedule the work
804 	 */
805 	curr_cpu = get_cpu();
806 	put_cpu();
807 
808 	on_cpu = atomic_read(&dhd->tx_compl_cpu);
809 
810 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
811 		dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
812 	} else {
813 		schedule_work(&dhd->tx_compl_dispatcher_work);
814 	}
815 }
816 
817 static void dhd_tx_compl_dispatcher_fn(struct work_struct *work)
818 {
819 	struct dhd_info *dhd =
820 		container_of(work, struct dhd_info, tx_compl_dispatcher_work);
821 	int cpu;
822 
823 	get_online_cpus();
824 	cpu = atomic_read(&dhd->tx_compl_cpu);
825 	if (!cpu_online(cpu))
826 		dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
827 	else
828 		dhd_tasklet_schedule_on(&dhd->tx_compl_tasklet, cpu);
829 	put_online_cpus();
830 }
831 #endif /* DHD_LB_TXC */
832 
833 #if defined(DHD_LB_RXC)
834 /**
835  * dhd_lb_rx_compl_dispatch - load balance by dispatching the rx_compl_tasklet
836  * on another cpu. The rx_compl_tasklet will take care of reposting rx buffers
837  * in the H2D RxBuffer Post common ring, by using the recycled pktids that were
838  * placed in the rx_compl workq.
839  *
840  * @dhdp: pointer to dhd_pub object
841  */
842 void
843 dhd_lb_rx_compl_dispatch(dhd_pub_t *dhdp)
844 {
845 	dhd_info_t *dhd = dhdp->info;
846 	int curr_cpu, on_cpu;
847 
848 	if (dhd->rx_napi_netdev == NULL) {
849 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
850 		return;
851 	}
852 
853 	DHD_LB_STATS_INCR(dhd->rxc_sched_cnt);
854 	/*
855 	 * If the destination CPU is NOT online or is same as current CPU
856 	 * no need to schedule the work
857 	 */
858 	curr_cpu = get_cpu();
859 	put_cpu();
860 	on_cpu = atomic_read(&dhd->rx_compl_cpu);
861 
862 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
863 		dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
864 	} else {
865 		schedule_work(&dhd->rx_compl_dispatcher_work);
866 	}
867 }
868 
869 void dhd_rx_compl_dispatcher_fn(struct work_struct *work)
870 {
871 	struct dhd_info *dhd =
872 		container_of(work, struct dhd_info, rx_compl_dispatcher_work);
873 	int cpu;
874 
875 	get_online_cpus();
876 	cpu = atomic_read(&dhd->rx_compl_cpu);
877 	if (!cpu_online(cpu))
878 		dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
879 	else {
880 		dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, cpu);
881 	}
882 	put_online_cpus();
883 }
884 #endif /* DHD_LB_RXC */
885 
886 #if defined(DHD_LB_TXP)
887 void dhd_tx_dispatcher_work(struct work_struct *work)
888 {
889 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
890 #pragma GCC diagnostic push
891 #pragma GCC diagnostic ignored "-Wcast-qual"
892 #endif // endif
893 	struct dhd_info *dhd =
894 		container_of(work, struct dhd_info, tx_dispatcher_work);
895 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
896 #pragma GCC diagnostic pop
897 #endif // endif
898 	dhd_tasklet_schedule(&dhd->tx_tasklet);
899 }
900 
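/*
 * Pick the CPU that should run the Tx tasklet: normally tx_cpu, but if the
 * network stack handed the packet over on that very CPU, fall back to
 * tx_compl_cpu. If the chosen CPU is offline the tasklet is scheduled on
 * the current CPU instead.
 */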
901 void dhd_tx_dispatcher_fn(dhd_pub_t *dhdp)
902 {
903 	int cpu;
904 	int net_tx_cpu;
905 	dhd_info_t *dhd = dhdp->info;
906 
907 	preempt_disable();
908 	cpu = atomic_read(&dhd->tx_cpu);
909 	net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
910 
911 	/*
912 	 * If the network stack's TX has pushed the packet on the same
913 	 * CPU that is chosen for Tx processing, separate them out,
914 	 * i.e. run the TX processing tasklet on the compl_cpu instead.
915 	 */
916 	if (net_tx_cpu == cpu)
917 		cpu = atomic_read(&dhd->tx_compl_cpu);
918 
919 	if (!cpu_online(cpu)) {
920 		/*
921 		 * The chosen CPU is not online, so do the
922 		 * job on the current CPU itself.
923 		 */
924 		dhd_tasklet_schedule(&dhd->tx_tasklet);
925 	} else {
926 		/*
927 		 * Schedule tx_dispatcher_work on the chosen CPU,
928 		 * which in turn will schedule tx_tasklet there.
929 		 */
930 		dhd_work_schedule_on(&dhd->tx_dispatcher_work, cpu);
931 	}
932 	preempt_enable();
933 }
934 
935 /**
936  * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
937  * on another cpu. The tx_tasklet will take care of actually putting
938  * the skbs into the appropriate flow ring and ringing the H2D interrupt
939  *
940  * @dhdp: pointer to dhd_pub object
941  */
942 void
943 dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
944 {
945 	dhd_info_t *dhd = dhdp->info;
946 	int curr_cpu;
947 
948 	curr_cpu = get_cpu();
949 	put_cpu();
950 
951 	/* Record the CPU in which the TX request from Network stack came */
952 	atomic_set(&dhd->net_tx_cpu, curr_cpu);
953 
954 	/* Schedule the work to dispatch ... */
955 	dhd_tx_dispatcher_fn(dhdp);
956 }
957 #endif /* DHD_LB_TXP */
958 
959 #if defined(DHD_LB_RXP)
960 /**
961  * dhd_napi_poll - Load balance napi poll function to process received
962  * packets and send up the network stack using netif_receive_skb()
963  *
964  * @napi: napi object in which context this poll function is invoked
965  * @budget: number of packets to be processed.
966  *
967  * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
968  * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
969  * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
970  * packet tag and sendup.
971  */
972 int
973 dhd_napi_poll(struct napi_struct *napi, int budget)
974 {
975 	int ifid;
976 	const int pkt_count = 1;
977 	const int chan = 0;
978 	struct sk_buff * skb;
979 	unsigned long flags;
980 	struct dhd_info *dhd;
981 	int processed = 0;
982 	struct sk_buff_head rx_process_queue;
983 
984 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
985 #pragma GCC diagnostic push
986 #pragma GCC diagnostic ignored "-Wcast-qual"
987 #endif // endif
988 	dhd = container_of(napi, struct dhd_info, rx_napi_struct);
989 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
990 #pragma GCC diagnostic pop
991 #endif // endif
992 
993 	DHD_INFO(("%s napi_queue<%d> budget<%d>\n",
994 		__FUNCTION__, skb_queue_len(&dhd->rx_napi_queue), budget));
995 	__skb_queue_head_init(&rx_process_queue);
996 
997 	/* extract the entire rx_napi_queue into local rx_process_queue */
998 	spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
999 	skb_queue_splice_tail_init(&dhd->rx_napi_queue, &rx_process_queue);
1000 	spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1001 
1002 	while ((skb = __skb_dequeue(&rx_process_queue)) != NULL) {
1003 		OSL_PREFETCH(skb->data);
1004 
1005 		ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1006 
1007 		DHD_INFO(("%s dhd_rx_frame pkt<%p> ifid<%d>\n",
1008 			__FUNCTION__, skb, ifid));
1009 
1010 		dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1011 		processed++;
1012 	}
1013 
1014 	DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1015 
1016 	DHD_INFO(("%s processed %d\n", __FUNCTION__, processed));
1017 	napi_complete(napi);
1018 
1019 	return budget - 1;
1020 }
1021 
1022 /**
1023  * dhd_napi_schedule - Place the napi struct into the current cpus softnet napi
1024  * poll list. This function may be invoked via the smp_call_function_single
1025  * from a remote CPU.
1026  *
1027  * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
1028  * after the napi_struct is added to the softnet data's poll_list
1029  *
1030  * @info: pointer to a dhd_info struct
1031  */
1032 static void
1033 dhd_napi_schedule(void *info)
1034 {
1035 	dhd_info_t *dhd = (dhd_info_t *)info;
1036 
1037 	DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n",
1038 		__FUNCTION__, &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1039 
1040 	/* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1041 	if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1042 		__napi_schedule(&dhd->rx_napi_struct);
1043 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1044 		raise_softirq(NET_RX_SOFTIRQ);
1045 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1046 	}
1047 
1048 	/*
1049 	 * If the rx_napi_struct was already running, then we let it complete
1050 	 * processing all its packets. The rx_napi_struct may only run on one
1051 	 * core at a time, to avoid out-of-order handling.
1052 	 */
1053 }
1054 
1055 /**
1056  * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
1057  * action after placing the dhd's rx_process napi object in the remote CPU's
1058  * softnet data's poll_list.
1059  *
1060  * @dhd: dhd_info which has the rx_process napi object
1061  * @on_cpu: desired remote CPU id
1062  */
1063 static INLINE int
1064 dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1065 {
1066 	int wait = 0; /* asynchronous IPI */
1067 	DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n",
1068 		__FUNCTION__, dhd, &dhd->rx_napi_struct, on_cpu));
1069 
1070 	if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1071 		DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1072 			__FUNCTION__, on_cpu));
1073 	}
1074 
1075 	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1076 
1077 	return 0;
1078 }
1079 
1080 /*
1081  * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on
1082  * Why should we do this?
1083  * The candidacy algorithm is run from the call back function
1084  * registered to CPU hotplug notifier. This call back happens from Worker
1085  * context. The dhd_napi_schedule_on is also from worker context.
1086  * Note that both of this can run on two different CPUs at the same time.
1087  * So we can possibly have a window where a given CPUn is being brought
1088  * down from CPUm while we try to run a function on CPUn.
1089  * To prevent this, it is better to execute the whole SMP function call
1090  * under get_online_cpus.
1091  * This call ensures that the hotplug mechanism does not kick in
1092  * until we are done dealing with online CPUs.
1093  * If the hotplug worker is already running, no worries because the
1094  * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
1095  *
1096  * The below mentioned code structure is proposed in
1097  * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
1098  * for the question
1099  * Q: I need to ensure that a particular cpu is not removed when there is some
1100  *    work specific to this cpu is in progress
1101  *
1102  * According to the documentation, calling get_online_cpus is NOT required if
1103  * we are running from tasklet context. Since dhd_rx_napi_dispatcher_fn can
1104  * run from work queue context, we have to call these functions.
1105  */
1106 void dhd_rx_napi_dispatcher_fn(struct work_struct *work)
1107 {
1108 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1109 #pragma GCC diagnostic push
1110 #pragma GCC diagnostic ignored "-Wcast-qual"
1111 #endif // endif
1112 	struct dhd_info *dhd =
1113 		container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1114 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1115 #pragma GCC diagnostic pop
1116 #endif // endif
1117 	int cpu;
1118 
1119 	get_online_cpus();
1120 	cpu = atomic_read(&dhd->rx_napi_cpu);
1121 
1122 	if (!cpu_online(cpu))
1123 		dhd_napi_schedule(dhd);
1124 	else
1125 		dhd_napi_schedule_on(dhd, cpu);
1126 
1127 	put_online_cpus();
1128 }
1129 
1130 /**
1131  * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1132  * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1133  * the packets enqueued into the rx_napi_queue and send them up.
1134  * The producer's rx packet queue is appended to the rx_napi_queue before
1135  * dispatching the rx_napi_struct.
1136  */
1137 void
1138 dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1139 {
1140 	unsigned long flags;
1141 	dhd_info_t *dhd = dhdp->info;
1142 	int curr_cpu;
1143 	int on_cpu;
1144 #ifdef DHD_LB_IRQSET
1145 	cpumask_t cpus;
1146 #endif /* DHD_LB_IRQSET */
1147 
1148 	if (dhd->rx_napi_netdev == NULL) {
1149 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1150 		return;
1151 	}
1152 
1153 	DHD_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1154 		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_pend_queue)));
1155 
1156 	/* append the producer's queue of packets to the napi's rx process queue */
1157 	spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1158 	skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1159 	spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1160 
1161 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1162 
1163 	/* if LB RXP is disabled directly schedule NAPI */
1164 	if (atomic_read(&dhd->lb_rxp_active) == 0) {
1165 		dhd_napi_schedule(dhd);
1166 		return;
1167 	}
1168 
1169 	/*
1170 	 * If the destination CPU is NOT online or is same as current CPU
1171 	 * no need to schedule the work
1172 	 */
1173 	curr_cpu = get_cpu();
1174 	put_cpu();
1175 
1176 	preempt_disable();
1177 	on_cpu = atomic_read(&dhd->rx_napi_cpu);
1178 #ifdef DHD_LB_IRQSET
1179 	if (cpumask_and(&cpus, cpumask_of(curr_cpu), dhd->cpumask_primary) ||
1180 			(!cpu_online(on_cpu))) {
1181 #else
1182 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
1183 #endif /* DHD_LB_IRQSET */
1184 		DHD_INFO(("%s : curr_cpu : %d, cpumask : 0x%lx\n", __FUNCTION__,
1185 			curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1186 		dhd_napi_schedule(dhd);
1187 	} else {
1188 		DHD_INFO(("%s : schedule to curr_cpu : %d, cpumask : 0x%lx\n",
1189 			__FUNCTION__, curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1190 		schedule_work(&dhd->rx_napi_dispatcher_work);
1191 		DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1192 	}
1193 	preempt_enable();
1194 }
1195 
1196 /**
1197  * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1198  */
1199 void
1200 dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1201 {
1202 	dhd_info_t *dhd = dhdp->info;
1203 
1204 	DHD_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1205 		pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1206 	DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1207 	__skb_queue_tail(&dhd->rx_pend_queue, pkt);
1208 }
1209 #endif /* DHD_LB_RXP */
1210 #endif /* DHD_LB */
1211 
1212 #if defined(DHD_LB_IRQSET) || defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
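/* Pin the dongle's PCIe interrupt to the given set of CPUs. */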
1213 void
1214 dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1215 {
1216 	unsigned int irq = (unsigned int)-1;
1217 	int err = BCME_OK;
1218 
1219 	if (!dhdp) {
1220 		DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1221 		return;
1222 	}
1223 
1224 	if (!dhdp->bus) {
1225 		DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1226 		return;
1227 	}
1228 
1229 	DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n",
1230 			__FUNCTION__, *cpumask_bits(cpumask)));
1231 
1232 	dhdpcie_get_pcieirq(dhdp->bus, &irq);
1233 	err = irq_set_affinity(irq, cpumask);
1234 	if (err)
1235 		DHD_ERROR(("%s : irq set affinity failed cpu:0x%lx\n",
1236 			__FUNCTION__, *cpumask_bits(cpumask)));
1237 }
1238 #endif /* DHD_LB_IRQSET || DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1239 
1240 #if defined(DHD_LB_TXP)
1241 
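/*
 * Tx entry point used by the network interface when Tx load balancing is
 * compiled in: if lb_txp_active is clear the packet is sent inline via
 * __dhd_sendpkt(); otherwise the skb is tagged with its net_device and
 * interface index, queued on tx_pend_queue and the Tx tasklet is dispatched.
 */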
1242 int BCMFASTPATH
1243 dhd_lb_sendpkt(dhd_info_t *dhd, struct net_device *net,
1244 	int ifidx, void *skb)
1245 {
1246 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1247 
1248 	/* If the feature is disabled at run-time, do the TX from here */
1249 	if (atomic_read(&dhd->lb_txp_active) == 0) {
1250 		DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1251 		return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1252 	}
1253 
1254 	/* Store the address of net device and interface index in the Packet tag */
1255 	DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1256 	DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1257 
1258 	/* Enqueue the skb into tx_pend_queue */
1259 	skb_queue_tail(&dhd->tx_pend_queue, skb);
1260 
1261 	DHD_TRACE(("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1262 
1263 	/* Dispatch the Tx job to be processed by the tx_tasklet */
1264 	dhd_lb_tx_dispatch(&dhd->pub);
1265 
1266 	return NETDEV_TX_OK;
1267 }
1268 #endif /* DHD_LB_TXP */
1269 
1270 #ifdef DHD_LB_TXP
1271 #define DHD_LB_TXBOUND	64
1272 /*
1273  * Function that performs the TX processing on a given CPU
1274  */
1275 bool
1276 dhd_lb_tx_process(dhd_info_t *dhd)
1277 {
1278 	struct sk_buff *skb;
1279 	int cnt = 0;
1280 	struct net_device *net;
1281 	int ifidx;
1282 	bool resched = FALSE;
1283 
1284 	DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1285 	if (dhd == NULL) {
1286 		DHD_ERROR((" Null pointer DHD \r\n"));
1287 		return resched;
1288 	}
1289 
1290 	BCM_REFERENCE(net);
1291 
1292 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1293 
1294 	/* Base Loop to perform the actual Tx */
1295 	do {
1296 		skb = skb_dequeue(&dhd->tx_pend_queue);
1297 		if (skb == NULL) {
1298 			DHD_TRACE(("Dequeued a Null Packet \r\n"));
1299 			break;
1300 		}
1301 		cnt++;
1302 
1303 		net =  DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1304 		ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1305 
1306 		DHD_TRACE(("Processing skb %p for net %p index %d \r\n", skb,
1307 			net, ifidx));
1308 
1309 		__dhd_sendpkt(&dhd->pub, ifidx, skb);
1310 
1311 		if (cnt >= DHD_LB_TXBOUND) {
1312 			resched = TRUE;
1313 			break;
1314 		}
1315 
1316 	} while (1);
1317 
1318 	DHD_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1319 
1320 	return resched;
1321 }
1322 
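/*
 * Tasklet handler for Tx load balancing: drains up to DHD_LB_TXBOUND
 * packets via dhd_lb_tx_process() and reschedules itself if more remain.
 */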
1323 void
1324 dhd_lb_tx_handler(unsigned long data)
1325 {
1326 	dhd_info_t *dhd = (dhd_info_t *)data;
1327 
1328 	if (dhd_lb_tx_process(dhd)) {
1329 		dhd_tasklet_schedule(&dhd->tx_tasklet);
1330 	}
1331 }
1332 
1333 #endif /* DHD_LB_TXP */
1334