xref: /OK3568_Linux_fs/external/rkwifibt/drivers/bcmdhd/dhd_linux_lb.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
3  * Basically selected code segments from usb-cdc.c and usb-rndis.c
4  *
5  * Copyright (C) 2020, Broadcom.
6  *
7  *      Unless you and Broadcom execute a separate written software license
8  * agreement governing use of this software, this software is licensed to you
9  * under the terms of the GNU General Public License version 2 (the "GPL"),
10  * available at http://www.broadcom.com/licenses/GPLv2.php, with the
11  * following added to such license:
12  *
13  *      As a special exception, the copyright holders of this software give you
14  * permission to link this software with independent modules, and to copy and
15  * distribute the resulting executable under terms of your choice, provided that
16  * you also meet, for each linked independent module, the terms and conditions of
17  * the license of that module.  An independent module is a module which is not
18  * derived from this software.  The special exception does not apply to any
19  * modifications of the software.
20  *
21  *
22  * <<Broadcom-WL-IPTag/Open:>>
23  *
24  * $Id$
25  */
26 
27 #include <dhd_linux_priv.h>
28 
29 extern dhd_pub_t* g_dhd_pub;
30 
31 #if defined(DHD_LB)
32 
33 #ifdef DHD_LB_STATS
34 #define DHD_NUM_NAPI_LATENCY_ROWS (17u)
35 #define DHD_NAPI_LATENCY_SIZE (sizeof(uint64) * DHD_NUM_NAPI_LATENCY_ROWS)
36 #endif /* DHD_LB_STATS */
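/*
 * DHD_NUM_NAPI_LATENCY_ROWS is 17 because dhd_lb_stats_update_napi_latency()
 * below buckets the schedule-to-poll latency into power-of-two bins: rows 0..15
 * cover latencies that round up to 1us..32768us, and row 16 collects everything
 * larger (the default case).
 */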
37 
38 #ifdef DHD_REPLACE_LOG_INFO_TO_TRACE
39 #define DHD_LB_INFO DHD_TRACE
40 #else
41 #define DHD_LB_INFO DHD_INFO
42 #endif /* DHD_REPLACE_LOG_INFO_TO_TRACE */
43 
44 void
45 dhd_lb_set_default_cpus(dhd_info_t *dhd)
46 {
47 	/* Default CPU allocation for the jobs */
48 	atomic_set(&dhd->rx_napi_cpu, 1);
49 	atomic_set(&dhd->tx_cpu, 2);
50 	atomic_set(&dhd->net_tx_cpu, 0);
51 	atomic_set(&dhd->dpc_cpu, 0);
52 }
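/*
 * With these defaults, NAPI Rx work starts on CPU1 and Tx work on CPU2, keeping
 * CPU0 (where the DPC and network-stack Tx are initially tracked) free of load
 * balancing jobs. dhd_select_cpu_candidacy() below overrides these choices once
 * the primary/secondary CPU masks and the set of online CPUs are known.
 */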
53 
54 void
55 dhd_cpumasks_deinit(dhd_info_t *dhd)
56 {
57 	free_cpumask_var(dhd->cpumask_curr_avail);
58 	free_cpumask_var(dhd->cpumask_primary);
59 	free_cpumask_var(dhd->cpumask_primary_new);
60 	free_cpumask_var(dhd->cpumask_secondary);
61 	free_cpumask_var(dhd->cpumask_secondary_new);
62 }
63 
64 int
65 dhd_cpumasks_init(dhd_info_t *dhd)
66 {
67 	int id;
68 	uint32 cpus, num_cpus = num_possible_cpus();
69 	int ret = 0;
70 
71 	DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n", __FUNCTION__,
72 		DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
73 
74 	/* FIXME: If one alloc fails we must free_cpumask_var the previous */
75 	if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
76 	    !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
77 	    !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
78 	    !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
79 	    !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
80 		DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
81 		ret = -ENOMEM;
82 		goto fail;
83 	}
84 
85 	cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
86 	cpumask_clear(dhd->cpumask_primary);
87 	cpumask_clear(dhd->cpumask_secondary);
88 
89 	if (num_cpus > 32) {
90 		DHD_ERROR(("%s max cpus must be <= 32, %d too big\n", __FUNCTION__, num_cpus));
91 		ASSERT(0);
92 	}
93 
94 	cpus = DHD_LB_PRIMARY_CPUS;
95 	for (id = 0; id < num_cpus; id++) {
96 		if (isset(&cpus, id))
97 			cpumask_set_cpu(id, dhd->cpumask_primary);
98 	}
99 
100 	cpus = DHD_LB_SECONDARY_CPUS;
101 	for (id = 0; id < num_cpus; id++) {
102 		if (isset(&cpus, id))
103 			cpumask_set_cpu(id, dhd->cpumask_secondary);
104 	}
105 
106 	return ret;
107 fail:
108 	dhd_cpumasks_deinit(dhd);
109 	return ret;
110 }
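/*
 * Worked example (illustrative build values, not taken from this source): on a
 * big.LITTLE part with CPUs 0-7, a build might set DHD_LB_PRIMARY_CPUS = 0xF0
 * and DHD_LB_SECONDARY_CPUS = 0x0E. The loops above would then set bits 4-7 in
 * cpumask_primary (big cores) and bits 1-3 in cpumask_secondary (little cores,
 * excluding boot CPU0), since isset(&cpus, id) tests bit 'id' of the 32-bit mask.
 */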
111 
112 /*
113  * The CPU Candidacy Algorithm
114  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
115  * The available CPUs for selection are divided into two groups
116  *  Primary Set - A CPU mask that carries the First Choice CPUs
117  *  Secondary Set - A CPU mask that carries the Second Choice CPUs.
118  *
119  * There are two types of jobs that need to be assigned to
120  * the CPUs, from one of the above mentioned CPU groups. The jobs are
121  * 1) Rx Packet Processing - napi_cpu
122  * 2) Tx Packet Processing - tx_cpu
123  * To begin with, napi_cpu is on CPU0. Whenever a CPU goes
124  * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
125  * algo tries to pick up the first available non-boot CPU (i.e. not CPU0) for napi_cpu.
126  *
127  */
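/*
 * Worked example (illustrative, using the hypothetical masks from the example
 * above): with the primary set {4,5,6,7} all online, dpc_cpu = 0, net_tx_cpu = 0
 * (and, under DHD_LB_HOST_CTRL, primary CPUs permitted), the selection below
 * picks napi_cpu = 4 (first primary CPU) and tx_cpu = 5 (next primary CPU).
 * If only CPU4 of the primary set is online, napi_cpu = 4 and tx_cpu falls back
 * to the first eligible online secondary CPU. If neither set has an eligible
 * online CPU, the hard defaults napi_cpu = 1 and tx_cpu = 2 are used.
 */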
128 void dhd_select_cpu_candidacy(dhd_info_t *dhd)
129 {
130 	uint32 primary_available_cpus; /* count of primary available cpus */
131 	uint32 secondary_available_cpus; /* count of secondary available cpus */
132 	uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
133 	uint32 tx_cpu = 0; /* cpu selected for tx processing job */
134 	uint32 dpc_cpu = atomic_read(&dhd->dpc_cpu);
135 	uint32 net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
136 
137 	cpumask_clear(dhd->cpumask_primary_new);
138 	cpumask_clear(dhd->cpumask_secondary_new);
139 
140 	/*
141 	 * Now select from the primary mask. Even if a Job is
142 	 * already running on a CPU in secondary group, we still move
143 	 * to primary CPU. So no conditional checks.
144 	 */
145 	cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
146 		dhd->cpumask_curr_avail);
147 
148 	cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
149 		dhd->cpumask_curr_avail);
150 
151 	/* Clear DPC cpu from new masks so that dpc cpu is not chosen for LB */
152 	cpumask_clear_cpu(dpc_cpu, dhd->cpumask_primary_new);
153 	cpumask_clear_cpu(dpc_cpu, dhd->cpumask_secondary_new);
154 
155 	/* Clear net_tx_cpu from new masks so that same is not chosen for LB */
156 	cpumask_clear_cpu(net_tx_cpu, dhd->cpumask_primary_new);
157 	cpumask_clear_cpu(net_tx_cpu, dhd->cpumask_secondary_new);
158 
159 	primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
160 
161 #if defined(DHD_LB_HOST_CTRL)
162 	/* Do not use primary cpus if DHD received an affinity-off cmd
163 	 * from the framework
164 	 */
165 	if (primary_available_cpus > 0 && dhd->permitted_primary_cpu)
166 #else
167 	if (primary_available_cpus > 0)
168 #endif /* DHD_LB_HOST_CTRL */
169 	{
170 		napi_cpu = cpumask_first(dhd->cpumask_primary_new);
171 
172 		/* If no further CPU is available,
173 		 * cpumask_next returns >= nr_cpu_ids
174 		 */
175 		tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
176 		if (tx_cpu >= nr_cpu_ids)
177 			tx_cpu = 0;
178 	}
179 
180 	DHD_INFO(("%s After primary CPU check napi_cpu %d tx_cpu %d\n",
181 		__FUNCTION__, napi_cpu, tx_cpu));
182 
183 	/* -- Now check for the CPUs from the secondary mask -- */
184 	secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
185 
186 	DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n",
187 		__FUNCTION__, secondary_available_cpus, nr_cpu_ids));
188 
189 	if (secondary_available_cpus > 0) {
190 		/* At this point if napi_cpu is unassigned it means no CPU
191 		 * is online from Primary Group
192 		 */
193 #if defined(DHD_LB_TXP_LITTLE_CORE_CTRL)
194 		/* Clear tx_cpu, so that it can be picked from little core */
195 		tx_cpu = 0;
196 #endif /* DHD_LB_TXP_LITTLE_CORE_CTRL */
197 		if (napi_cpu == 0) {
198 			napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
199 			tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
200 		} else if (tx_cpu == 0) {
201 			tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
202 		}
203 
204 		/* If no CPU was available for tx processing, choose CPU 0 */
205 		if (tx_cpu >= nr_cpu_ids)
206 			tx_cpu = 0;
207 	}
208 
209 	if ((primary_available_cpus == 0) &&
210 		(secondary_available_cpus == 0)) {
211 		/* No CPUs available from primary or secondary mask */
212 		napi_cpu = 1;
213 		tx_cpu = 2;
214 	}
215 
216 	DHD_INFO(("%s After secondary CPU check napi_cpu %d tx_cpu %d\n",
217 		__FUNCTION__, napi_cpu, tx_cpu));
218 
219 	ASSERT(napi_cpu < nr_cpu_ids);
220 	ASSERT(tx_cpu < nr_cpu_ids);
221 
222 	atomic_set(&dhd->rx_napi_cpu, napi_cpu);
223 	atomic_set(&dhd->tx_cpu, tx_cpu);
224 
225 	return;
226 }
227 
228 /*
229  * Function to handle CPU Hotplug notifications.
230  * One of the tasks it does is to trigger the CPU Candidacy algorithm
231  * for load balancing.
232  */
233 
234 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
235 
236 int dhd_cpu_startup_callback(unsigned int cpu)
237 {
238 	dhd_info_t *dhd = g_dhd_pub->info;
239 
240 	DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
241 	DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
242 	cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
243 	dhd_select_cpu_candidacy(dhd);
244 
245 	return 0;
246 }
247 
248 int dhd_cpu_teardown_callback(unsigned int cpu)
249 {
250 	dhd_info_t *dhd = g_dhd_pub->info;
251 
252 	DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
253 	DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
254 	cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
255 	dhd_select_cpu_candidacy(dhd);
256 
257 	return 0;
258 }
259 #else
260 int
261 dhd_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
262 {
263 	unsigned long int cpu = (unsigned long int)hcpu;
264 	dhd_info_t *dhd;
265 
266 	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
267 	dhd = container_of(nfb, dhd_info_t, cpu_notifier);
268 	GCC_DIAGNOSTIC_POP();
269 
270 	if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
271 		DHD_INFO(("%s(): LB data is not initialized yet.\n",
272 			__FUNCTION__));
273 		return NOTIFY_BAD;
274 	}
275 
276 	/* XXX: Do we need other action types ? */
277 	switch (action)
278 	{
279 		case CPU_ONLINE:
280 		case CPU_ONLINE_FROZEN:
281 			DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
282 			cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
283 			dhd_select_cpu_candidacy(dhd);
284 			break;
285 
286 		case CPU_DOWN_PREPARE:
287 		case CPU_DOWN_PREPARE_FROZEN:
288 			DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
289 			cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
290 			dhd_select_cpu_candidacy(dhd);
291 			break;
292 		default:
293 			break;
294 	}
295 
296 	return NOTIFY_OK;
297 }
298 #endif /* LINUX_VERSION_CODE < 4.10.0 */
299 
300 int dhd_register_cpuhp_callback(dhd_info_t *dhd)
301 {
302 	int cpuhp_ret = 0;
303 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
304 	cpuhp_ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd",
305 		dhd_cpu_startup_callback, dhd_cpu_teardown_callback);
306 
307 	if (cpuhp_ret < 0) {
308 		DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
309 			__FUNCTION__, cpuhp_ret));
310 	}
311 #else
312 	/*
313 	 * If we are able to initialize CPU masks, let's register with the
314 	 * CPU Hotplug framework to change the CPU for each job dynamically
315 	 * using the candidacy algorithm.
316 	 */
317 	dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
318 	register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
319 #endif /* LINUX_VERSION_CODE < 4.10.0 */
320 	return cpuhp_ret;
321 }
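/*
 * Note: on kernels >= 4.10, cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, ...) returns
 * the dynamically allocated hotplug state number (a positive value) on success,
 * which is why only a negative return is treated as a failure above.
 */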
322 
323 int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
324 {
325 	int ret = 0;
326 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
327 	/* Don't want to call tear down while unregistering */
328 	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
329 #else
330 	if (dhd->cpu_notifier.notifier_call != NULL) {
331 		unregister_cpu_notifier(&dhd->cpu_notifier);
332 	}
333 #endif
334 	return ret;
335 }
336 
337 #if defined(DHD_LB_STATS)
338 void dhd_lb_stats_reset(dhd_pub_t *dhdp)
339 {
340 	dhd_info_t *dhd;
341 	int i, j, num_cpus = num_possible_cpus();
342 
343 	if (dhdp == NULL) {
344 		DHD_ERROR(("%s dhd pub pointer is NULL \n",
345 			__FUNCTION__));
346 		return;
347 	}
348 
349 	dhd = dhdp->info;
350 	if (dhd == NULL) {
351 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
352 		return;
353 	}
354 
355 	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
356 	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
357 
358 	/* reset NAPI latency stats */
359 	if (dhd->napi_latency) {
360 		bzero(dhd->napi_latency, DHD_NAPI_LATENCY_SIZE);
361 	}
362 	/* reset NAPI per cpu stats */
363 	if (dhd->napi_percpu_run_cnt) {
364 		for (i = 0; i < num_cpus; i++) {
365 			DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
366 		}
367 	}
368 
369 	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
370 
371 	if (dhd->rxc_percpu_run_cnt) {
372 		for (i = 0; i < num_cpus; i++) {
373 			DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
374 		}
375 	}
376 
377 	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
378 
379 	if (dhd->txc_percpu_run_cnt) {
380 		for (i = 0; i < num_cpus; i++) {
381 			DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
382 		}
383 	}
384 
385 	if (dhd->txp_percpu_run_cnt) {
386 		for (i = 0; i < num_cpus; i++) {
387 			DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
388 		}
389 	}
390 
391 	if (dhd->tx_start_percpu_run_cnt) {
392 		for (i = 0; i < num_cpus; i++) {
393 			DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
394 		}
395 	}
396 
397 	for (j = 0; j < HIST_BIN_SIZE; j++) {
398 		for (i = 0; i < num_cpus; i++) {
399 			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
400 		}
401 	}
402 
403 	dhd->pub.lb_rxp_strt_thr_hitcnt = 0;
404 	dhd->pub.lb_rxp_stop_thr_hitcnt = 0;
405 
406 	dhd->pub.lb_rxp_napi_sched_cnt = 0;
407 	dhd->pub.lb_rxp_napi_complete_cnt = 0;
408 	return;
409 }
410 
411 void dhd_lb_stats_init(dhd_pub_t *dhdp)
412 {
413 	dhd_info_t *dhd;
414 	int i, j, num_cpus = num_possible_cpus();
415 	int alloc_size = sizeof(uint32) * num_cpus;
416 
417 	if (dhdp == NULL) {
418 		DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
419 			__FUNCTION__));
420 		return;
421 	}
422 
423 	dhd = dhdp->info;
424 	if (dhd == NULL) {
425 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
426 		return;
427 	}
428 
429 	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
430 	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
431 
432 	/* NAPI latency stats */
433 	dhd->napi_latency = (uint64 *)MALLOCZ(dhdp->osh, DHD_NAPI_LATENCY_SIZE);
434 	/* NAPI per cpu stats */
435 	dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
436 	if (!dhd->napi_percpu_run_cnt) {
437 		DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n",
438 			__FUNCTION__));
439 		return;
440 	}
441 	for (i = 0; i < num_cpus; i++)
442 		DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
443 
444 	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
445 
446 	dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
447 	if (!dhd->rxc_percpu_run_cnt) {
448 		DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n",
449 			__FUNCTION__));
450 		return;
451 	}
452 	for (i = 0; i < num_cpus; i++)
453 		DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
454 
455 	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
456 
457 	dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
458 	if (!dhd->txc_percpu_run_cnt) {
459 		DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n",
460 			__FUNCTION__));
461 		return;
462 	}
463 	for (i = 0; i < num_cpus; i++)
464 		DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
465 
466 	dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
467 	if (!dhd->cpu_online_cnt) {
468 		DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n",
469 			__FUNCTION__));
470 		return;
471 	}
472 	for (i = 0; i < num_cpus; i++)
473 		DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
474 
475 	dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
476 	if (!dhd->cpu_offline_cnt) {
477 		DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n",
478 			__FUNCTION__));
479 		return;
480 	}
481 	for (i = 0; i < num_cpus; i++)
482 		DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
483 
484 	dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
485 	if (!dhd->txp_percpu_run_cnt) {
486 		DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n",
487 			__FUNCTION__));
488 		return;
489 	}
490 	for (i = 0; i < num_cpus; i++)
491 		DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
492 
493 	dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
494 	if (!dhd->tx_start_percpu_run_cnt) {
495 		DHD_ERROR(("%s(): tx_start_percpu_run_cnt malloc failed \n",
496 			__FUNCTION__));
497 		return;
498 	}
499 	for (i = 0; i < num_cpus; i++)
500 		DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
501 
502 	for (j = 0; j < HIST_BIN_SIZE; j++) {
503 		dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
504 		if (!dhd->napi_rx_hist[j]) {
505 			DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
506 				__FUNCTION__, j));
507 			return;
508 		}
509 		for (i = 0; i < num_cpus; i++) {
510 			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
511 		}
512 	}
513 
514 	dhd->pub.lb_rxp_strt_thr_hitcnt = 0;
515 	dhd->pub.lb_rxp_stop_thr_hitcnt = 0;
516 
517 	dhd->pub.lb_rxp_napi_sched_cnt = 0;
518 	dhd->pub.lb_rxp_napi_complete_cnt = 0;
519 	return;
520 }
521 
522 void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
523 {
524 	dhd_info_t *dhd;
525 	int j, num_cpus = num_possible_cpus();
526 	int alloc_size = sizeof(uint32) * num_cpus;
527 
528 	if (dhdp == NULL) {
529 		DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
530 			__FUNCTION__));
531 		return;
532 	}
533 
534 	dhd = dhdp->info;
535 	if (dhd == NULL) {
536 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
537 		return;
538 	}
539 
540 	if (dhd->napi_percpu_run_cnt) {
541 		MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
542 	}
543 	if (dhd->rxc_percpu_run_cnt) {
544 		MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
545 	}
546 	if (dhd->txc_percpu_run_cnt) {
547 		MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
548 	}
549 	if (dhd->cpu_online_cnt) {
550 		MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
551 	}
552 	if (dhd->cpu_offline_cnt) {
553 		MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
554 	}
555 
556 	if (dhd->txp_percpu_run_cnt) {
557 		MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
558 	}
559 	if (dhd->tx_start_percpu_run_cnt) {
560 		MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
561 	}
562 	if (dhd->napi_latency) {
563 		MFREE(dhdp->osh, dhd->napi_latency, DHD_NAPI_LATENCY_SIZE);
564 	}
565 
566 	for (j = 0; j < HIST_BIN_SIZE; j++) {
567 		if (dhd->napi_rx_hist[j]) {
568 			MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
569 		}
570 	}
571 
572 	return;
573 }
574 
575 void dhd_lb_stats_dump_napi_latency(dhd_pub_t *dhdp,
576 	struct bcmstrbuf *strbuf, uint64 *napi_latency)
577 {
578 	uint32 i;
579 
580 	bcm_bprintf(strbuf, "napi-latency(us): \t count\n");
581 	for (i = 0; i < DHD_NUM_NAPI_LATENCY_ROWS; i++) {
582 		bcm_bprintf(strbuf, "%16u: \t %llu\n", 1U<<i, napi_latency[i]);
583 	}
584 }
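/*
 * Example of the table printed above: the row labelled "1" counts polls that ran
 * within about 1us of being scheduled, the row labelled "512" counts polls whose
 * latency rounds up (via next_larger_power2()) to 512us, and the last row
 * ("65536") accumulates every latency beyond 32768us, i.e. the default case in
 * dhd_lb_stats_update_napi_latency().
 */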
585 
586 void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp,
587 	struct bcmstrbuf *strbuf, uint32 **hist)
588 {
589 	int i, j;
590 	uint32 *per_cpu_total;
591 	uint32 total = 0;
592 	uint32 num_cpus = num_possible_cpus();
593 
594 	per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
595 	if (!per_cpu_total) {
596 		DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
597 		return;
598 	}
599 	bzero(per_cpu_total, sizeof(uint32) * num_cpus);
600 
601 	bcm_bprintf(strbuf, "CPU: \t\t");
602 	for (i = 0; i < num_cpus; i++)
603 		bcm_bprintf(strbuf, "%d\t", i);
604 	bcm_bprintf(strbuf, "\nBin\n");
605 
606 	for (i = 0; i < HIST_BIN_SIZE; i++) {
607 		bcm_bprintf(strbuf, "%d:\t\t", 1<<i);
608 		for (j = 0; j < num_cpus; j++) {
609 			bcm_bprintf(strbuf, "%d\t", hist[i][j]);
610 		}
611 		bcm_bprintf(strbuf, "\n");
612 	}
613 	bcm_bprintf(strbuf, "Per CPU Total \t");
614 	total = 0;
615 	for (i = 0; i < num_cpus; i++) {
616 		for (j = 0; j < HIST_BIN_SIZE; j++) {
617 			per_cpu_total[i] += (hist[j][i] * (1<<j));
618 		}
619 		bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
620 		total += per_cpu_total[i];
621 	}
622 	bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
623 
624 	if (per_cpu_total) {
625 		MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
626 	}
627 	return;
628 }
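/*
 * The "Per CPU Total" row above is an estimate: bin j counts runs whose packet
 * count was binned to 2^j by dhd_lb_stats_update_histo(), so the total is
 * sum_j(hist[j][cpu] * 2^j). For example, 5 runs in bin 0 and 2 runs in bin 3
 * contribute 5*1 + 2*8 = 21 packets to that CPU's total.
 */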
629 
630 void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
631 {
632 	int i, num_cpus = num_possible_cpus();
633 
634 	bcm_bprintf(strbuf, "CPU: \t\t");
635 	for (i = 0; i < num_cpus; i++)
636 		bcm_bprintf(strbuf, "%d\t", i);
637 	bcm_bprintf(strbuf, "\n");
638 
639 	bcm_bprintf(strbuf, "Val: \t\t");
640 	for (i = 0; i < num_cpus; i++)
641 		bcm_bprintf(strbuf, "%u\t", *(p+i));
642 	bcm_bprintf(strbuf, "\n");
643 	return;
644 }
645 
646 #ifdef DHD_MEM_STATS
647 uint64 dhd_lb_mem_usage(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
648 {
649 	dhd_info_t *dhd;
650 	uint16 rxbufpost_sz;
651 	uint16 rx_post_active = 0;
652 	uint16 rx_cmpl_active = 0;
653 	uint64 rx_path_memory_usage = 0;
654 
655 	if (dhdp == NULL || strbuf == NULL) {
656 		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
657 			__FUNCTION__, dhdp, strbuf));
658 		return 0;
659 	}
660 
661 	dhd = dhdp->info;
662 	if (dhd == NULL) {
663 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
664 		return 0;
665 	}
666 	rxbufpost_sz = dhd_prot_get_rxbufpost_sz(dhdp);
667 	if (rxbufpost_sz == 0) {
668 		rxbufpost_sz = DHD_FLOWRING_RX_BUFPOST_PKTSZ;
669 	}
670 	rx_path_memory_usage = rxbufpost_sz * (skb_queue_len(&dhd->rx_pend_queue) +
671 		skb_queue_len(&dhd->rx_napi_queue) +
672 		skb_queue_len(&dhd->rx_process_queue));
673 	rx_post_active = dhd_prot_get_h2d_rx_post_active(dhdp);
674 	if (rx_post_active != 0) {
675 		rx_path_memory_usage += (rxbufpost_sz * rx_post_active);
676 	}
677 
678 	rx_cmpl_active = dhd_prot_get_d2h_rx_cpln_active(dhdp);
679 	if (rx_cmpl_active != 0) {
680 		rx_path_memory_usage += (rxbufpost_sz * rx_cmpl_active);
681 	}
682 
683 	dhdp->rxpath_mem = rx_path_memory_usage;
684 	bcm_bprintf(strbuf, "\nrxbufpost_sz: %d rx_post_active: %d rx_cmpl_active: %d "
685 		"pend_queue_len: %d napi_queue_len: %d process_queue_len: %d\n",
686 		rxbufpost_sz, rx_post_active, rx_cmpl_active,
687 		skb_queue_len(&dhd->rx_pend_queue),
688 		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_process_queue));
689 	bcm_bprintf(strbuf, "DHD rx-path memory_usage: %llubytes %lluKB \n",
690 		rx_path_memory_usage, (rx_path_memory_usage/ 1024));
691 	return rx_path_memory_usage;
692 }
693 #endif /* DHD_MEM_STATS */
694 
695 void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
696 {
697 	dhd_info_t *dhd;
698 
699 	if (dhdp == NULL || strbuf == NULL) {
700 		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
701 			__FUNCTION__, dhdp, strbuf));
702 		return;
703 	}
704 
705 	dhd = dhdp->info;
706 	if (dhd == NULL) {
707 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
708 		return;
709 	}
710 
711 	bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
712 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
713 
714 	bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
715 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
716 
717 	bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
718 		dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
719 		dhd->txc_sched_cnt);
720 
721 	bcm_bprintf(strbuf, "\nCPUs: dpc_cpu %u napi_cpu %u net_tx_cpu %u tx_cpu %u\n",
722 		atomic_read(&dhd->dpc_cpu),
723 		atomic_read(&dhd->rx_napi_cpu),
724 		atomic_read(&dhd->net_tx_cpu),
725 		atomic_read(&dhd->tx_cpu));
726 
727 #ifdef DHD_LB_RXP
728 	bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
729 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
730 	bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
731 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
732 	bcm_bprintf(strbuf, "\nNAPI poll latency stats ie from napi schedule to napi execution\n");
733 	dhd_lb_stats_dump_napi_latency(dhdp, strbuf, dhd->napi_latency);
734 #endif /* DHD_LB_RXP */
735 
736 #ifdef DHD_LB_TXP
737 	bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
738 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
739 
740 	bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
741 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
742 #endif /* DHD_LB_TXP */
743 }
744 
745 void dhd_lb_stats_update_napi_latency(uint64 *bin, uint32 latency)
746 {
747 	uint64 *p;
748 	uint32 bin_power;
749 	bin_power = next_larger_power2(latency);
750 
751 	switch (bin_power) {
752 		case   1: p = bin + 0; break;
753 		case   2: p = bin + 1; break;
754 		case   4: p = bin + 2; break;
755 		case   8: p = bin + 3; break;
756 		case  16: p = bin + 4; break;
757 		case  32: p = bin + 5; break;
758 		case  64: p = bin + 6; break;
759 		case 128: p = bin + 7; break;
760 		case 256: p = bin + 8; break;
761 		case 512: p = bin + 9; break;
762 		case 1024: p = bin + 10; break;
763 		case 2048: p = bin + 11; break;
764 		case 4096: p = bin + 12; break;
765 		case 8192: p = bin + 13; break;
766 		case 16384: p = bin + 14; break;
767 		case 32768: p = bin + 15; break;
768 		default : p = bin + 16; break;
769 	}
770 	ASSERT((p - bin) < DHD_NUM_NAPI_LATENCY_ROWS);
771 	*p = *p + 1;
772 	return;
773 
774 }
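/*
 * Example: a schedule-to-poll latency of 300us gives next_larger_power2() == 512,
 * so the counter at bin[9] is incremented; any latency that maps beyond 32768us
 * falls through to the overflow counter at bin[16].
 */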
775 
776 void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
777 {
778 	uint32 bin_power;
779 	uint32 *p;
780 	bin_power = next_larger_power2(count);
781 
782 	switch (bin_power) {
783 		case   1: p = bin[0] + cpu; break;
784 		case   2: p = bin[1] + cpu; break;
785 		case   4: p = bin[2] + cpu; break;
786 		case   8: p = bin[3] + cpu; break;
787 		case  16: p = bin[4] + cpu; break;
788 		case  32: p = bin[5] + cpu; break;
789 		case  64: p = bin[6] + cpu; break;
790 		case 128: p = bin[7] + cpu; break;
791 		default : p = bin[8] + cpu; break;
792 	}
793 
794 	*p = *p + 1;
795 	return;
796 }
797 
798 void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
799 {
800 	int cpu;
801 	dhd_info_t *dhd = dhdp->info;
802 
803 	cpu = get_cpu();
804 	put_cpu();
805 	dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
806 
807 	return;
808 }
809 
810 void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
811 {
812 	int cpu;
813 	dhd_info_t *dhd = dhdp->info;
814 
815 	cpu = get_cpu();
816 	put_cpu();
817 	dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
818 
819 	return;
820 }
821 
822 void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
823 {
824 	int cpu;
825 	dhd_info_t *dhd = dhdp->info;
826 
827 	cpu = get_cpu();
828 	put_cpu();
829 	dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
830 
831 	return;
832 }
833 
834 void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
835 {
836 	dhd_info_t *dhd = dhdp->info;
837 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
838 }
839 
840 void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
841 {
842 	dhd_info_t *dhd = dhdp->info;
843 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
844 }
845 #endif /* DHD_LB_STATS */
846 
847 /**
848  * dhd_tasklet_schedule - Function that runs in IPI context of the destination
849  * CPU and schedules a tasklet.
850  * @tasklet: opaque pointer to the tasklet
851  */
852 INLINE void
853 dhd_tasklet_schedule(void *tasklet)
854 {
855 	tasklet_schedule((struct tasklet_struct *)tasklet);
856 }
857 
858 /**
859  * dhd_work_schedule_on - Executes the passed work on a given CPU
860  * @work: work to be scheduled
861  * @on_cpu: cpu core id
862  *
863  * If the requested cpu is online, the work is queued to that cpu via
864  * schedule_work_on and the work function
865  * will be invoked on the requested CPU.
866  */
867 
868 INLINE void
869 dhd_work_schedule_on(struct work_struct *work, int on_cpu)
870 {
871 	schedule_work_on(on_cpu, work);
872 }
873 
874 INLINE void
875 dhd_delayed_work_schedule_on(struct delayed_work *dwork, int on_cpu, ulong delay)
876 {
877 	schedule_delayed_work_on(on_cpu, dwork, delay);
878 }
879 
880 #if defined(DHD_LB_TXP)
881 void dhd_tx_dispatcher_work(struct work_struct * work)
882 {
883 	struct dhd_info *dhd;
884 
885 	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
886 	dhd = container_of(work, struct dhd_info, tx_dispatcher_work);
887 	GCC_DIAGNOSTIC_POP();
888 
889 	dhd_tasklet_schedule(&dhd->tx_tasklet);
890 }
891 
892 /**
893  * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
894  * on another cpu. The tx_tasklet will take care of actually putting
895  * the skbs into the appropriate flow ring and ringing the H2D interrupt
896  *
897  * @dhdp: pointer to dhd_pub object
898  */
899 void
900 dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
901 {
902 	dhd_info_t *dhd = dhdp->info;
903 	int curr_cpu;
904 	int tx_cpu;
905 	int prev_net_tx_cpu;
906 
907 	/*
908 	 * get_cpu() will disable preemption and will not allow any cpu to go offline,
909 	 * and put_cpu() is called only after scheduling tx_dispatcher_work.
910 	 */
911 	curr_cpu = get_cpu();
912 
913 	/* Record the CPU in which the TX request from Network stack came */
914 	prev_net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
915 	atomic_set(&dhd->net_tx_cpu, curr_cpu);
916 
917 	tx_cpu = atomic_read(&dhd->tx_cpu);
918 
919 	/*
920 	 * Avoid cpu candidacy if the override is set via sysfs for changing the cpu manually
921 	 */
922 	if (dhd->dhd_lb_candidacy_override) {
923 		if (!cpu_online(tx_cpu)) {
924 			tx_cpu = curr_cpu;
925 		}
926 	} else {
927 		/*
928 		 * Now if the NET TX has been scheduled on the same CPU
929 		 * that is chosen for Tx processing,
930 		 * OR on a different cpu than it was previously scheduled on,
931 		 * OR if tx_cpu is offline,
932 		 * call the cpu candidacy algorithm to recompute tx_cpu.
933 		 */
934 		if ((curr_cpu == tx_cpu) || (curr_cpu != prev_net_tx_cpu) ||
935 			!cpu_online(tx_cpu)) {
936 			/* Re compute LB CPUs */
937 			dhd_select_cpu_candidacy(dhd);
938 			/* Use updated tx cpu */
939 			tx_cpu = atomic_read(&dhd->tx_cpu);
940 		}
941 	}
942 	/*
943 	 * Schedule tx_dispatcher_work on the tx cpu, which
944 	 * in turn will schedule tx_tasklet.
945 	 */
946 	dhd_work_schedule_on(&dhd->tx_dispatcher_work, tx_cpu);
947 
948 	put_cpu();
949 }
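/*
 * Tx load-balance flow, as implemented in this file: dhd_lb_sendpkt() tags the
 * skb and queues it on tx_pend_queue; dhd_lb_tx_dispatch() picks (or recomputes)
 * tx_cpu and schedules tx_dispatcher_work there, which schedules tx_tasklet;
 * the tasklet handler dhd_lb_tx_handler()/dhd_lb_tx_process() then dequeues up
 * to DHD_LB_TXBOUND packets and hands them to __dhd_sendpkt().
 */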
950 #endif /* DHD_LB_TXP */
951 
952 #if defined(DHD_LB_RXP)
953 
954 /**
955  * dhd_napi_poll - Load balance napi poll function to process received
956  * packets and send up the network stack using netif_receive_skb()
957  *
958  * @napi: napi object in which context this poll function is invoked
959  * @budget: number of packets to be processed.
960  *
961  * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
962  * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
963  * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
964  * packet tag and sendup.
965  */
966 int
967 dhd_napi_poll(struct napi_struct *napi, int budget)
968 {
969 	int ifid;
970 	const int pkt_count = 1;
971 	const int chan = 0;
972 	struct sk_buff * skb;
973 	unsigned long flags;
974 	struct dhd_info *dhd;
975 	int processed = 0;
976 	int dpc_cpu;
977 #ifdef DHD_LB_STATS
978 	uint32 napi_latency;
979 #endif /* DHD_LB_STATS */
980 
981 	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
982 	dhd = container_of(napi, struct dhd_info, rx_napi_struct);
983 	GCC_DIAGNOSTIC_POP();
984 
985 #ifdef DHD_LB_STATS
986 	napi_latency = (uint32)(OSL_SYSUPTIME_US() - dhd->napi_schedule_time);
987 	dhd_lb_stats_update_napi_latency(dhd->napi_latency, napi_latency);
988 #endif /* DHD_LB_STATS */
989 	DHD_LB_INFO(("%s napi_queue<%d> budget<%d>\n",
990 		__FUNCTION__, skb_queue_len(&dhd->rx_napi_queue), budget));
991 
992 	/*
993 	 * Extract the entire rx_napi_queue into another rx_process_queue
994 	 * and process only 'budget' number of skbs from rx_process_queue.
995 	 * If there are more items to be processed, napi poll will be rescheduled.
996 	 * During the next iteration, the next set of skbs from
997 	 * rx_napi_queue will be extracted and attached to the tail of rx_process_queue.
998 	 * Again, budget number of skbs will be processed from rx_process_queue.
999 	 * If there are fewer than budget number of skbs in rx_process_queue,
1000 	 * call napi_complete to stop rescheduling napi poll.
1001 	 */
1002 	DHD_RX_NAPI_QUEUE_LOCK(&dhd->rx_napi_queue.lock, flags);
1003 	skb_queue_splice_tail_init(&dhd->rx_napi_queue, &dhd->rx_process_queue);
1004 	DHD_RX_NAPI_QUEUE_UNLOCK(&dhd->rx_napi_queue.lock, flags);
1005 
1006 	while ((processed < budget) && (skb = __skb_dequeue(&dhd->rx_process_queue)) != NULL) {
1007 		OSL_PREFETCH(skb->data);
1008 
1009 		ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1010 
1011 		DHD_LB_INFO(("%s dhd_rx_frame pkt<%p> ifid<%d>\n",
1012 			__FUNCTION__, skb, ifid));
1013 
1014 		dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1015 		processed++;
1016 	}
1017 
1018 	if (atomic_read(&dhd->pub.lb_rxp_flow_ctrl) &&
1019 		(dhd_lb_rxp_process_qlen(&dhd->pub) <= dhd->pub.lb_rxp_strt_thr)) {
1020 		/*
1021 		 * If the dpc CPU is online, schedule dhd_dpc_dispatcher_work on the dpc cpu, which
1022 		 * in turn will schedule the dpc tasklet. Else schedule the dpc tasklet directly.
1023 		 */
1024 		get_cpu();
1025 		dpc_cpu = atomic_read(&dhd->dpc_cpu);
1026 		if (!cpu_online(dpc_cpu)) {
1027 			dhd_tasklet_schedule(&dhd->tasklet);
1028 		} else {
1029 			dhd_delayed_work_schedule_on(&dhd->dhd_dpc_dispatcher_work, dpc_cpu, 0);
1030 		}
1031 		put_cpu();
1032 	}
1033 	DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1034 
1035 	DHD_LB_INFO(("%s processed %d\n", __FUNCTION__, processed));
1036 
1037 	/*
1038 	 * Signal napi complete only when no more packets are processed and
1039 	 * none are left in the enqueued queue.
1040 	 */
1041 	if ((processed == 0) && (skb_queue_len(&dhd->rx_napi_queue) == 0)) {
1042 		napi_complete(napi);
1043 #ifdef DHD_LB_STATS
1044 		dhd->pub.lb_rxp_napi_complete_cnt++;
1045 #endif /* DHD_LB_STATS */
1046 		DHD_GENERAL_LOCK(&dhd->pub, flags);
1047 		DHD_BUS_BUSY_CLEAR_IN_NAPI(&dhd->pub);
1048 		DHD_GENERAL_UNLOCK(&dhd->pub, flags);
1049 		return 0;
1050 	}
1051 
1052 #ifdef DHD_LB_STATS
1053 	dhd->napi_schedule_time = OSL_SYSUPTIME_US();
1054 #endif /* DHD_LB_STATS */
1055 
1056 	/* Return budget so that it gets rescheduled immediately */
1057 	return budget;
1058 }
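/*
 * Rx load-balance flow, as implemented in this file: dhd_lb_rx_pkt_enqueue()
 * tags the skb with its ifidx and queues it on rx_pend_queue;
 * dhd_lb_rx_napi_dispatch() splices rx_pend_queue onto rx_napi_queue and
 * schedules rx_napi_dispatcher_work on rx_napi_cpu; dhd_napi_schedule() raises
 * NET_RX_SOFTIRQ for rx_napi_struct; and dhd_napi_poll() above splices
 * rx_napi_queue onto rx_process_queue and sends up to 'budget' packets through
 * dhd_rx_frame().
 */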
1059 
1060 /**
1061  * dhd_napi_schedule - Place the napi struct into the current cpus softnet napi
1062  * poll list. This function may be invoked via the smp_call_function_single
1063  * from a remote CPU.
1064  *
1065  * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
1066  * after the napi_struct is added to the softnet data's poll_list
1067  *
1068  * @info: pointer to a dhd_info struct
1069  */
1070 static void
1071 dhd_napi_schedule(void *info)
1072 {
1073 	dhd_info_t *dhd = (dhd_info_t *)info;
1074 	unsigned long flags;
1075 
1076 	DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n",
1077 		__FUNCTION__, &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1078 
1079 	/* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1080 	if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1081 
1082 		/*
1083 		 * Set the bus busy state in NAPI, which will be cleared after
1084 		 * napi_complete from napi_poll context
1085 		 */
1086 		DHD_GENERAL_LOCK(&dhd->pub, flags);
1087 		DHD_BUS_BUSY_SET_IN_NAPI(&dhd->pub);
1088 		DHD_GENERAL_UNLOCK(&dhd->pub, flags);
1089 
1090 #ifdef DHD_LB_STATS
1091 		dhd->napi_schedule_time = OSL_SYSUPTIME_US();
1092 		dhd->pub.lb_rxp_napi_sched_cnt++;
1093 #endif /* DHD_LB_STATS */
1094 		__napi_schedule(&dhd->rx_napi_struct);
1095 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1096 		raise_softirq(NET_RX_SOFTIRQ);
1097 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1098 	}
1099 
1100 	/*
1101 	 * If the rx_napi_struct was already running, then we let it complete
1102 	 * processing all its packets. The rx_napi_struct may only run on one
1103 	 * core at a time, to avoid out-of-order handling.
1104 	 */
1105 }
1106 
1107 /**
1108  * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
1109  * action after placing the dhd's rx_process napi object in the remote CPU's
1110  * softnet data's poll_list.
1111  *
1112  * @dhd: dhd_info which has the rx_process napi object
1113  * @on_cpu: desired remote CPU id
1114  */
1115 static INLINE int
1116 dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1117 {
1118 	int wait = 0; /* asynchronous IPI */
1119 	DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n",
1120 		__FUNCTION__, dhd, &dhd->rx_napi_struct, on_cpu));
1121 
1122 	if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1123 		DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1124 			__FUNCTION__, on_cpu));
1125 	}
1126 
1127 	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1128 
1129 	return 0;
1130 }
1131 
1132 /*
1133  * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on
1134  * Why should we do this?
1135  * The candidacy algorithm is run from the callback function
1136  * registered to the CPU hotplug notifier. This callback happens from worker
1137  * context. The dhd_napi_schedule_on is also called from worker context.
1138  * Note that both of these can run on two different CPUs at the same time.
1139  * So we can possibly have a window where a given CPUn is being brought
1140  * down from CPUm while we try to run a function on CPUn.
1141  * To prevent this it is better to have the whole code that executes an SMP
1142  * function run under get_online_cpus.
1143  * This function call ensures that the hotplug mechanism does not kick in
1144  * until we are done dealing with online CPUs.
1145  * If the hotplug worker is already running, no worries, because the
1146  * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
1147  *
1148  * The below mentioned code structure is proposed in
1149  * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
1150  * for the question
1151  * Q: I need to ensure that a particular cpu is not removed when there is some
1152  *    work specific to this cpu is in progress
1153  *
1154  * According to the documentation, calling get_online_cpus is NOT required if
1155  * we are running from tasklet context. Since dhd_rx_napi_dispatcher_work can
1156  * run from work queue context, we have to call these functions.
1157  */
1158 void dhd_rx_napi_dispatcher_work(struct work_struct * work)
1159 {
1160 	struct dhd_info *dhd;
1161 	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
1162 	dhd = container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1163 	GCC_DIAGNOSTIC_POP();
1164 
1165 	dhd_napi_schedule(dhd);
1166 }
1167 
1168 /**
1169  * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1170  * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1171  * the packets enqueued into the rx_napi_queue and sendup.
1172  * The producer's rx packet queue is appended to the rx_napi_queue before
1173  * dispatching the rx_napi_struct.
1174  */
1175 void
1176 dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1177 {
1178 	unsigned long flags;
1179 	dhd_info_t *dhd = dhdp->info;
1180 	int curr_cpu;
1181 	int rx_napi_cpu;
1182 	int prev_dpc_cpu;
1183 
1184 	if (dhd->rx_napi_netdev == NULL) {
1185 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1186 		return;
1187 	}
1188 
1189 	DHD_LB_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1190 		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_pend_queue)));
1191 
1192 	/* append the producer's queue of packets to the napi's rx process queue */
1193 	DHD_RX_NAPI_QUEUE_LOCK(&dhd->rx_napi_queue.lock, flags);
1194 	skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1195 	DHD_RX_NAPI_QUEUE_UNLOCK(&dhd->rx_napi_queue.lock, flags);
1196 
1197 	/* If sysfs lb_rxp_active is not set, schedule on current cpu */
1198 	if (!atomic_read(&dhd->lb_rxp_active))
1199 	{
1200 		dhd_napi_schedule(dhd);
1201 		return;
1202 	}
1203 
1204 	/*
1205 	 * get_cpu() will disable preemption and will not allow any cpu to go offline,
1206 	 * and put_cpu() is called only after scheduling rx_napi_dispatcher_work.
1207 	 */
1208 	curr_cpu = get_cpu();
1209 
1210 	prev_dpc_cpu = atomic_read(&dhd->prev_dpc_cpu);
1211 
1212 	rx_napi_cpu = atomic_read(&dhd->rx_napi_cpu);
1213 
1214 	/*
1215 	 * Avoid cpu candidacy if the override is set via sysfs for changing the cpu manually
1216 	 */
1217 	if (dhd->dhd_lb_candidacy_override) {
1218 		if (!cpu_online(rx_napi_cpu)) {
1219 			rx_napi_cpu = curr_cpu;
1220 		}
1221 	} else {
1222 		/*
1223 		 * Now if the DPC has been scheduled on the same CPU
1224 		 * that is chosen for Rx napi processing,
1225 		 * OR on a different cpu than it was previously scheduled on,
1226 		 * OR if rx_napi_cpu is offline,
1227 		 * call the cpu candidacy algorithm to recompute napi_cpu.
1228 		 */
1229 		if ((curr_cpu == rx_napi_cpu) || (curr_cpu != prev_dpc_cpu) ||
1230 			!cpu_online(rx_napi_cpu)) {
1231 			/* Re compute LB CPUs */
1232 			dhd_select_cpu_candidacy(dhd);
1233 			/* Use updated napi cpu */
1234 			rx_napi_cpu = atomic_read(&dhd->rx_napi_cpu);
1235 		}
1236 
1237 	}
1238 
1239 	DHD_LB_INFO(("%s : schedule to curr_cpu : %d, rx_napi_cpu : %d\n",
1240 		__FUNCTION__, curr_cpu, rx_napi_cpu));
1241 	dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, rx_napi_cpu);
1242 	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1243 
1244 	put_cpu();
1245 }
1246 
1247 /**
1248  * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1249  */
1250 void
1251 dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1252 {
1253 	dhd_info_t *dhd = dhdp->info;
1254 
1255 	DHD_LB_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1256 		pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1257 	DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1258 	__skb_queue_tail(&dhd->rx_pend_queue, pkt);
1259 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1260 }
1261 
1262 unsigned long
1263 dhd_read_lb_rxp(dhd_pub_t *dhdp)
1264 {
1265 	dhd_info_t *dhd = dhdp->info;
1266 	return atomic_read(&dhd->lb_rxp_active);
1267 }
1268 
1269 uint32
1270 dhd_lb_rxp_process_qlen(dhd_pub_t *dhdp)
1271 {
1272 	dhd_info_t *dhd = dhdp->info;
1273 	return skb_queue_len(&dhd->rx_process_queue);
1274 }
1275 #endif /* DHD_LB_RXP */
1276 
1277 #if defined(DHD_LB_TXP)
1278 int
1279 BCMFASTPATH(dhd_lb_sendpkt)(dhd_info_t *dhd, struct net_device *net,
1280 	int ifidx, void *skb)
1281 {
1282 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1283 
1284 	/* If the feature is disabled run-time do TX from here */
1285 	if (atomic_read(&dhd->lb_txp_active) == 0) {
1286 		DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1287 		 return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1288 	}
1289 
1290 	/* Store the address of net device and interface index in the Packet tag */
1291 	DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1292 	DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1293 
1294 	/* Enqueue the skb into tx_pend_queue */
1295 	skb_queue_tail(&dhd->tx_pend_queue, skb);
1296 
1297 	DHD_TRACE(("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1298 
1299 	/* Dispatch the Tx job to be processed by the tx_tasklet */
1300 	dhd_lb_tx_dispatch(&dhd->pub);
1301 
1302 	return NETDEV_TX_OK;
1303 }
1304 #endif /* DHD_LB_TXP */
1305 
1306 #ifdef DHD_LB_TXP
1307 #define DHD_LB_TXBOUND	64
1308 /*
1309  * Function that performs the TX processing on a given CPU
1310  */
1311 bool
1312 dhd_lb_tx_process(dhd_info_t *dhd)
1313 {
1314 	struct sk_buff *skb;
1315 	int cnt = 0;
1316 	struct net_device *net;
1317 	int ifidx;
1318 	bool resched = FALSE;
1319 
1320 	DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1321 	if (dhd == NULL) {
1322 		DHD_ERROR((" Null pointer DHD \r\n"));
1323 		return resched;
1324 	}
1325 
1326 	BCM_REFERENCE(net);
1327 
1328 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1329 
1330 	/* Base Loop to perform the actual Tx */
1331 	do {
1332 		skb = skb_dequeue(&dhd->tx_pend_queue);
1333 		if (skb == NULL) {
1334 			DHD_TRACE(("Dequeued a Null Packet \r\n"));
1335 			break;
1336 		}
1337 		cnt++;
1338 
1339 		net =  DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1340 		ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1341 
1342 		DHD_TRACE(("Processing skb %p for net %p index %d \r\n", skb,
1343 			net, ifidx));
1344 
1345 		__dhd_sendpkt(&dhd->pub, ifidx, skb);
1346 
1347 		if (cnt >= DHD_LB_TXBOUND) {
1348 			resched = TRUE;
1349 			break;
1350 		}
1351 
1352 	} while (1);
1353 
1354 	DHD_LB_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1355 
1356 	return resched;
1357 }
1358 
1359 void
1360 dhd_lb_tx_handler(unsigned long data)
1361 {
1362 	dhd_info_t *dhd = (dhd_info_t *)data;
1363 
1364 	if (dhd_lb_tx_process(dhd)) {
1365 		dhd_tasklet_schedule(&dhd->tx_tasklet);
1366 	}
1367 }
1368 
1369 #endif /* DHD_LB_TXP */
1370 #endif /* DHD_LB */
1371 
1372 #if defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
1373 void
1374 dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1375 {
1376 	unsigned int irq = (unsigned int)-1;
1377 	int err = BCME_OK;
1378 
1379 	if (!dhdp) {
1380 		DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1381 		return;
1382 	}
1383 
1384 	if (!dhdp->bus) {
1385 		DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1386 		return;
1387 	}
1388 
1389 	DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n",
1390 			__FUNCTION__, *cpumask_bits(cpumask)));
1391 
1392 	dhdpcie_get_pcieirq(dhdp->bus, &irq);
1393 #ifdef BCMDHD_MODULAR
1394 	err = irq_set_affinity_hint(irq, cpumask);
1395 #else
1396 	err = irq_set_affinity(irq, cpumask);
1397 #endif /* BCMDHD_MODULAR */
1398 	if (err)
1399 		DHD_ERROR(("%s : irq set affinity failed cpu:0x%lx\n",
1400 				__FUNCTION__, *cpumask_bits(cpumask)));
1401 }
1402 #endif /* DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1403