/*
 * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
 * Basically selected code segments from usb-cdc.c and usb-rndis.c
 *
 * Copyright (C) 2020, Broadcom.
 *
 * Unless you and Broadcom execute a separate written software license
 * agreement governing use of this software, this software is licensed to you
 * under the terms of the GNU General Public License version 2 (the "GPL"),
 * available at http://www.broadcom.com/licenses/GPLv2.php, with the
 * following added to such license:
 *
 * As a special exception, the copyright holders of this software give you
 * permission to link this software with independent modules, and to copy and
 * distribute the resulting executable under terms of your choice, provided that
 * you also meet, for each linked independent module, the terms and conditions of
 * the license of that module. An independent module is a module which is not
 * derived from this software. The special exception does not apply to any
 * modifications of the software.
 *
 *
 * <<Broadcom-WL-IPTag/Open:>>
 *
 * $Id$
 */

#include <dhd_linux_priv.h>

extern dhd_pub_t* g_dhd_pub;

#if defined(DHD_LB)

#ifdef DHD_LB_STATS
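/*
 * The NAPI latency histogram has 16 power-of-two bins (1us, 2us, ... 32768us)
 * plus one overflow bin, i.e. 17 rows; see dhd_lb_stats_update_napi_latency().
 */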
#define DHD_NUM_NAPI_LATENCY_ROWS (17u)
#define DHD_NAPI_LATENCY_SIZE (sizeof(uint64) * DHD_NUM_NAPI_LATENCY_ROWS)
#endif /* DHD_LB_STATS */

#ifdef DHD_REPLACE_LOG_INFO_TO_TRACE
#define DHD_LB_INFO DHD_TRACE
#else
#define DHD_LB_INFO DHD_INFO
#endif /* DHD_REPLACE_LOG_INFO_TO_TRACE */

void
dhd_lb_set_default_cpus(dhd_info_t *dhd)
{
	/* Default CPU allocation for the jobs */
	atomic_set(&dhd->rx_napi_cpu, 1);
	atomic_set(&dhd->tx_cpu, 2);
	atomic_set(&dhd->net_tx_cpu, 0);
	atomic_set(&dhd->dpc_cpu, 0);
}

void
dhd_cpumasks_deinit(dhd_info_t *dhd)
{
	free_cpumask_var(dhd->cpumask_curr_avail);
	free_cpumask_var(dhd->cpumask_primary);
	free_cpumask_var(dhd->cpumask_primary_new);
	free_cpumask_var(dhd->cpumask_secondary);
	free_cpumask_var(dhd->cpumask_secondary_new);
}

int
dhd_cpumasks_init(dhd_info_t *dhd)
{
	int id;
	uint32 cpus, num_cpus = num_possible_cpus();
	int ret = 0;

	DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n", __FUNCTION__,
		DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));

	/* FIXME: If one alloc fails we must free_cpumask_var the previous */
	if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
	    !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
	    !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
	    !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
	    !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
		DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
		ret = -ENOMEM;
		goto fail;
	}

	cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
	cpumask_clear(dhd->cpumask_primary);
	cpumask_clear(dhd->cpumask_secondary);

	if (num_cpus > 32) {
		DHD_ERROR(("%s max cpus must be 32, %d too big\n", __FUNCTION__, num_cpus));
		ASSERT(0);
	}

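	/*
	 * Each set bit 'id' in the DHD_LB_PRIMARY_CPUS/DHD_LB_SECONDARY_CPUS
	 * bitmaps selects CPU 'id' below. For example, assuming a build where
	 * DHD_LB_PRIMARY_CPUS is 0xF0, CPUs 4-7 (typically the big cores) would
	 * land in cpumask_primary; the 0xF0 value is only an illustration.
	 */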
	cpus = DHD_LB_PRIMARY_CPUS;
	for (id = 0; id < num_cpus; id++) {
		if (isset(&cpus, id))
			cpumask_set_cpu(id, dhd->cpumask_primary);
	}

	cpus = DHD_LB_SECONDARY_CPUS;
	for (id = 0; id < num_cpus; id++) {
		if (isset(&cpus, id))
			cpumask_set_cpu(id, dhd->cpumask_secondary);
	}

	return ret;
fail:
	dhd_cpumasks_deinit(dhd);
	return ret;
}

/*
 * The CPU Candidacy Algorithm
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * The available CPUs for selection are divided into two groups:
 * Primary Set - A CPU mask that carries the First Choice CPUs.
 * Secondary Set - A CPU mask that carries the Second Choice CPUs.
 *
 * There are two types of jobs that need to be assigned to the CPUs,
 * from one of the above mentioned CPU groups. The jobs are:
 * 1) Rx Packet Processing - napi_cpu
 * 2) Tx Packet Processing - tx_cpu
 *
 * To begin with, napi_cpu is on CPU0. Whenever a CPU goes
 * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
 * algo tries to pick up the first available non-boot CPU (i.e. not CPU0)
 * for napi_cpu.
 */
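/*
 * Example walk-through of dhd_select_cpu_candidacy() (hypothetical values,
 * not tied to any particular build): on an 8-core system with
 * cpumask_primary = CPUs 4-7 and cpumask_secondary = CPUs 0-3, with
 * dpc_cpu = 4, net_tx_cpu = 0 and all CPUs online, the pass below clears
 * CPU4 and CPU0 from the new masks and picks napi_cpu = 5 (first remaining
 * primary CPU) and tx_cpu = 6 (next primary CPU). If all primary CPUs were
 * offline, both jobs would fall back to the secondary mask, and if no CPU
 * were available in either mask, the hard-coded defaults napi_cpu = 1 and
 * tx_cpu = 2 would be used.
 */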
void dhd_select_cpu_candidacy(dhd_info_t *dhd)
{
	uint32 primary_available_cpus; /* count of primary available cpus */
	uint32 secondary_available_cpus; /* count of secondary available cpus */
	uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
	uint32 tx_cpu = 0; /* cpu selected for tx processing job */
	uint32 dpc_cpu = atomic_read(&dhd->dpc_cpu);
	uint32 net_tx_cpu = atomic_read(&dhd->net_tx_cpu);

	cpumask_clear(dhd->cpumask_primary_new);
	cpumask_clear(dhd->cpumask_secondary_new);

	/*
	 * Now select from the primary mask. Even if a Job is
	 * already running on a CPU in secondary group, we still move
	 * to primary CPU. So no conditional checks.
	 */
	cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
		dhd->cpumask_curr_avail);

	cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
		dhd->cpumask_curr_avail);

	/* Clear DPC cpu from new masks so that dpc cpu is not chosen for LB */
	cpumask_clear_cpu(dpc_cpu, dhd->cpumask_primary_new);
	cpumask_clear_cpu(dpc_cpu, dhd->cpumask_secondary_new);

	/* Clear net_tx_cpu from new masks so that same is not chosen for LB */
	cpumask_clear_cpu(net_tx_cpu, dhd->cpumask_primary_new);
	cpumask_clear_cpu(net_tx_cpu, dhd->cpumask_secondary_new);

	primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);

#if defined(DHD_LB_HOST_CTRL)
	/* Do not use primary CPUs if DHD received the affinity-off command
	 * from the framework
	 */
	if (primary_available_cpus > 0 && dhd->permitted_primary_cpu)
#else
	if (primary_available_cpus > 0)
#endif /* DHD_LB_HOST_CTRL */
	{
		napi_cpu = cpumask_first(dhd->cpumask_primary_new);

		/* If no further CPU is available,
		 * cpumask_next returns >= nr_cpu_ids
		 */
		tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
		if (tx_cpu >= nr_cpu_ids)
			tx_cpu = 0;
	}

	DHD_INFO(("%s After primary CPU check napi_cpu %d tx_cpu %d\n",
		__FUNCTION__, napi_cpu, tx_cpu));

	/* -- Now check for the CPUs from the secondary mask -- */
	secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);

	DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n",
		__FUNCTION__, secondary_available_cpus, nr_cpu_ids));

	if (secondary_available_cpus > 0) {
		/* At this point if napi_cpu is unassigned it means no CPU
		 * is online from Primary Group
		 */
#if defined(DHD_LB_TXP_LITTLE_CORE_CTRL)
		/* Clear tx_cpu, so that it can be picked from little core */
		tx_cpu = 0;
#endif /* DHD_LB_TXP_LITTLE_CORE_CTRL */
		if (napi_cpu == 0) {
			napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
			tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
		} else if (tx_cpu == 0) {
			tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
		}

		/* If no CPU was available for tx processing, choose CPU 0 */
		if (tx_cpu >= nr_cpu_ids)
			tx_cpu = 0;
	}

	if ((primary_available_cpus == 0) &&
		(secondary_available_cpus == 0)) {
		/* No CPUs available from primary or secondary mask */
		napi_cpu = 1;
		tx_cpu = 2;
	}

	DHD_INFO(("%s After secondary CPU check napi_cpu %d tx_cpu %d\n",
		__FUNCTION__, napi_cpu, tx_cpu));

	ASSERT(napi_cpu < nr_cpu_ids);
	ASSERT(tx_cpu < nr_cpu_ids);

	atomic_set(&dhd->rx_napi_cpu, napi_cpu);
	atomic_set(&dhd->tx_cpu, tx_cpu);

	return;
}

/*
 * Function to handle CPU Hotplug notifications.
 * One of the tasks it does is to trigger the CPU Candidacy algorithm
 * for load balancing.
 */

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))

int dhd_cpu_startup_callback(unsigned int cpu)
{
	dhd_info_t *dhd = g_dhd_pub->info;

	DHD_INFO(("%s(): cpu:%d\r\n", __FUNCTION__, cpu));
	DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
	cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
	dhd_select_cpu_candidacy(dhd);

	return 0;
}

int dhd_cpu_teardown_callback(unsigned int cpu)
{
	dhd_info_t *dhd = g_dhd_pub->info;

	DHD_INFO(("%s(): cpu:%d\r\n", __FUNCTION__, cpu));
	DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
	cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
	dhd_select_cpu_candidacy(dhd);

	return 0;
}
#else
int
dhd_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned long int cpu = (unsigned long int)hcpu;
	dhd_info_t *dhd;

	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
	dhd = container_of(nfb, dhd_info_t, cpu_notifier);
	GCC_DIAGNOSTIC_POP();

	if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
		DHD_INFO(("%s(): LB data is not initialized yet.\n",
			__FUNCTION__));
		return NOTIFY_BAD;
	}

	/* XXX: Do we need other action types ? */
	switch (action)
	{
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
			DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
			cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
			dhd_select_cpu_candidacy(dhd);
			break;

		case CPU_DOWN_PREPARE:
		case CPU_DOWN_PREPARE_FROZEN:
			DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
			cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
			dhd_select_cpu_candidacy(dhd);
			break;
		default:
			break;
	}

	return NOTIFY_OK;
}
#endif /* LINUX_VERSION_CODE < 4.10.0 */

int dhd_register_cpuhp_callback(dhd_info_t *dhd)
{
	int cpuhp_ret = 0;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
	cpuhp_ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd",
		dhd_cpu_startup_callback, dhd_cpu_teardown_callback);

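	/*
	 * Note: with CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() returns the
	 * dynamically allocated hotplug state id (a positive value) on success
	 * and a negative errno on failure, so only negative values are treated
	 * as an error here.
	 */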
	if (cpuhp_ret < 0) {
		DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
			__FUNCTION__, cpuhp_ret));
	}
#else
	/*
	 * If we are able to initialize CPU masks, let's register with the
	 * CPU Hotplug framework to change the CPU for each job dynamically
	 * using the candidacy algorithm.
	 */
	dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
	register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
#endif /* LINUX_VERSION_CODE < 4.10.0 */
	return cpuhp_ret;
}

int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
{
	int ret = 0;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
	/* Don't want to call tear down while unregistering */
	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
#else
	if (dhd->cpu_notifier.notifier_call != NULL) {
		unregister_cpu_notifier(&dhd->cpu_notifier);
	}
#endif
	return ret;
}

#if defined(DHD_LB_STATS)
void dhd_lb_stats_reset(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd;
	int i, j, num_cpus = num_possible_cpus();

	if (dhdp == NULL) {
		DHD_ERROR(("%s dhd pub pointer is NULL \n",
			__FUNCTION__));
		return;
	}

	dhd = dhdp->info;
	if (dhd == NULL) {
		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
		return;
	}

	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);

	/* reset NAPI latency stats */
	if (dhd->napi_latency) {
		bzero(dhd->napi_latency, DHD_NAPI_LATENCY_SIZE);
	}
	/* reset NAPI per cpu stats */
	if (dhd->napi_percpu_run_cnt) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
		}
	}

	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);

	if (dhd->rxc_percpu_run_cnt) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
		}
	}

	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);

	if (dhd->txc_percpu_run_cnt) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
		}
	}

	if (dhd->txp_percpu_run_cnt) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
		}
	}

	if (dhd->tx_start_percpu_run_cnt) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
		}
	}

	for (j = 0; j < HIST_BIN_SIZE; j++) {
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
		}
	}

	dhd->pub.lb_rxp_strt_thr_hitcnt = 0;
	dhd->pub.lb_rxp_stop_thr_hitcnt = 0;

	dhd->pub.lb_rxp_napi_sched_cnt = 0;
	dhd->pub.lb_rxp_napi_complete_cnt = 0;
	return;
}

void dhd_lb_stats_init(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd;
	int i, j, num_cpus = num_possible_cpus();
	int alloc_size = sizeof(uint32) * num_cpus;

	if (dhdp == NULL) {
		DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
			__FUNCTION__));
		return;
	}

	dhd = dhdp->info;
	if (dhd == NULL) {
		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
		return;
	}

	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);

	/* NAPI latency stats */
	dhd->napi_latency = (uint64 *)MALLOCZ(dhdp->osh, DHD_NAPI_LATENCY_SIZE);
	/* NAPI per cpu stats */
	dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->napi_percpu_run_cnt) {
		DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);

	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);

	dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->rxc_percpu_run_cnt) {
		DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);

	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);

	dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->txc_percpu_run_cnt) {
		DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);

	dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->cpu_online_cnt) {
		DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);

	dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->cpu_offline_cnt) {
		DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);

	dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->txp_percpu_run_cnt) {
		DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);

	dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
	if (!dhd->tx_start_percpu_run_cnt) {
		DHD_ERROR(("%s(): tx_start_percpu_run_cnt malloc failed \n",
			__FUNCTION__));
		return;
	}
	for (i = 0; i < num_cpus; i++)
		DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);

	for (j = 0; j < HIST_BIN_SIZE; j++) {
		dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
		if (!dhd->napi_rx_hist[j]) {
			DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
				__FUNCTION__, j));
			return;
		}
		for (i = 0; i < num_cpus; i++) {
			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
		}
	}

	dhd->pub.lb_rxp_strt_thr_hitcnt = 0;
	dhd->pub.lb_rxp_stop_thr_hitcnt = 0;

	dhd->pub.lb_rxp_napi_sched_cnt = 0;
	dhd->pub.lb_rxp_napi_complete_cnt = 0;
	return;
}

void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd;
	int j, num_cpus = num_possible_cpus();
	int alloc_size = sizeof(uint32) * num_cpus;

	if (dhdp == NULL) {
		DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
			__FUNCTION__));
		return;
	}

	dhd = dhdp->info;
	if (dhd == NULL) {
		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
		return;
	}

	if (dhd->napi_percpu_run_cnt) {
		MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
	}
	if (dhd->rxc_percpu_run_cnt) {
		MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
	}
	if (dhd->txc_percpu_run_cnt) {
		MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
	}
	if (dhd->cpu_online_cnt) {
		MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
	}
	if (dhd->cpu_offline_cnt) {
		MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
	}

	if (dhd->txp_percpu_run_cnt) {
		MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
	}
	if (dhd->tx_start_percpu_run_cnt) {
		MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
	}
	if (dhd->napi_latency) {
		MFREE(dhdp->osh, dhd->napi_latency, DHD_NAPI_LATENCY_SIZE);
	}

	for (j = 0; j < HIST_BIN_SIZE; j++) {
		if (dhd->napi_rx_hist[j]) {
			MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
		}
	}

	return;
}

void dhd_lb_stats_dump_napi_latency(dhd_pub_t *dhdp,
	struct bcmstrbuf *strbuf, uint64 *napi_latency)
{
	uint32 i;

	bcm_bprintf(strbuf, "napi-latency(us): \t count\n");
	for (i = 0; i < DHD_NUM_NAPI_LATENCY_ROWS; i++) {
		bcm_bprintf(strbuf, "%16u: \t %llu\n", 1U<<i, napi_latency[i]);
	}
}

void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp,
	struct bcmstrbuf *strbuf, uint32 **hist)
{
	int i, j;
	uint32 *per_cpu_total;
	uint32 total = 0;
	uint32 num_cpus = num_possible_cpus();

	per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
	if (!per_cpu_total) {
		DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
		return;
	}
	bzero(per_cpu_total, sizeof(uint32) * num_cpus);

	bcm_bprintf(strbuf, "CPU: \t\t");
	for (i = 0; i < num_cpus; i++)
		bcm_bprintf(strbuf, "%d\t", i);
	bcm_bprintf(strbuf, "\nBin\n");

	for (i = 0; i < HIST_BIN_SIZE; i++) {
		bcm_bprintf(strbuf, "%d:\t\t", 1<<i);
		for (j = 0; j < num_cpus; j++) {
			bcm_bprintf(strbuf, "%d\t", hist[i][j]);
		}
		bcm_bprintf(strbuf, "\n");
	}
	bcm_bprintf(strbuf, "Per CPU Total \t");
	total = 0;
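	/*
	 * Each histogram bin j counts runs that processed roughly 2^j packets
	 * (see dhd_lb_stats_update_histo()), so weighting the bin count by
	 * (1<<j) gives an approximate per-CPU packet total.
	 */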
	for (i = 0; i < num_cpus; i++) {
		for (j = 0; j < HIST_BIN_SIZE; j++) {
			per_cpu_total[i] += (hist[j][i] * (1<<j));
		}
		bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
		total += per_cpu_total[i];
	}
	bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);

	if (per_cpu_total) {
		MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
	}
	return;
}

void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
{
	int i, num_cpus = num_possible_cpus();

	bcm_bprintf(strbuf, "CPU: \t\t");
	for (i = 0; i < num_cpus; i++)
		bcm_bprintf(strbuf, "%d\t", i);
	bcm_bprintf(strbuf, "\n");

	bcm_bprintf(strbuf, "Val: \t\t");
	for (i = 0; i < num_cpus; i++)
		bcm_bprintf(strbuf, "%u\t", *(p+i));
	bcm_bprintf(strbuf, "\n");
	return;
}

#ifdef DHD_MEM_STATS
uint64 dhd_lb_mem_usage(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
{
	dhd_info_t *dhd;
	uint16 rxbufpost_sz;
	uint16 rx_post_active = 0;
	uint16 rx_cmpl_active = 0;
	uint64 rx_path_memory_usage = 0;

	if (dhdp == NULL || strbuf == NULL) {
		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
			__FUNCTION__, dhdp, strbuf));
		return 0;
	}

	dhd = dhdp->info;
	if (dhd == NULL) {
		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
		return 0;
	}
	rxbufpost_sz = dhd_prot_get_rxbufpost_sz(dhdp);
	if (rxbufpost_sz == 0) {
		rxbufpost_sz = DHD_FLOWRING_RX_BUFPOST_PKTSZ;
	}
	rx_path_memory_usage = rxbufpost_sz * (skb_queue_len(&dhd->rx_pend_queue) +
		skb_queue_len(&dhd->rx_napi_queue) +
		skb_queue_len(&dhd->rx_process_queue));
	rx_post_active = dhd_prot_get_h2d_rx_post_active(dhdp);
	if (rx_post_active != 0) {
		rx_path_memory_usage += (rxbufpost_sz * rx_post_active);
	}

	rx_cmpl_active = dhd_prot_get_d2h_rx_cpln_active(dhdp);
	if (rx_cmpl_active != 0) {
		rx_path_memory_usage += (rxbufpost_sz * rx_cmpl_active);
	}

	dhdp->rxpath_mem = rx_path_memory_usage;
	bcm_bprintf(strbuf, "\nrxbufpost_sz: %d rx_post_active: %d rx_cmpl_active: %d "
		"pend_queue_len: %d napi_queue_len: %d process_queue_len: %d\n",
		rxbufpost_sz, rx_post_active, rx_cmpl_active,
		skb_queue_len(&dhd->rx_pend_queue),
		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_process_queue));
	bcm_bprintf(strbuf, "DHD rx-path memory_usage: %llubytes %lluKB \n",
		rx_path_memory_usage, (rx_path_memory_usage / 1024));
	return rx_path_memory_usage;
}
#endif /* DHD_MEM_STATS */

void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
{
	dhd_info_t *dhd;

	if (dhdp == NULL || strbuf == NULL) {
		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
			__FUNCTION__, dhdp, strbuf));
		return;
	}

	dhd = dhdp->info;
	if (dhd == NULL) {
		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
		return;
	}

	bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);

	bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);

	bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
		dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
		dhd->txc_sched_cnt);

	bcm_bprintf(strbuf, "\nCPUs: dpc_cpu %u napi_cpu %u net_tx_cpu %u tx_cpu %u\n",
		atomic_read(&dhd->dpc_cpu),
		atomic_read(&dhd->rx_napi_cpu),
		atomic_read(&dhd->net_tx_cpu),
		atomic_read(&dhd->tx_cpu));

#ifdef DHD_LB_RXP
	bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
	dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
	bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
	bcm_bprintf(strbuf, "\nNAPI poll latency stats, i.e. from napi schedule to napi execution\n");
	dhd_lb_stats_dump_napi_latency(dhdp, strbuf, dhd->napi_latency);
#endif /* DHD_LB_RXP */

#ifdef DHD_LB_TXP
	bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);

	bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
	dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
#endif /* DHD_LB_TXP */
}

void dhd_lb_stats_update_napi_latency(uint64 *bin, uint32 latency)
{
	uint64 *p;
	uint32 bin_power;
	bin_power = next_larger_power2(latency);

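	/*
	 * The bin index is essentially ceil(log2(latency)): e.g. a latency of
	 * 100us maps to the next larger power of two, 128, and lands in bin 7.
	 * Anything above 32768us falls into the final overflow bin.
	 */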
	switch (bin_power) {
		case 1: p = bin + 0; break;
		case 2: p = bin + 1; break;
		case 4: p = bin + 2; break;
		case 8: p = bin + 3; break;
		case 16: p = bin + 4; break;
		case 32: p = bin + 5; break;
		case 64: p = bin + 6; break;
		case 128: p = bin + 7; break;
		case 256: p = bin + 8; break;
		case 512: p = bin + 9; break;
		case 1024: p = bin + 10; break;
		case 2048: p = bin + 11; break;
		case 4096: p = bin + 12; break;
		case 8192: p = bin + 13; break;
		case 16384: p = bin + 14; break;
		case 32768: p = bin + 15; break;
		default: p = bin + 16; break;
	}
	ASSERT((p - bin) < DHD_NUM_NAPI_LATENCY_ROWS);
	*p = *p + 1;
	return;
}

void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
{
	uint32 bin_power;
	uint32 *p;
	bin_power = next_larger_power2(count);

	switch (bin_power) {
		case 1: p = bin[0] + cpu; break;
		case 2: p = bin[1] + cpu; break;
		case 4: p = bin[2] + cpu; break;
		case 8: p = bin[3] + cpu; break;
		case 16: p = bin[4] + cpu; break;
		case 32: p = bin[5] + cpu; break;
		case 64: p = bin[6] + cpu; break;
		case 128: p = bin[7] + cpu; break;
		default: p = bin[8] + cpu; break;
	}

	*p = *p + 1;
	return;
}

void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
{
	int cpu;
	dhd_info_t *dhd = dhdp->info;

	cpu = get_cpu();
	put_cpu();
	dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);

	return;
}

void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
{
	int cpu;
	dhd_info_t *dhd = dhdp->info;

	cpu = get_cpu();
	put_cpu();
	dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);

	return;
}

void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
{
	int cpu;
	dhd_info_t *dhd = dhdp->info;

	cpu = get_cpu();
	put_cpu();
	dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);

	return;
}

void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd = dhdp->info;
	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
}

void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd = dhdp->info;
	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
}
#endif /* DHD_LB_STATS */

/**
 * dhd_tasklet_schedule - Function that runs in IPI context of the destination
 * CPU and schedules a tasklet.
 * @tasklet: opaque pointer to the tasklet
 */
INLINE void
dhd_tasklet_schedule(void *tasklet)
{
	tasklet_schedule((struct tasklet_struct *)tasklet);
}

/**
 * dhd_work_schedule_on - Executes the passed work on a given CPU
 * @work: work to be scheduled
 * @on_cpu: cpu core id
 *
 * If the requested cpu is online, schedule_work_on() queues the work there
 * and the work function will be invoked on the requested CPU.
 */
INLINE void
dhd_work_schedule_on(struct work_struct *work, int on_cpu)
{
	schedule_work_on(on_cpu, work);
}

INLINE void
dhd_delayed_work_schedule_on(struct delayed_work *dwork, int on_cpu, ulong delay)
{
	schedule_delayed_work_on(on_cpu, dwork, delay);
}

#if defined(DHD_LB_TXP)
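/*
 * Tx load-balance path overview (functions in this file):
 * dhd_lb_sendpkt() tags the skb with the netdev and ifidx and queues it on
 * tx_pend_queue, then dhd_lb_tx_dispatch() picks the tx CPU and schedules
 * tx_dispatcher_work there. dhd_tx_dispatcher_work() schedules tx_tasklet,
 * whose handler dhd_lb_tx_handler()/dhd_lb_tx_process() dequeues the skbs
 * and hands them to __dhd_sendpkt().
 */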
void dhd_tx_dispatcher_work(struct work_struct * work)
{
	struct dhd_info *dhd;

	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
	dhd = container_of(work, struct dhd_info, tx_dispatcher_work);
	GCC_DIAGNOSTIC_POP();

	dhd_tasklet_schedule(&dhd->tx_tasklet);
}

/**
 * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
 * on another cpu. The tx_tasklet will take care of actually putting
 * the skbs into the appropriate flow ring and ringing the H2D interrupt
 *
 * @dhdp: pointer to dhd_pub object
 */
void
dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd = dhdp->info;
	int curr_cpu;
	int tx_cpu;
	int prev_net_tx_cpu;

	/*
	 * get_cpu() disables preemption and prevents any CPU from going
	 * offline; put_cpu() is called only after scheduling
	 * tx_dispatcher_work.
	 */
	curr_cpu = get_cpu();

	/* Record the CPU on which the TX request from the network stack came */
	prev_net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
	atomic_set(&dhd->net_tx_cpu, curr_cpu);

	tx_cpu = atomic_read(&dhd->tx_cpu);

	/*
	 * Skip cpu candidacy if override is set via sysfs for changing the cpu manually
	 */
	if (dhd->dhd_lb_candidacy_override) {
		if (!cpu_online(tx_cpu)) {
			tx_cpu = curr_cpu;
		}
	} else {
		/*
		 * If the net TX was scheduled on the same CPU that is chosen
		 * for Tx processing, OR on a different CPU than it was
		 * previously scheduled on, OR if tx_cpu is offline,
		 * call the cpu candidacy algorithm to recompute tx_cpu.
		 */
		if ((curr_cpu == tx_cpu) || (curr_cpu != prev_net_tx_cpu) ||
			!cpu_online(tx_cpu)) {
			/* Re-compute LB CPUs */
			dhd_select_cpu_candidacy(dhd);
			/* Use updated tx cpu */
			tx_cpu = atomic_read(&dhd->tx_cpu);
		}
	}
	/*
	 * Schedule tx_dispatcher_work on the chosen cpu, which
	 * in turn will schedule tx_tasklet.
	 */
	dhd_work_schedule_on(&dhd->tx_dispatcher_work, tx_cpu);

	put_cpu();
}
#endif /* DHD_LB_TXP */

#if defined(DHD_LB_RXP)

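/*
 * Rx load-balance path overview (functions in this file):
 * dhd_lb_rx_pkt_enqueue() tags each rx skb with its ifidx and queues it on
 * rx_pend_queue. dhd_lb_rx_napi_dispatch() splices rx_pend_queue onto
 * rx_napi_queue, picks the napi CPU and schedules rx_napi_dispatcher_work
 * there, which calls dhd_napi_schedule(). dhd_napi_poll() then drains
 * rx_napi_queue via rx_process_queue and sends the packets up through
 * dhd_rx_frame().
 */
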
/**
 * dhd_napi_poll - Load balance napi poll function to process received
 * packets and send up the network stack using netif_receive_skb()
 *
 * @napi: napi object in which context this poll function is invoked
 * @budget: number of packets to be processed.
 *
 * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
 * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
 * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
 * packet tag and sendup.
 */
int
dhd_napi_poll(struct napi_struct *napi, int budget)
{
	int ifid;
	const int pkt_count = 1;
	const int chan = 0;
	struct sk_buff * skb;
	unsigned long flags;
	struct dhd_info *dhd;
	int processed = 0;
	int dpc_cpu;
#ifdef DHD_LB_STATS
	uint32 napi_latency;
#endif /* DHD_LB_STATS */

	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
	dhd = container_of(napi, struct dhd_info, rx_napi_struct);
	GCC_DIAGNOSTIC_POP();

#ifdef DHD_LB_STATS
	napi_latency = (uint32)(OSL_SYSUPTIME_US() - dhd->napi_schedule_time);
	dhd_lb_stats_update_napi_latency(dhd->napi_latency, napi_latency);
#endif /* DHD_LB_STATS */
	DHD_LB_INFO(("%s napi_queue<%d> budget<%d>\n",
		__FUNCTION__, skb_queue_len(&dhd->rx_napi_queue), budget));

	/*
	 * Extract the entire rx_napi_queue into another rx_process_queue
	 * and process only 'budget' number of skbs from rx_process_queue.
	 * If there are more items to be processed, napi poll will be rescheduled.
	 * During the next iteration, the next set of skbs from
	 * rx_napi_queue will be extracted and attached to the tail of rx_process_queue.
	 * Again budget number of skbs will be processed from rx_process_queue.
	 * If there are less than budget number of skbs in rx_process_queue,
	 * call napi_complete to stop rescheduling napi poll.
	 */
	DHD_RX_NAPI_QUEUE_LOCK(&dhd->rx_napi_queue.lock, flags);
	skb_queue_splice_tail_init(&dhd->rx_napi_queue, &dhd->rx_process_queue);
	DHD_RX_NAPI_QUEUE_UNLOCK(&dhd->rx_napi_queue.lock, flags);

	while ((processed < budget) && (skb = __skb_dequeue(&dhd->rx_process_queue)) != NULL) {
		OSL_PREFETCH(skb->data);

		ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));

		DHD_LB_INFO(("%s dhd_rx_frame pkt<%p> ifid<%d>\n",
			__FUNCTION__, skb, ifid));

		dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
		processed++;
	}

	if (atomic_read(&dhd->pub.lb_rxp_flow_ctrl) &&
		(dhd_lb_rxp_process_qlen(&dhd->pub) <= dhd->pub.lb_rxp_strt_thr)) {
		/*
		 * If the dpc CPU is online, schedule dhd_dpc_dispatcher_work on the
		 * dpc cpu, which in turn will schedule the dpc tasklet. Else schedule
		 * the dpc tasklet directly.
		 */
		get_cpu();
		dpc_cpu = atomic_read(&dhd->dpc_cpu);
		if (!cpu_online(dpc_cpu)) {
			dhd_tasklet_schedule(&dhd->tasklet);
		} else {
			dhd_delayed_work_schedule_on(&dhd->dhd_dpc_dispatcher_work, dpc_cpu, 0);
		}
		put_cpu();
	}
	DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);

	DHD_LB_INFO(("%s processed %d\n", __FUNCTION__, processed));

	/*
	 * Signal napi complete only when no more packets are processed and
	 * none are left in the enqueued queue.
	 */
	if ((processed == 0) && (skb_queue_len(&dhd->rx_napi_queue) == 0)) {
		napi_complete(napi);
#ifdef DHD_LB_STATS
		dhd->pub.lb_rxp_napi_complete_cnt++;
#endif /* DHD_LB_STATS */
		DHD_GENERAL_LOCK(&dhd->pub, flags);
		DHD_BUS_BUSY_CLEAR_IN_NAPI(&dhd->pub);
		DHD_GENERAL_UNLOCK(&dhd->pub, flags);
		return 0;
	}

#ifdef DHD_LB_STATS
	dhd->napi_schedule_time = OSL_SYSUPTIME_US();
#endif /* DHD_LB_STATS */

	/* Return budget so that it gets rescheduled immediately */
	return budget;
}

/**
 * dhd_napi_schedule - Place the napi struct into the current CPU's softnet napi
 * poll list. This function may be invoked via the smp_call_function_single
 * from a remote CPU.
 *
 * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
 * after the napi_struct is added to the softnet data's poll_list
 *
 * @info: pointer to a dhd_info struct
 */
static void
dhd_napi_schedule(void *info)
{
	dhd_info_t *dhd = (dhd_info_t *)info;
	unsigned long flags;

	DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n",
		__FUNCTION__, &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));

	/* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
	if (napi_schedule_prep(&dhd->rx_napi_struct)) {

		/*
		 * Set the bus busy state for NAPI; it will be cleared after
		 * napi_complete from the napi_poll context
		 */
		DHD_GENERAL_LOCK(&dhd->pub, flags);
		DHD_BUS_BUSY_SET_IN_NAPI(&dhd->pub);
		DHD_GENERAL_UNLOCK(&dhd->pub, flags);

#ifdef DHD_LB_STATS
		dhd->napi_schedule_time = OSL_SYSUPTIME_US();
		dhd->pub.lb_rxp_napi_sched_cnt++;
#endif /* DHD_LB_STATS */
		__napi_schedule(&dhd->rx_napi_struct);
#ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
		raise_softirq(NET_RX_SOFTIRQ);
#endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
	}

	/*
	 * If the rx_napi_struct was already running, then we let it complete
	 * processing all its packets. The rx_napi_struct may only run on one
	 * core at a time, to avoid out-of-order handling.
	 */
}

/**
 * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
 * action after placing the dhd's rx_process napi object in the remote CPU's
 * softnet data's poll_list.
 *
 * @dhd: dhd_info which has the rx_process napi object
 * @on_cpu: desired remote CPU id
 */
static INLINE int
dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
{
	int wait = 0; /* asynchronous IPI */
	DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n",
		__FUNCTION__, dhd, &dhd->rx_napi_struct, on_cpu));

	if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
		DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
			__FUNCTION__, on_cpu));
	}

	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);

	return 0;
}

/*
 * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on.
 * Why should we do this?
 * The candidacy algorithm is run from the callback function
 * registered to the CPU hotplug notifier. This callback happens from worker
 * context. The dhd_napi_schedule_on is also from worker context.
 * Note that both of these can run on two different CPUs at the same time.
 * So we can possibly have a window where a given CPUn is being brought
 * down from CPUm while we try to run a function on CPUn.
 * To prevent this it is better to execute the whole SMP function call
 * under get_online_cpus.
 * This function call ensures that the hotplug mechanism does not kick in
 * until we are done dealing with online CPUs.
 * If the hotplug worker is already running, no worries because the
 * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
 *
 * The below mentioned code structure is proposed in
 * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
 * for the question
 * Q: I need to ensure that a particular cpu is not removed when there is some
 * work specific to this cpu is in progress
 *
 * According to the documentation, calling get_online_cpus is NOT required if
 * we are running from tasklet context. Since dhd_rx_napi_dispatcher_work can
 * run from Work Queue context we have to call these functions.
 */
void dhd_rx_napi_dispatcher_work(struct work_struct * work)
{
	struct dhd_info *dhd;
	GCC_DIAGNOSTIC_PUSH_SUPPRESS_CAST();
	dhd = container_of(work, struct dhd_info, rx_napi_dispatcher_work);
	GCC_DIAGNOSTIC_POP();

	dhd_napi_schedule(dhd);
}

/**
 * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
 * to run on another CPU. The rx_napi_struct's poll function will retrieve all
 * the packets enqueued into the rx_napi_queue and sendup.
 * The producer's rx packet queue is appended to the rx_napi_queue before
 * dispatching the rx_napi_struct.
 */
void
dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
{
	unsigned long flags;
	dhd_info_t *dhd = dhdp->info;
	int curr_cpu;
	int rx_napi_cpu;
	int prev_dpc_cpu;

	if (dhd->rx_napi_netdev == NULL) {
		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
		return;
	}

	DHD_LB_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_pend_queue)));

	/* append the producer's queue of packets to the napi's rx process queue */
	DHD_RX_NAPI_QUEUE_LOCK(&dhd->rx_napi_queue.lock, flags);
	skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
	DHD_RX_NAPI_QUEUE_UNLOCK(&dhd->rx_napi_queue.lock, flags);

	/* If sysfs lb_rxp_active is not set, schedule on current cpu */
	if (!atomic_read(&dhd->lb_rxp_active))
	{
		dhd_napi_schedule(dhd);
		return;
	}

	/*
	 * get_cpu() disables preemption and prevents any CPU from going
	 * offline; put_cpu() is called only after scheduling
	 * rx_napi_dispatcher_work.
	 */
	curr_cpu = get_cpu();

	prev_dpc_cpu = atomic_read(&dhd->prev_dpc_cpu);

	rx_napi_cpu = atomic_read(&dhd->rx_napi_cpu);

	/*
	 * Skip cpu candidacy if override is set via sysfs for changing the cpu manually
	 */
	if (dhd->dhd_lb_candidacy_override) {
		if (!cpu_online(rx_napi_cpu)) {
			rx_napi_cpu = curr_cpu;
		}
	} else {
		/*
		 * If the DPC was scheduled on the same CPU that is chosen
		 * for Rx napi processing, OR on a different CPU than it was
		 * previously scheduled on, OR if rx_napi_cpu is offline,
		 * call the cpu candidacy algorithm to recompute napi_cpu.
		 */
		if ((curr_cpu == rx_napi_cpu) || (curr_cpu != prev_dpc_cpu) ||
			!cpu_online(rx_napi_cpu)) {
			/* Re-compute LB CPUs */
			dhd_select_cpu_candidacy(dhd);
			/* Use updated napi cpu */
			rx_napi_cpu = atomic_read(&dhd->rx_napi_cpu);
		}

	}

	DHD_LB_INFO(("%s : schedule to curr_cpu : %d, rx_napi_cpu : %d\n",
		__FUNCTION__, curr_cpu, rx_napi_cpu));
	dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, rx_napi_cpu);
	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);

	put_cpu();
}

/**
 * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
 */
void
dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
{
	dhd_info_t *dhd = dhdp->info;

	DHD_LB_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
		pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
	DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
	__skb_queue_tail(&dhd->rx_pend_queue, pkt);
	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
}

unsigned long
dhd_read_lb_rxp(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd = dhdp->info;
	return atomic_read(&dhd->lb_rxp_active);
}

uint32
dhd_lb_rxp_process_qlen(dhd_pub_t *dhdp)
{
	dhd_info_t *dhd = dhdp->info;
	return skb_queue_len(&dhd->rx_process_queue);
}
#endif /* DHD_LB_RXP */

#if defined(DHD_LB_TXP)
int
BCMFASTPATH(dhd_lb_sendpkt)(dhd_info_t *dhd, struct net_device *net,
	int ifidx, void *skb)
{
	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);

	/* If the feature is disabled at run-time, do TX from here */
	if (atomic_read(&dhd->lb_txp_active) == 0) {
		DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
		return __dhd_sendpkt(&dhd->pub, ifidx, skb);
	}

	/* Store the address of the net device and the interface index in the packet tag */
	DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
	DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);

	/* Enqueue the skb into tx_pend_queue */
	skb_queue_tail(&dhd->tx_pend_queue, skb);

	DHD_TRACE(("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));

	/* Dispatch the Tx job to be processed by the tx_tasklet */
	dhd_lb_tx_dispatch(&dhd->pub);

	return NETDEV_TX_OK;
}
#endif /* DHD_LB_TXP */

#ifdef DHD_LB_TXP
#define DHD_LB_TXBOUND 64
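/*
 * DHD_LB_TXBOUND limits the number of packets drained from tx_pend_queue per
 * tasklet invocation; if the bound is hit, dhd_lb_tx_process() returns TRUE
 * and dhd_lb_tx_handler() reschedules the tx_tasklet for the remaining skbs.
 */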
/*
 * Function that performs the TX processing on a given CPU
 */
bool
dhd_lb_tx_process(dhd_info_t *dhd)
{
	struct sk_buff *skb;
	int cnt = 0;
	struct net_device *net;
	int ifidx;
	bool resched = FALSE;

	DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
	if (dhd == NULL) {
		DHD_ERROR((" Null pointer DHD \r\n"));
		return resched;
	}

	BCM_REFERENCE(net);

	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);

	/* Base Loop to perform the actual Tx */
	do {
		skb = skb_dequeue(&dhd->tx_pend_queue);
		if (skb == NULL) {
			DHD_TRACE(("Dequeued a Null Packet \r\n"));
			break;
		}
		cnt++;

		net = DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
		ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));

		DHD_TRACE(("Processing skb %p for net %p index %d \r\n", skb,
			net, ifidx));

		__dhd_sendpkt(&dhd->pub, ifidx, skb);

		if (cnt >= DHD_LB_TXBOUND) {
			resched = TRUE;
			break;
		}

	} while (1);

	DHD_LB_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));

	return resched;
}

void
dhd_lb_tx_handler(unsigned long data)
{
	dhd_info_t *dhd = (dhd_info_t *)data;

	if (dhd_lb_tx_process(dhd)) {
		dhd_tasklet_schedule(&dhd->tx_tasklet);
	}
}

#endif /* DHD_LB_TXP */
#endif /* DHD_LB */

#if defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
void
dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
{
	unsigned int irq = (unsigned int)-1;
	int err = BCME_OK;

	if (!dhdp) {
		DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
		return;
	}

	if (!dhdp->bus) {
		DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
		return;
	}

	DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n",
		__FUNCTION__, *cpumask_bits(cpumask)));

	dhdpcie_get_pcieirq(dhdp->bus, &irq);
#ifdef BCMDHD_MODULAR
	err = irq_set_affinity_hint(irq, cpumask);
#else
	err = irq_set_affinity(irq, cpumask);
#endif /* BCMDHD_MODULAR */
	if (err)
		DHD_ERROR(("%s : irq set affinity failed cpu:0x%lx\n",
			__FUNCTION__, *cpumask_bits(cpumask)));
}
#endif /* DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */