// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"

#include <linux/err.h>

/**
 * enum enable_map_idx - Index into a block enable map that spans multiple
 *                       u64 array elements.
 * @EM_LO:    Index of the u64 holding the low 64 bits of the enable map.
 * @EM_HI:    Index of the u64 holding the high 64 bits of the enable map.
 * @EM_COUNT: Number of u64 elements in the enable map.
 */
enum enable_map_idx {
	EM_LO,
	EM_HI,
	EM_COUNT,
};

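/**
 * kbasep_get_fe_block_type() - Choose the front-end block type for a counter set.
 * @dst:         Non-NULL pointer to where the chosen block type is stored.
 * @counter_set: The performance counter set in use.
 * @is_csf:      true for a CSF GPU, false otherwise.
 *
 * Secondary and tertiary front-end sets only exist on CSF GPUs; on other GPUs
 * the block type is marked as undefined.
 */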
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

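/**
 * kbasep_get_tiler_block_type() - Choose the tiler block type for a counter set.
 * @dst:         Non-NULL pointer to where the chosen block type is stored.
 * @counter_set: The performance counter set in use.
 *
 * Only the primary set is defined for the tiler; the other sets map to the
 * undefined tiler block type.
 */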
static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

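/**
 * kbasep_get_sc_block_type() - Choose the shader core block type for a counter set.
 * @dst:         Non-NULL pointer to where the chosen block type is stored.
 * @counter_set: The performance counter set in use.
 * @is_csf:      true for a CSF GPU, false otherwise.
 *
 * The tertiary shader core set only exists on CSF GPUs; on other GPUs it maps
 * to the undefined shader core block type.
 */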
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

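/**
 * kbasep_get_memsys_block_type() - Choose the memory system block type for a
 *                                  counter set.
 * @dst:         Non-NULL pointer to where the chosen block type is stored.
 * @counter_set: The performance counter set in use.
 *
 * Only the primary and secondary sets are defined for the memory system; the
 * tertiary set maps to the undefined memory system block type.
 */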
static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

/**
 * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata
 *                                              for the GPU.
 * @gpu_info:    Non-NULL pointer to hwcnt info for current GPU.
 * @is_csf:      true for CSF GPU, otherwise false.
 * @counter_set: The performance counter set to use.
 * @metadata:    Non-NULL pointer to where created metadata is stored
 *               on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
						    const bool is_csf,
						    enum kbase_hwcnt_set counter_set,
						    const struct kbase_hwcnt_metadata **metadata)
{
	struct kbase_hwcnt_description desc;
	struct kbase_hwcnt_group_description group;
	struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
	size_t non_sc_block_count;
	size_t sc_block_count;

	WARN_ON(!gpu_info);
	WARN_ON(!metadata);

	/* Calculate number of block instances that aren't shader cores */
	non_sc_block_count = 2 + gpu_info->l2_count;
	/* Calculate number of block instances that are shader cores */
	sc_block_count = fls64(gpu_info->core_mask);

	/*
	 * A system can have up to 64 shader cores, but the 64-bit
	 * availability mask can't physically represent that many cores as well
	 * as the other hardware blocks.
	 * Error out if there are more blocks than our implementation can
	 * support.
	 */
	if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
		return -EINVAL;

	/* One Front End block */
	kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
	blks[0].inst_cnt = 1;
	blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/* One Tiler block */
	kbasep_get_tiler_block_type(&blks[1].type, counter_set);
	blks[1].inst_cnt = 1;
	blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/* l2_count memsys blks */
	kbasep_get_memsys_block_type(&blks[2].type, counter_set);
	blks[2].inst_cnt = gpu_info->l2_count;
	blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/*
	 * There are as many shader cores in the system as there are bits set in
	 * the core mask. However, the dump buffer memory requirements need to
	 * take into account the fact that the core mask may be non-contiguous.
	 *
	 * For example, a system with a core mask of 0b1011 has the same dump
	 * buffer memory requirements as a system with 0b1111, but requires more
	 * memory than a system with 0b0111. However, core 2 of the system with
	 * 0b1011 doesn't physically exist, and the dump buffer memory that
	 * accounts for that core will never be written to when we do a counter
	 * dump.
	 *
	 * We find the core mask's last set bit to determine the memory
	 * requirements, and embed the core mask into the availability mask so
	 * we can determine later which shader cores physically exist.
	 */
	kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
	blks[3].inst_cnt = sc_block_count;
	blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);

	group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
	group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
	group.blks = blks;

	desc.grp_cnt = 1;
	desc.grps = &group;
	desc.clk_cnt = gpu_info->clk_cnt;

	/* The JM, Tiler, and L2s are always available, and are before cores */
	desc.avail_mask = (1ull << non_sc_block_count) - 1;
	/* Embed the core mask directly in the availability mask */
	desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
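	/*
	 * Illustrative example (example values only, not from any specific
	 * GPU): with l2_count == 2 and core_mask == 0b1011,
	 * non_sc_block_count is 4 (FE + Tiler + 2 MEMSYS), so avail_mask is
	 * 0b1111 for those blocks, OR'd with 0b1011 << 4 for the shader
	 * cores, giving 0b10111111. Bit 6 being clear marks the missing
	 * core 2 as unavailable even though its dump buffer space is still
	 * reserved.
	 */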

	return kbase_hwcnt_metadata_create(&desc, metadata);
}

/**
 * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the
 *                                        GPU.
 * @gpu_info: Non-NULL pointer to hwcnt info for the GPU.
 *
 * Return: Size of buffer the GPU needs to perform a counter dump.
 */
static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
{
	WARN_ON(!gpu_info);

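	/*
	 * Illustrative sizing (example values only): with l2_count == 2,
	 * core_mask == 0b1011 and prfcnt_values_per_block == 64, this is
	 * (2 + 2 + fls64(0b1011)) * 64 * KBASE_HWCNT_VALUE_HW_BYTES
	 * = 8 * 64 * 4 = 2048 bytes of 32-bit hardware counter values.
	 */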
	return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
	       gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}

int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
				   enum kbase_hwcnt_set counter_set,
				   const struct kbase_hwcnt_metadata **out_metadata,
				   size_t *out_dump_bytes)
{
	int errcode;
	const struct kbase_hwcnt_metadata *metadata;
	size_t dump_bytes;

	if (!gpu_info || !out_metadata || !out_dump_bytes)
		return -EINVAL;

	/*
	 * For architectures where a max_config interface is available
	 * from the arbiter, the v5 dump bytes and the v5 metadata are
	 * based on the maximum possible allocation of the HW in the
	 * GPU, because they need to be prepared for the worst case where
	 * all the available L2 caches and shader cores are allocated.
	 */
	dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
	if (errcode)
		return errcode;

	/*
	 * The physical dump size should be half of the dump abstraction size
	 * in the metadata, since the physical HW uses 32 bits per value but
	 * the metadata specifies 64 bits per value.
	 */
	WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);

	*out_metadata = metadata;
	*out_dump_bytes = dump_bytes;

	return 0;
}

void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
	if (!metadata)
		return;

	kbase_hwcnt_metadata_destroy(metadata);
}

int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
				    enum kbase_hwcnt_set counter_set,
				    const struct kbase_hwcnt_metadata **out_metadata)
{
	int errcode;
	const struct kbase_hwcnt_metadata *metadata;

	if (!gpu_info || !out_metadata)
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
	if (errcode)
		return errcode;

	*out_metadata = metadata;

	return 0;
}

void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
	if (!metadata)
		return;

	kbase_hwcnt_metadata_destroy(metadata);
}

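/**
 * is_block_type_shader() - Check whether a block type is a shader core block.
 * @grp_type: Hardware counter group type.
 * @blk_type: Hardware counter block type.
 * @blk:      Index of the block within the group (currently unused).
 *
 * Return: true if the block type is a shader core block, false otherwise.
 */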
static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
{
	bool is_shader = false;

	/* Warn on unknown group type */
	if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
		return false;

	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
		is_shader = true;

	return is_shader;
}

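/**
 * is_block_type_l2_cache() - Check whether a block type is a memory system
 *                            (L2 cache) block.
 * @grp_type: Hardware counter group type.
 * @blk_type: Hardware counter block type.
 *
 * Return: true if the block type is a memory system block, false otherwise.
 */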
static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
{
	bool is_l2_cache = false;

	switch (grp_type) {
	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
			is_l2_cache = true;
		break;
	default:
		/* Warn on unknown group type */
		WARN_ON(true);
	}

	return is_l2_cache;
}

int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
			    const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
{
	const struct kbase_hwcnt_metadata *metadata;
	size_t grp, blk, blk_inst;
	const u64 *dump_src = src;
	size_t src_offset = 0;
	u64 core_mask = pm_core_mask;

	/* Variables to deal with the current configuration */
	int l2_count = 0;

	if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
		return -EINVAL;

	metadata = dst->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
		const size_t ctr_cnt =
			kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const bool is_shader_core = is_block_type_shader(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
		const bool is_l2_cache = is_block_type_l2_cache(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
		const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
		bool hw_res_available = true;

		/*
		 * If the number of L2 blocks seen so far is greater than the
		 * number of L2 slices currently allocated, there is no HW
		 * allocated to this block.
		 */
		if (is_l2_cache) {
			l2_count++;
			if (l2_count > curr_config->num_l2_slices)
				hw_res_available = false;
			else
				hw_res_available = true;
		}
		/*
		 * For the shader cores, the currently allocated shader_mask is
		 * always a subset of the maximum shader_mask, so after skipping
		 * any unavailable L2 cache blocks, the available shader cores
		 * will always have a matching set of block instances to
		 * accumulate into.
		 */
		else
			hw_res_available = true;

		/*
		 * Skip block if no values in the destination block are enabled.
		 */
		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
			u64 *dst_blk =
				kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
			const u64 *src_blk = dump_src + src_offset;
			bool blk_powered;

			if (!is_shader_core) {
				/* Under the current PM system, counters will
				 * only be enabled after all non-shader-core
				 * blocks are powered up.
				 */
				blk_powered = true;
			} else {
				/* Check the PM core mask to see if the shader
				 * core is powered up.
				 */
				blk_powered = core_mask & 1;
			}

			if (blk_powered && !is_undefined && hw_res_available) {
				/* Only powered and defined blocks have valid data. */
				if (accumulate) {
					kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
										 hdr_cnt, ctr_cnt);
				} else {
					kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
									   (hdr_cnt + ctr_cnt));
				}
			} else {
				/* Even though the block might be undefined, the
				 * user has enabled counter collection for it.
				 * We should not propagate garbage data.
				 */
				if (accumulate) {
					/* No-op to preserve existing values */
				} else {
					/* src is garbage, so zero the dst */
					kbase_hwcnt_dump_buffer_block_zero(dst_blk,
									   (hdr_cnt + ctr_cnt));
				}
			}
		}

		/* Just increase the src_offset if the HW is available */
		if (hw_res_available)
			src_offset += (hdr_cnt + ctr_cnt);
		if (is_shader_core)
			core_mask = core_mask >> 1;
	}

	return 0;
}

int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
			     const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
{
	const struct kbase_hwcnt_metadata *metadata;
	const u64 *dump_src = src;
	size_t src_offset = 0;
	size_t grp, blk, blk_inst;

	if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
		return -EINVAL;

	metadata = dst->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
		const size_t ctr_cnt =
			kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);

		/*
		 * Skip block if no values in the destination block are enabled.
		 */
		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
			u64 *dst_blk =
				kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
			const u64 *src_blk = dump_src + src_offset;

			if (!is_undefined) {
				if (accumulate) {
					kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
										 hdr_cnt, ctr_cnt);
				} else {
					kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
									   (hdr_cnt + ctr_cnt));
				}
			} else {
				/* Even though the block might be undefined, the
				 * user has enabled counter collection for it.
				 * We should not propagate garbage data.
				 */
				if (accumulate) {
					/* No-op to preserve existing values */
				} else {
					/* src is garbage, so zero the dst */
					kbase_hwcnt_dump_buffer_block_zero(dst_blk,
									   (hdr_cnt + ctr_cnt));
				}
			}
		}

		src_offset += (hdr_cnt + ctr_cnt);
	}

	return 0;
}

/**
 * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
 *                                                      block enable map to a
 *                                                      block enable map
 *                                                      abstraction.
 * @phys: Physical 32-bit block enable map
 * @lo:   Non-NULL pointer to where low 64 bits of block enable map abstraction
 *        will be stored.
 * @hi:   Non-NULL pointer to where high 64 bits of block enable map abstraction
 *        will be stored.
 */
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
{
	u64 dwords[2] = { 0, 0 };

	size_t dword_idx;

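	/*
	 * Each bit of the 32-bit physical map enables a group of four
	 * consecutive counters in the 128-bit abstraction: physical bits
	 * 0-15 expand into the low 64 bits and physical bits 16-31 into
	 * the high 64 bits. For example, physical bit 1 set expands to
	 * abstraction bits 4-7 set in *lo.
	 */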
	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
		const u16 packed = phys >> (16 * dword_idx);
		u64 dword = 0;

		size_t hword_bit;

		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
			const size_t dword_bit = hword_bit * 4;
			const u64 mask = (packed >> (hword_bit)) & 0x1;

			dword |= mask << (dword_bit + 0);
			dword |= mask << (dword_bit + 1);
			dword |= mask << (dword_bit + 2);
			dword |= mask << (dword_bit + 3);
		}
		dwords[dword_idx] = dword;
	}
	*lo = dwords[0];
	*hi = dwords[1];
}

void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
					    const struct kbase_hwcnt_enable_map *src)
{
	const struct kbase_hwcnt_metadata *metadata;
	u64 fe_bm[EM_COUNT] = { 0 };
	u64 shader_bm[EM_COUNT] = { 0 };
	u64 tiler_bm[EM_COUNT] = { 0 };
	u64 mmu_l2_bm[EM_COUNT] = { 0 };
	size_t grp, blk, blk_inst;

	if (WARN_ON(!src) || WARN_ON(!dst))
		return;

	metadata = src->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
					/* Nothing to do in this case. */
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
					fe_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
					tiler_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
					shader_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
					mmu_l2_bm[map_idx] |= blk_map[map_idx];
					break;
				default:
					WARN_ON(true);
				}
			}
		} else {
			WARN_ON(true);
		}
	}

	dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]);
	dst->shader_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]);
	dst->tiler_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
	dst->mmu_l2_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
}

void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
{
	switch (src) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
		break;
	default:
		WARN_ON(true);
	}
}

void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
					      const struct kbase_hwcnt_physical_enable_map *src)
{
	const struct kbase_hwcnt_metadata *metadata;

	u64 fe_bm[EM_COUNT] = { 0 };
	u64 shader_bm[EM_COUNT] = { 0 };
	u64 tiler_bm[EM_COUNT] = { 0 };
	u64 mmu_l2_bm[EM_COUNT] = { 0 };
	size_t grp, blk, blk_inst;

	if (WARN_ON(!src) || WARN_ON(!dst))
		return;

	metadata = dst->metadata;

	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
							 &shader_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
							 &tiler_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
							 &mmu_l2_bm[EM_HI]);

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
					/* Nothing to do in this case. */
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
					blk_map[map_idx] = fe_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
					blk_map[map_idx] = tiler_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
					blk_map[map_idx] = shader_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
					blk_map[map_idx] = mmu_l2_bm[map_idx];
					break;
				default:
					WARN_ON(true);
				}
			}
		} else {
			WARN_ON(true);
		}
	}
}

void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
					const struct kbase_hwcnt_enable_map *enable_map)
{
	const struct kbase_hwcnt_metadata *metadata;
	size_t grp, blk, blk_inst;

	if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
		return;

	metadata = buf->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
		const u64 *blk_map =
			kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			u64 prfcnt_bm[EM_COUNT] = { 0 };
			u32 prfcnt_en = 0;
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				prfcnt_bm[map_idx] = blk_map[map_idx];
			}

			prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
										  prfcnt_bm[EM_HI]);

			buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
		} else {
			WARN_ON(true);
		}
	}
}