// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"

#include <linux/err.h>

/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
 */
enum enable_map_idx {
	EM_LO,
	EM_HI,
	EM_COUNT,
};

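/**
 * kbasep_get_fe_block_type() - Choose the front-end block type for a counter set.
 * @dst:         Non-NULL pointer to where the block type is stored.
 * @counter_set: The performance counter set in use.
 * @is_csf:      true for CSF GPU, otherwise false.
 *
 * Secondary and tertiary front-end sets only exist on CSF GPUs; on other GPUs
 * those sets map to the undefined front-end block type.
 */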
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

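/**
 * kbasep_get_tiler_block_type() - Choose the tiler block type for a counter set.
 * @dst:         Non-NULL pointer to where the block type is stored.
 * @counter_set: The performance counter set in use.
 *
 * Only the primary set is defined for the tiler; secondary and tertiary sets
 * map to the undefined tiler block type.
 */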
static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

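/**
 * kbasep_get_sc_block_type() - Choose the shader core block type for a counter set.
 * @dst:         Non-NULL pointer to where the block type is stored.
 * @counter_set: The performance counter set in use.
 * @is_csf:      true for CSF GPU, otherwise false.
 *
 * The tertiary shader core set only exists on CSF GPUs; on other GPUs it maps
 * to the undefined shader core block type.
 */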
static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		if (is_csf)
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
		else
			*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

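/**
 * kbasep_get_memsys_block_type() - Choose the memory system block type for a counter set.
 * @dst:         Non-NULL pointer to where the block type is stored.
 * @counter_set: The performance counter set in use.
 *
 * Primary and secondary memory system sets are defined; the tertiary set maps
 * to the undefined memory system block type.
 */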
static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
{
	switch (counter_set) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
		break;
	default:
		WARN_ON(true);
	}
}

/**
 * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata
 *                                              for the GPU.
 * @gpu_info:      Non-NULL pointer to hwcnt info for current GPU.
 * @is_csf:        true for CSF GPU, otherwise false.
 * @counter_set:   The performance counter set to use.
 * @metadata:      Non-NULL pointer to where created metadata is stored
 *                 on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
						    const bool is_csf,
						    enum kbase_hwcnt_set counter_set,
						    const struct kbase_hwcnt_metadata **metadata)
{
	struct kbase_hwcnt_description desc;
	struct kbase_hwcnt_group_description group;
	struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
	size_t non_sc_block_count;
	size_t sc_block_count;

	WARN_ON(!gpu_info);
	WARN_ON(!metadata);

	/* Calculate number of block instances that aren't shader cores */
	non_sc_block_count = 2 + gpu_info->l2_count;
	/* Calculate number of block instances that are shader cores */
	sc_block_count = fls64(gpu_info->core_mask);

	/*
	 * A system can have up to 64 shader cores, but the 64-bit
	 * availability mask can't physically represent that many cores as well
	 * as the other hardware blocks.
	 * Error out if there are more blocks than our implementation can
	 * support.
	 */
	if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
		return -EINVAL;

	/* One Front End block */
	kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
	blks[0].inst_cnt = 1;
	blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/* One Tiler block */
	kbasep_get_tiler_block_type(&blks[1].type, counter_set);
	blks[1].inst_cnt = 1;
	blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/* l2_count memsys blks */
	kbasep_get_memsys_block_type(&blks[2].type, counter_set);
	blks[2].inst_cnt = gpu_info->l2_count;
	blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	/*
	 * There are as many shader cores in the system as there are bits set in
	 * the core mask. However, the dump buffer memory requirements need to
	 * take into account the fact that the core mask may be non-contiguous.
	 *
	 * For example, a system with a core mask of 0b1011 has the same dump
	 * buffer memory requirements as a system with 0b1111, but requires more
	 * memory than a system with 0b0111. However, core 2 of the system with
	 * 0b1011 doesn't physically exist, and the dump buffer memory that
	 * accounts for that core will never be written to when we do a counter
	 * dump.
	 *
	 * We find the core mask's last set bit to determine the memory
	 * requirements, and embed the core mask into the availability mask so
	 * we can determine later which shader cores physically exist.
	 */
	kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
	blks[3].inst_cnt = sc_block_count;
	blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
	blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;

	WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);

	group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
	group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
	group.blks = blks;

	desc.grp_cnt = 1;
	desc.grps = &group;
	desc.clk_cnt = gpu_info->clk_cnt;

	/* The JM, Tiler, and L2s are always available, and are before cores */
	desc.avail_mask = (1ull << non_sc_block_count) - 1;
	/* Embed the core mask directly in the availability mask */
	desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
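	/*
	 * For example, with l2_count == 2 and core_mask == 0b1011,
	 * non_sc_block_count is 4, so avail_mask becomes 0b10111111: bits 0-3
	 * cover the FE, tiler and two MEMSYS blocks, and bits 4-7 carry the
	 * core mask (bit 6 stays clear for the missing core 2).
	 */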

	return kbase_hwcnt_metadata_create(&desc, metadata);
}

/**
 * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the
 *                                        GPU.
 * @gpu_info: Non-NULL pointer to hwcnt info for the GPU.
 *
 * Return: Size of buffer the GPU needs to perform a counter dump.
 */
static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
{
	WARN_ON(!gpu_info);

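	/*
	 * One FE block, one tiler block, l2_count MEMSYS blocks and one block
	 * per shader core up to the highest set bit of the core mask, each
	 * holding prfcnt_values_per_block hardware values of
	 * KBASE_HWCNT_VALUE_HW_BYTES bytes.
	 */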
	return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
	       gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}

int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
				   enum kbase_hwcnt_set counter_set,
				   const struct kbase_hwcnt_metadata **out_metadata,
				   size_t *out_dump_bytes)
{
	int errcode;
	const struct kbase_hwcnt_metadata *metadata;
	size_t dump_bytes;

	if (!gpu_info || !out_metadata || !out_dump_bytes)
		return -EINVAL;

	/*
	 * For architectures where a max_config interface is available
	 * from the arbiter, the v5 dump bytes and the v5 metadata are
	 * based on the maximum possible allocation of HW in the GPU,
	 * because they need to be prepared for the worst case where all
	 * the available L2 caches and shader cores are allocated.
	 */
	dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
	if (errcode)
		return errcode;

	/*
	 * The physical dump size should be half of the dump abstraction size
	 * in metadata, since the physical HW uses 32 bits per value but the
	 * metadata specifies 64 bits per value.
	 */
	WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);

	*out_metadata = metadata;
	*out_dump_bytes = dump_bytes;

	return 0;
}

void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
	if (!metadata)
		return;

	kbase_hwcnt_metadata_destroy(metadata);
}

int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
				    enum kbase_hwcnt_set counter_set,
				    const struct kbase_hwcnt_metadata **out_metadata)
{
	int errcode;
	const struct kbase_hwcnt_metadata *metadata;

	if (!gpu_info || !out_metadata)
		return -EINVAL;

	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
	if (errcode)
		return errcode;

	*out_metadata = metadata;

	return 0;
}

void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
{
	if (!metadata)
		return;

	kbase_hwcnt_metadata_destroy(metadata);
}

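/**
 * is_block_type_shader() - Check whether a block type is a shader core block.
 * @grp_type: Hardware counter group type.
 * @blk_type: Hardware counter block type.
 * @blk:      Index of the block within the group (currently unused).
 *
 * Return: true if the block type is one of the V5 shader core types,
 *         false otherwise.
 */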
static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
{
	bool is_shader = false;

	/* Warn on unknown group type */
	if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
		return false;

	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
		is_shader = true;

	return is_shader;
}

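/**
 * is_block_type_l2_cache() - Check whether a block type is a memory system
 *                            (L2 cache) block.
 * @grp_type: Hardware counter group type.
 * @blk_type: Hardware counter block type.
 *
 * Return: true if the block type is one of the V5 memory system types,
 *         false otherwise.
 */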
static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
{
	bool is_l2_cache = false;

	switch (grp_type) {
	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
			is_l2_cache = true;
		break;
	default:
		/* Warn on unknown group type */
		WARN_ON(true);
	}

	return is_l2_cache;
}

int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
			    const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
{
	const struct kbase_hwcnt_metadata *metadata;
	size_t grp, blk, blk_inst;
	const u64 *dump_src = src;
	size_t src_offset = 0;
	u64 core_mask = pm_core_mask;

	/* Variables to deal with the current configuration */
	int l2_count = 0;

	if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
		return -EINVAL;

	metadata = dst->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
		const size_t ctr_cnt =
			kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const bool is_shader_core = is_block_type_shader(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
		const bool is_l2_cache = is_block_type_l2_cache(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
		const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
		bool hw_res_available = true;

		/*
		 * If the L2 block count seen so far exceeds the number of L2
		 * slices in the current allocation, there is no HW allocated
		 * to this block.
		 */
		if (is_l2_cache) {
			l2_count++;
			if (l2_count > curr_config->num_l2_slices)
				hw_res_available = false;
			else
				hw_res_available = true;
		}
		/*
		 * For the shader cores, the shader_mask currently allocated is
		 * always a subset of the maximum shader_mask, so after
		 * skipping any L2 cache blocks that are not available, the
		 * available shader cores will always have a matching set of
		 * block instances to accumulate into.
		 */
		else
			hw_res_available = true;

		/*
		 * Skip block if no values in the destination block are enabled.
		 */
		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
			u64 *dst_blk =
				kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
			const u64 *src_blk = dump_src + src_offset;
			bool blk_powered;

			if (!is_shader_core) {
				/* Under the current PM system, counters will
				 * only be enabled after all non shader core
				 * blocks are powered up.
				 */
				blk_powered = true;
			} else {
				/* Check the PM core mask to see if the shader
				 * core is powered up.
				 */
				blk_powered = core_mask & 1;
			}

			if (blk_powered && !is_undefined && hw_res_available) {
				/* Only powered and defined blocks have valid data. */
				if (accumulate) {
					kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
										 hdr_cnt, ctr_cnt);
				} else {
					kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
									   (hdr_cnt + ctr_cnt));
				}
			} else {
				/* Even though the block might be undefined, the
				 * user has enabled counter collection for it.
				 * We should not propagate garbage data.
				 */
				if (accumulate) {
					/* No-op to preserve existing values */
				} else {
					/* src is garbage, so zero the dst */
					kbase_hwcnt_dump_buffer_block_zero(dst_blk,
									   (hdr_cnt + ctr_cnt));
				}
			}
		}

		/* Just increase the src_offset if the HW is available */
		if (hw_res_available)
			src_offset += (hdr_cnt + ctr_cnt);
		if (is_shader_core)
			core_mask = core_mask >> 1;
	}

	return 0;
}

int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
			     const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
{
	const struct kbase_hwcnt_metadata *metadata;
	const u64 *dump_src = src;
	size_t src_offset = 0;
	size_t grp, blk, blk_inst;

	if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
		return -EINVAL;

	metadata = dst->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
		const size_t ctr_cnt =
			kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);

		/*
		 * Skip block if no values in the destination block are enabled.
		 */
		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
			u64 *dst_blk =
				kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
			const u64 *src_blk = dump_src + src_offset;

			if (!is_undefined) {
				if (accumulate) {
					kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
										 hdr_cnt, ctr_cnt);
				} else {
					kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
									   (hdr_cnt + ctr_cnt));
				}
			} else {
				/* Even though the block might be undefined, the
				 * user has enabled counter collection for it.
				 * We should not propagate garbage data.
				 */
				if (accumulate) {
					/* No-op to preserve existing values */
				} else {
					/* src is garbage, so zero the dst */
					kbase_hwcnt_dump_buffer_block_zero(dst_blk,
									   (hdr_cnt + ctr_cnt));
				}
			}
		}

		src_offset += (hdr_cnt + ctr_cnt);
	}

	return 0;
}

/**
 * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
 *                                                      block enable map to a
 *                                                      block enable map
 *                                                      abstraction.
 * @phys: Physical 32-bit block enable map
 * @lo:   Non-NULL pointer to where low 64 bits of block enable map abstraction
 *        will be stored.
 * @hi:   Non-NULL pointer to where high 64 bits of block enable map abstraction
 *        will be stored.
 */
static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
{
	u64 dwords[2] = { 0, 0 };

	size_t dword_idx;

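	/*
	 * Each bit of the 32-bit physical map enables a group of four
	 * consecutive bits in the abstraction: physical bits 0-15 expand into
	 * *lo and bits 16-31 into *hi. For example, physical bit 0 being set
	 * results in bits 0-3 of *lo being set.
	 */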
	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
		const u16 packed = phys >> (16 * dword_idx);
		u64 dword = 0;

		size_t hword_bit;

		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
			const size_t dword_bit = hword_bit * 4;
			const u64 mask = (packed >> (hword_bit)) & 0x1;

			dword |= mask << (dword_bit + 0);
			dword |= mask << (dword_bit + 1);
			dword |= mask << (dword_bit + 2);
			dword |= mask << (dword_bit + 3);
		}
		dwords[dword_idx] = dword;
	}
	*lo = dwords[0];
	*hi = dwords[1];
}

void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
					    const struct kbase_hwcnt_enable_map *src)
{
	const struct kbase_hwcnt_metadata *metadata;
	u64 fe_bm[EM_COUNT] = { 0 };
	u64 shader_bm[EM_COUNT] = { 0 };
	u64 tiler_bm[EM_COUNT] = { 0 };
	u64 mmu_l2_bm[EM_COUNT] = { 0 };
	size_t grp, blk, blk_inst;

	if (WARN_ON(!src) || WARN_ON(!dst))
		return;

	metadata = src->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
					/* Nothing to do in this case. */
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
					fe_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
					tiler_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
					shader_bm[map_idx] |= blk_map[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
					mmu_l2_bm[map_idx] |= blk_map[map_idx];
					break;
				default:
					WARN_ON(true);
				}
			}
		} else {
			WARN_ON(true);
		}
	}

	dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]);
	dst->shader_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]);
	dst->tiler_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
	dst->mmu_l2_bm =
		kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
}

void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
{
	switch (src) {
	case KBASE_HWCNT_SET_PRIMARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
		break;
	case KBASE_HWCNT_SET_SECONDARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
		break;
	case KBASE_HWCNT_SET_TERTIARY:
		*dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
		break;
	default:
		WARN_ON(true);
	}
}

void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
					      const struct kbase_hwcnt_physical_enable_map *src)
{
	const struct kbase_hwcnt_metadata *metadata;

	u64 fe_bm[EM_COUNT] = { 0 };
	u64 shader_bm[EM_COUNT] = { 0 };
	u64 tiler_bm[EM_COUNT] = { 0 };
	u64 mmu_l2_bm[EM_COUNT] = { 0 };
	size_t grp, blk, blk_inst;

	if (WARN_ON(!src) || WARN_ON(!dst))
		return;

	metadata = dst->metadata;

	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
							 &shader_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
							 &tiler_bm[EM_HI]);
	kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
							 &mmu_l2_bm[EM_HI]);

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
		u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
					/* Nothing to do in this case. */
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
					blk_map[map_idx] = fe_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
					blk_map[map_idx] = tiler_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
					blk_map[map_idx] = shader_bm[map_idx];
					break;
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
				case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
					blk_map[map_idx] = mmu_l2_bm[map_idx];
					break;
				default:
					WARN_ON(true);
				}
			}
		} else {
			WARN_ON(true);
		}
	}
}

void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
					const struct kbase_hwcnt_enable_map *enable_map)
{
	const struct kbase_hwcnt_metadata *metadata;
	size_t grp, blk, blk_inst;

	if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
		return;

	metadata = buf->metadata;

	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
	{
		const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
		u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
		const u64 *blk_map =
			kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);

		if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
			const size_t map_stride =
				kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
			u64 prfcnt_bm[EM_COUNT] = { 0 };
			u32 prfcnt_en = 0;
			size_t map_idx;

			for (map_idx = 0; map_idx < map_stride; ++map_idx) {
				if (WARN_ON(map_idx >= EM_COUNT))
					break;

				prfcnt_bm[map_idx] = blk_map[map_idx];
			}

			prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
										  prfcnt_bm[EM_HI]);

			buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
		} else {
			WARN_ON(true);
		}
	}
}