xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/hwcnt/mali_kbase_hwcnt_gpu.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

22 #ifndef _KBASE_HWCNT_GPU_H_
23 #define _KBASE_HWCNT_GPU_H_
24 
25 #include <linux/bug.h>
26 #include <linux/types.h>
27 
28 struct kbase_device;
29 struct kbase_hwcnt_metadata;
30 struct kbase_hwcnt_enable_map;
31 struct kbase_hwcnt_dump_buffer;
32 
/* Hardware counter version 5 definitions, V5 is the only supported version. */
#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
/* Total entries per block: headers followed by counters. */
#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK                                                    \
	(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)

/* FrontEnd block count in V5 GPU hardware counter. */
#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
/* Tiler block count in V5 GPU hardware counter. */
#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1

/* Index of the PRFCNT_EN header into a V5 counter block */
#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2

/* Number of bytes for each counter value in hardware. */
#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))

/**
 * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
 *                                   identify metadata groups.
 * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
 */
enum kbase_hwcnt_gpu_group_type {
	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
};

/**
 * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
 *                                      used to identify metadata blocks.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:        Front End block (Job manager
 *                                                or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:       Secondary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:       Tertiary Front End block (Job
 *                                                manager or CSF HW).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block
 *                                                   (e.g. if a counter set that
 *                                                   a block doesn't support is
 *                                                   used).
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:     Tiler block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:        Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:       Secondary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:       Tertiary Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:    Memsys block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:   Secondary Memsys block.
 * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block.
 */
enum kbase_hwcnt_gpu_v5_block_type {
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED,
};

/**
 * enum kbase_hwcnt_set - GPU hardware counter sets
 * @KBASE_HWCNT_SET_PRIMARY:   The Primary set of counters
 * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
 * @KBASE_HWCNT_SET_TERTIARY:  The Tertiary set of counters
 * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters
 */
enum kbase_hwcnt_set {
	KBASE_HWCNT_SET_PRIMARY,
	KBASE_HWCNT_SET_SECONDARY,
	KBASE_HWCNT_SET_TERTIARY,
	KBASE_HWCNT_SET_UNDEFINED = 255,
};

113 /**
114  * struct kbase_hwcnt_physical_enable_map - Representation of enable map
115  *                                          directly used by GPU.
116  * @fe_bm:     Front end (JM/CSHW) counters selection bitmask.
117  * @shader_bm: Shader counters selection bitmask.
118  * @tiler_bm:  Tiler counters selection bitmask.
119  * @mmu_l2_bm: MMU_L2 counters selection bitmask.
120  */
121 struct kbase_hwcnt_physical_enable_map {
122 	u32 fe_bm;
123 	u32 shader_bm;
124 	u32 tiler_bm;
125 	u32 mmu_l2_bm;
126 };
127 
/*
 * Values for Hardware Counter SET_SELECT value.
 * Directly passed to HW.
 */
enum kbase_hwcnt_physical_set {
	KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
	KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
	KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
};

138 /**
139  * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
140  * @l2_count:                L2 cache count.
141  * @core_mask:               Shader core mask. May be sparse.
142  * @clk_cnt:                 Number of clock domains available.
143  * @prfcnt_values_per_block: Total entries (header + counters) of performance
144  *                           counter per block.
145  */
146 struct kbase_hwcnt_gpu_info {
147 	size_t l2_count;
148 	u64 core_mask;
149 	u8 clk_cnt;
150 	size_t prfcnt_values_per_block;
151 };
152 
153 /**
154  * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
155  *                                  GPU.
156  * @num_l2_slices:  Current number of L2 slices allocated to the GPU.
157  * @shader_present: Current shader present bitmap that is allocated to the GPU.
158  *
159  * For architectures with the max_config interface available from the Arbiter,
160  * the current resources allocated may change during runtime due to a
161  * re-partitioning (possible with partition manager). Thus, the HWC needs to be
162  * prepared to report any possible set of counters. For this reason the memory
163  * layout in the userspace is based on the maximum possible allocation. On the
164  * other hand, each partition has just the view of its currently allocated
165  * resources. Therefore, it is necessary to correctly map the dumped HWC values
166  * from the registers into this maximum memory layout so that it can be exposed
167  * to the userspace side correctly.
168  *
169  * For L2 cache just the number is enough once the allocated ones will be
170  * accumulated on the first L2 slots available in the destination buffer.
171  *
172  * For the correct mapping of the shader cores it is necessary to jump all the
173  * L2 cache slots in the destination buffer that are not allocated. But, it is
174  * not necessary to add any logic to map the shader cores bitmap into the memory
175  * layout because the shader_present allocated will always be a subset of the
176  * maximum shader_present. It is possible because:
177  * 1 - Partitions are made of slices and they are always ordered from the ones
178  *     with more shader cores to the ones with less.
179  * 2 - The shader cores in a slice are always contiguous.
180  * 3 - A partition can only have a contiguous set of slices allocated to it.
181  * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with
182  * 3 cores and 1 with 2 cores. The maximum possible shader_present would be:
183  * 0x0011|0111|0111|1111 -> note the order and that the shader cores are
184  *                          contiguous in any slice.
185  * Supposing that a partition takes the two slices in the middle, the current
186  * config shader_present for this partition would be:
187  * 0x0111|0111 -> note that this is a subset of the maximum above and the slices
188  *                are contiguous.
189  * Therefore, by directly copying any subset of the maximum possible
190  * shader_present the mapping is already achieved.
191  */
192 struct kbase_hwcnt_curr_config {
193 	size_t num_l2_slices;
194 	u64 shader_present;
195 };
196 
197 /**
198  * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
199  *
200  * @grp_type: Hardware counter group type.
201  * @blk_type: Hardware counter block type.
202  *
203  * Return: true if the block type is undefined, else false.
204  */
kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,const uint64_t blk_type)205 static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
206 						       const uint64_t blk_type)
207 {
208 	/* Warn on unknown group type */
209 	if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
210 		return false;
211 
212 	return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED ||
213 		blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED ||
214 		blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED ||
215 		blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED);
216 }
217 
/**
 * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
 *                                    JM GPUs.
 * @info:           Non-NULL pointer to info struct.
 * @counter_set:    The performance counter set used.
 * @out_metadata:   Non-NULL pointer to where created metadata is stored on
 *                  success.
 * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
 *                  buffer is stored on success.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
				   enum kbase_hwcnt_set counter_set,
				   const struct kbase_hwcnt_metadata **out_metadata,
				   size_t *out_dump_bytes);

/**
 * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
 *
 * @metadata: Pointer to metadata to destroy.
 */
void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);

/**
 * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
 *                                     CSF GPUs.
 * @info:           Non-NULL pointer to info struct.
 * @counter_set:    The performance counter set used.
 * @out_metadata:   Non-NULL pointer to where created metadata is stored on
 *                  success.
 *
 * Return: 0 on success, else error code.
 */
int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
				    enum kbase_hwcnt_set counter_set,
				    const struct kbase_hwcnt_metadata **out_metadata);

/**
 * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
 *                                      metadata.
 * @metadata: Pointer to metadata to destroy.
 */
void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);

263 /**
264  * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
265  *                             dump buffer in src into the dump buffer
266  *                             abstraction in dst.
267  * @dst:            Non-NULL pointer to destination dump buffer.
268  * @src:            Non-NULL pointer to source raw dump buffer, of same length
269  *                  as dump_buf_bytes in the metadata of destination dump
270  *                  buffer.
271  * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
272  * @pm_core_mask:   PM state synchronized shaders core mask with the dump.
273  * @curr_config:    Current allocated hardware resources to correctly map the
274  *                  source raw dump buffer to the destination dump buffer.
275  * @accumulate:     True if counters in source should be accumulated into
276  *                  destination, rather than copied.
277  *
278  * The dst and dst_enable_map MUST have been created from the same metadata as
279  * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
280  * the length of src.
281  *
282  * Return: 0 on success, else error code.
283  */
284 int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
285 			    const struct kbase_hwcnt_enable_map *dst_enable_map,
286 			    const u64 pm_core_mask,
287 			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
288 
289 /**
290  * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
291  *                              dump buffer in src into the dump buffer
292  *                              abstraction in dst.
293  * @dst:            Non-NULL pointer to destination dump buffer.
294  * @src:            Non-NULL pointer to source raw dump buffer, of same length
295  *                  as dump_buf_bytes in the metadata of dst dump buffer.
296  * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
297  * @accumulate:     True if counters in src should be accumulated into
298  *                  destination, rather than copied.
299  *
300  * The dst and dst_enable_map MUST have been created from the same metadata as
301  * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
302  * the length of src.
303  *
304  * Return: 0 on success, else error code.
305  */
306 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
307 			     const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
308 
/**
 * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
 *                                                   enable map abstraction to
 *                                                   a physical block enable
 *                                                   map.
 * @lo: Low 64 bits of block enable map abstraction.
 * @hi: High 64 bits of block enable map abstraction.
 *
 * The abstraction uses 128 bits to enable 128 block values, whereas the
 * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
 * Therefore, this conversion is lossy.
 *
 * Return: 32-bit physical block enable map.
 */
static inline uint32_t kbase_hwcnt_backend_gpu_block_map_to_physical(uint64_t lo, uint64_t hi)
{
	uint32_t phys = 0;
	const uint64_t dwords[2] = { lo, hi };
	size_t dword_idx;

	/* lo packs into the low 16 physical bits, hi into the high 16. */
	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
		const uint64_t dword = dwords[dword_idx];
		uint16_t packed = 0;
		size_t hword_bit;

		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
			const size_t dword_bit = hword_bit * 4;
			/* Physical bit is set iff any of the corresponding
			 * four abstraction bits is set.
			 */
			const uint16_t bit_set = ((dword >> (dword_bit + 0)) & 0x1) |
						 ((dword >> (dword_bit + 1)) & 0x1) |
						 ((dword >> (dword_bit + 2)) & 0x1) |
						 ((dword >> (dword_bit + 3)) & 0x1);
			packed |= (bit_set << hword_bit);
		}
		phys |= ((uint32_t)packed) << (16 * dword_idx);
	}
	return phys;
}

/**
 * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
 *                                            into a physical enable map.
 * @dst: Non-NULL pointer to destination physical enable map.
 * @src: Non-NULL pointer to source enable map abstraction.
 *
 * The src must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the enable map abstraction has one bit per
 * individual counter block value, but the physical enable map uses 1 bit for
 * every 4 counters, shared over all instances of a block.
 */
void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
					    const struct kbase_hwcnt_enable_map *src);

/**
 * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
 *                                     SET_SELECT value.
 *
 * @dst: Non-NULL pointer to destination physical SET_SELECT value.
 * @src: Non-NULL pointer to source counter set selection.
 */
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);

/**
 * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
 *                                              an enable map abstraction.
 * @dst: Non-NULL pointer to destination enable map abstraction.
 * @src: Non-NULL pointer to source physical enable map.
 *
 * The dst must have been created from a metadata returned from a call to
 * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This is a lossy conversion, as the physical enable map can technically
 * support counter blocks with 128 counters each, but no hardware actually uses
 * more than 64, so the enable map abstraction has nowhere to store the enable
 * information for the 64 non-existent counters.
 */
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
					      const struct kbase_hwcnt_physical_enable_map *src);

/**
 * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
 *                                        enable headers in a dump buffer to
 *                                        reflect the specified enable map.
 * @buf:        Non-NULL pointer to dump buffer to patch.
 * @enable_map: Non-NULL pointer to enable map.
 *
 * The buf and enable_map must have been created from a metadata returned from
 * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
 *
 * This function should be used before handing off a dump buffer over the
 * kernel-user boundary, to ensure the header is accurate for the enable map
 * used by the user.
 */
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
					const struct kbase_hwcnt_enable_map *enable_map);

407 #endif /* _KBASE_HWCNT_GPU_H_ */
408