// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"

#include <linux/bug.h>
#include <linux/err.h>
#include <linux/slab.h>

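/*
 * Implementation of the "narrow" hardware counter metadata and dump buffers:
 * blocks are narrowed to 64 entries and counter values to 32 bits (half the
 * size of the source 64-bit dump buffers) to keep API backward compatibility.
 */
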
int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
					   const struct kbase_hwcnt_metadata *src_md)
{
	struct kbase_hwcnt_description desc;
	struct kbase_hwcnt_group_description group;
	struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
	size_t prfcnt_values_per_block;
	size_t blk;
	int err;
	struct kbase_hwcnt_metadata_narrow *metadata_narrow;

	if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
	    !src_md->grp_metadata[0].blk_metadata)
		return -EINVAL;

	/* Only support a single group, with KBASE_HWCNT_V5_BLOCK_TYPE_COUNT
	 * blocks, in the metadata.
	 */
	if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
	    (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
		return -EINVAL;

	/* Get the values count in the first block. */
	prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);

	/* Check that all blocks have the same values count. */
	for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
		if (val_cnt != prfcnt_values_per_block)
			return -EINVAL;
	}

	/* Only support 64 or 128 entries per block. */
	if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
		return -EINVAL;

	metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
	if (!metadata_narrow)
		return -ENOMEM;

	/* Narrow to 64 entries per block to keep API backward compatibility. */
	prfcnt_values_per_block = 64;

	for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
		size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
		blks[blk] = (struct kbase_hwcnt_block_description){
			.type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
			.inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
			.hdr_cnt = blk_hdr_cnt,
			.ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
		};
	}

	group = (struct kbase_hwcnt_group_description){
		.type = kbase_hwcnt_metadata_group_type(src_md, 0),
		.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
		.blks = blks,
	};

	desc = (struct kbase_hwcnt_description){
		.grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
		.avail_mask = src_md->avail_mask,
		.clk_cnt = src_md->clk_cnt,
		.grps = &group,
	};

	err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
	if (!err) {
		/* Narrow the buffer size down to half, as the narrowed
		 * metadata only supports 32-bit values while the created
		 * metadata uses 64-bit values for block entries.
		 */
		metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
		*dst_md_narrow = metadata_narrow;
	} else {
		kfree(metadata_narrow);
	}

	return err;
}

void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
{
	if (!md_narrow)
		return;

	kbase_hwcnt_metadata_destroy(md_narrow->metadata);
	kfree(md_narrow);
}

int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
					 struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
{
	size_t dump_buf_bytes;
	size_t clk_cnt_buf_bytes;
	u8 *buf;

	if (!md_narrow || !dump_buf)
		return -EINVAL;

	dump_buf_bytes = md_narrow->dump_buf_bytes;
	clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;

	/* Make a single allocation for both dump_buf and clk_cnt_buf. */
	buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	*dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
		.md_narrow = md_narrow,
		.dump_buf = (u32 *)buf,
		.clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
	};

	return 0;
}

void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
{
	if (!dump_buf_narrow)
		return;

	kfree(dump_buf_narrow->dump_buf);
	*dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
								    .dump_buf = NULL,
								    .clk_cnt_buf = NULL };
}

int kbase_hwcnt_dump_buffer_narrow_array_alloc(
	const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
	struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
{
	struct kbase_hwcnt_dump_buffer_narrow *buffers;
	size_t buf_idx;
	unsigned int order;
	unsigned long addr;
	size_t dump_buf_bytes;
	size_t clk_cnt_buf_bytes;
	size_t total_dump_buf_size;

	if (!md_narrow || !dump_bufs)
		return -EINVAL;

	dump_buf_bytes = md_narrow->dump_buf_bytes;
	clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;

	/* Allocate memory for the dump buffer struct array. */
	buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
	if (!buffers)
		return -ENOMEM;

	/* Allocate pages for the actual dump buffers, as they tend to be fairly
	 * large.
	 */
	order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
	addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);

	if (!addr) {
		kfree(buffers);
		return -ENOMEM;
	}

	*dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
		.page_addr = addr,
		.page_order = order,
		.buf_cnt = n,
		.bufs = buffers,
	};

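	/* The page allocation holds all n dump buffers first, followed by all
	 * n clock counter buffers; point each narrow dump buffer at its own
	 * slice of that allocation.
	 */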
	total_dump_buf_size = dump_buf_bytes * n;
	/* Set the buffer of each dump buf. */
	for (buf_idx = 0; buf_idx < n; buf_idx++) {
		const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
		const size_t clk_cnt_buf_offset =
			total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);

		buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
			.md_narrow = md_narrow,
			.dump_buf = (u32 *)(addr + dump_buf_offset),
			.clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
		};
	}

	return 0;
}

void kbase_hwcnt_dump_buffer_narrow_array_free(
	struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
{
	if (!dump_bufs)
		return;

	kfree(dump_bufs->bufs);
	free_pages(dump_bufs->page_addr, dump_bufs->page_order);
	memset(dump_bufs, 0, sizeof(*dump_bufs));
}

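/*
 * Strict copy of one block into its narrow counterpart: each 64-bit value is
 * clamped to U32_MAX when it does not fit in 32 bits, and counters that are
 * disabled in the block enable map are written as zero.
 */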
void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
						      const u64 *blk_em, size_t val_cnt)
{
	size_t val;

	for (val = 0; val < val_cnt; val++) {
		bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
		u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];

		dst_blk[val] = val_enabled ? src_val : 0;
	}
}

void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
						const struct kbase_hwcnt_dump_buffer *src,
						const struct kbase_hwcnt_enable_map *dst_enable_map)
{
	const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
	size_t grp;
	size_t clk;

	if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
	    WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
	    WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
	    WARN_ON(src->metadata->grp_cnt != 1) ||
	    WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
		    src->metadata->grp_metadata[0].blk_cnt) ||
	    WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
		    KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
	    WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
		    src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
		return;

	/* Don't use src metadata since src buffer is bigger than dst buffer. */
	metadata_narrow = dst_narrow->md_narrow;

	for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
		size_t blk;
		size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);

		for (blk = 0; blk < blk_cnt; blk++) {
			size_t blk_inst;
			size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
				metadata_narrow, grp, blk);

			for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
				/* The narrowed down buffer is only 32-bit. */
				u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
					dst_narrow, grp, blk, blk_inst);
				const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
					src, grp, blk, blk_inst);
				const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
					dst_enable_map, grp, blk, blk_inst);
				size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
					metadata_narrow, grp, blk);
				/* Align upwards to include padding bytes. */
				val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
					val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
						  KBASE_HWCNT_VALUE_BYTES));

				kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
										 blk_em, val_cnt);
			}
		}
	}

	for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
		bool clk_enabled =
			kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);

		dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
	}
}
299