xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /*
23  * CSF GPU HWC backend firmware interface APIs.
24  */
25 
26 #include <mali_kbase.h>
27 #include <gpu/mali_kbase_gpu_regmap.h>
28 #include <device/mali_kbase_device.h>
29 #include "hwcnt/mali_kbase_hwcnt_gpu.h"
30 #include "hwcnt/mali_kbase_hwcnt_types.h"
31 #include <csf/mali_kbase_csf_registers.h>
32 
33 #include "csf/mali_kbase_csf_firmware.h"
34 #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
35 #include "mali_kbase_hwaccess_time.h"
36 #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
37 #include <backend/gpu/mali_kbase_model_linux.h>
38 
39 #include <linux/log2.h>
40 #include "mali_kbase_ccswe.h"
41 
42 
43 /* Ring buffer virtual address starts at 4GB */
44 #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
45 
46 /**
47  * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
48  *                                                 used to save the manual and
49  *                                                 auto HWC samples from
50  *                                                 firmware.
51  * @gpu_dump_base: Starting GPU base address of the ring buffer.
52  * @cpu_dump_base: Starting CPU address for the mapping.
53  * @buf_count:     Buffer count in the ring buffer, MUST be power of 2.
54  * @as_nr:         Address space number for the memory mapping.
55  * @phys:          Physical memory allocation used by the mapping.
56  * @num_pages:     Size of the mapping, in memory pages.
57  */
58 struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
59 	u64 gpu_dump_base;
60 	void *cpu_dump_base;
61 	size_t buf_count;
62 	u32 as_nr;
63 	struct tagged_addr *phys;
64 	size_t num_pages;
65 };
66 
67 /**
68  * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
69  *                                            interface, used to communicate
70  *                                            with firmware.
71  * @kbdev:              KBase device.
72  * @buf_bytes:	        The size in bytes for each buffer in the ring buffer.
73  * @clk_cnt:            The number of clock domains in the system.
74  *                      The maximum is 64.
75  * @clk_enable_map:     Bitmask of enabled clock domains.
76  * @rate_listener:      Clock rate listener callback state.
77  * @ccswe_shader_cores: Shader cores cycle count software estimator.
78  */
79 struct kbase_hwcnt_backend_csf_if_fw_ctx {
80 	struct kbase_device *kbdev;
81 	size_t buf_bytes;
82 	u8 clk_cnt;
83 	u64 clk_enable_map;
84 	struct kbase_clk_rate_listener rate_listener;
85 	struct kbase_ccswe ccswe_shader_cores;
86 };
87 
88 static void
89 kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
90 {
91 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
92 	struct kbase_device *kbdev;
93 
94 	WARN_ON(!ctx);
95 
96 	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
97 	kbdev = fw_ctx->kbdev;
98 
99 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
100 }
101 
102 static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
103 						unsigned long *flags)
104 	__acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
105 			    ctx->kbdev->csf.scheduler.interrupt_lock)
106 {
107 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
108 	struct kbase_device *kbdev;
109 
110 	WARN_ON(!ctx);
111 
112 	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
113 	kbdev = fw_ctx->kbdev;
114 
115 	kbase_csf_scheduler_spin_lock(kbdev, flags);
116 }
117 
118 static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
119 						  unsigned long flags)
120 	__releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
121 			    ctx->kbdev->csf.scheduler.interrupt_lock)
122 {
123 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
124 	struct kbase_device *kbdev;
125 
126 	WARN_ON(!ctx);
127 
128 	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
129 	kbdev = fw_ctx->kbdev;
130 
131 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
132 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
133 }
134 
135 /**
136  * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
137  *
138  * @rate_listener:    Callback state
139  * @clk_index:        Clock index
140  * @clk_rate_hz:      Clock frequency (Hz)
141  */
142 static void
143 kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
144 					      u32 clk_index, u32 clk_rate_hz)
145 {
146 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
147 		rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
148 	u64 timestamp_ns;
149 
150 	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
151 		return;
152 
153 	timestamp_ns = ktime_get_raw_ns();
154 	kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
155 }
156 
157 /**
158  * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
159  *
160  * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
161  * @clk_enable_map: Enable map specifying the clock domains for which cycle counting is enabled.
162  */
163 static void
164 kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
165 					 u64 clk_enable_map)
166 {
167 	struct kbase_device *kbdev = fw_ctx->kbdev;
168 
169 	if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
170 		/* software estimation for non-top clock domains */
171 		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
172 		const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
173 		u32 cur_freq;
174 		unsigned long flags;
175 		u64 timestamp_ns;
176 
177 		timestamp_ns = ktime_get_raw_ns();
178 
179 		spin_lock_irqsave(&rtm->lock, flags);
180 
181 		cur_freq = (u32)clk_data->clock_val;
182 		kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
183 		kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
184 
185 		kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
186 
187 		spin_unlock_irqrestore(&rtm->lock, flags);
188 	}
189 
190 	fw_ctx->clk_enable_map = clk_enable_map;
191 }
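
/*
 * Note on the software cycle-count estimate above (added commentary, not part
 * of the original source): the shader-core domain is not read from a hardware
 * cycle counter in this backend, so kbase_ccswe approximates it by integrating
 * clock frequency over time. Resetting the estimator, seeding it with the
 * current frequency and subscribing to rate-change notifications keeps that
 * integral accurate. As a purely illustrative figure: at a constant 500 MHz,
 * 2 ms of elapsed time corresponds to roughly 1,000,000 estimated cycles.
 */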
192 
193 /**
194  * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
195  *
196  * @fw_ctx:     Non-NULL pointer to CSF firmware interface context.
197  */
198 static void
199 kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
200 {
201 	struct kbase_device *kbdev = fw_ctx->kbdev;
202 	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
203 	u64 clk_enable_map = fw_ctx->clk_enable_map;
204 
205 	if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
206 		kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
207 }
208 
209 static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
210 	struct kbase_hwcnt_backend_csf_if_ctx *ctx,
211 	struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
212 {
213 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
214 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
215 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
216 
217 	*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
218 		.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
219 		.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
220 		.prfcnt_hw_size =
221 			KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
222 		.prfcnt_fw_size =
223 			KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
224 		.dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
225 		.prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
226 		.clk_cnt = 1,
227 		.clearing_samples = true,
228 	};
229 
230 	fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
231 #else
232 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
233 	struct kbase_device *kbdev;
234 	u32 prfcnt_size;
235 	u32 prfcnt_hw_size;
236 	u32 prfcnt_fw_size;
237 	u32 prfcnt_block_size =
238 		KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
239 
240 	WARN_ON(!ctx);
241 	WARN_ON(!prfcnt_info);
242 
243 	fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
244 	kbdev = fw_ctx->kbdev;
245 	prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
246 	prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
247 	prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
248 	fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
249 
250 	/* Read the block size if the GPU has the register PRFCNT_FEATURES
251 	 * which was introduced in architecture version 11.x.7.
252 	 */
253 	if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
254 	    GPU_ID2_PRODUCT_TTUX) {
255 		prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
256 					    kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
257 				    << 8;
258 	}
259 
260 	*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
261 		.prfcnt_hw_size = prfcnt_hw_size,
262 		.prfcnt_fw_size = prfcnt_fw_size,
263 		.dump_bytes = fw_ctx->buf_bytes,
264 		.prfcnt_block_size = prfcnt_block_size,
265 		.l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
266 		.core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
267 		.clk_cnt = fw_ctx->clk_cnt,
268 		.clearing_samples = true,
269 	};
270 
271 	/* The block size must be a multiple of the counter size. */
272 	WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
273 	/* The total size must be a multiple of the block size. */
274 	WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
275 #endif
276 }
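
/*
 * Sizing note (added commentary with hypothetical numbers, not from the
 * original source): dump_bytes is simply prfcnt_hw_size + prfcnt_fw_size as
 * reported by the firmware's GLB_PRFCNT_SIZE field, and prfcnt_block_size
 * defaults to KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK *
 * KBASE_HWCNT_VALUE_HW_BYTES unless PRFCNT_FEATURES provides one. For
 * example, with illustrative values prfcnt_hw_size = 0x2000,
 * prfcnt_fw_size = 0x100 and a 0x100-byte block, dump_bytes = 0x2100,
 * i.e. 33 blocks, and both WARN_ON checks above pass.
 */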
277 
278 static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
279 	struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
280 	struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
281 {
282 	struct kbase_device *kbdev;
283 	struct tagged_addr *phys;
284 	struct page **page_list;
285 	void *cpu_addr;
286 	int ret;
287 	int i;
288 	size_t num_pages;
289 	u64 flags;
290 	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
291 
292 	pgprot_t cpu_map_prot = PAGE_KERNEL;
293 	u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
294 
295 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
296 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
297 
298 	/* Calls to this function are inherently asynchronous with respect to
299 	 * MMU operations.
300 	 */
301 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
302 
303 	WARN_ON(!ctx);
304 	WARN_ON(!cpu_dump_base);
305 	WARN_ON(!out_ring_buf);
306 
307 	kbdev = fw_ctx->kbdev;
308 
309 	/* The buffer count must be a power of 2. */
310 	if (!is_power_of_2(buf_count))
311 		return -EINVAL;
312 
313 	/* The GPU VA base must be 2KB aligned. */
314 	if (gpu_va_base & (2048 - 1))
315 		return -EINVAL;
316 
317 	fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
318 	if (!fw_ring_buf)
319 		return -ENOMEM;
320 
321 	num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
322 	phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
323 	if (!phys)
324 		goto phys_alloc_error;
325 
326 	page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
327 	if (!page_list)
328 		goto page_list_alloc_error;
329 
330 	/* Get physical page for the buffer */
331 	ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
332 					 phys, false, NULL);
333 	if (ret != num_pages)
334 		goto phys_mem_pool_alloc_error;
335 
336 	/* Get the CPU virtual address */
337 	for (i = 0; i < num_pages; i++)
338 		page_list[i] = as_page(phys[i]);
339 
340 	cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
341 	if (!cpu_addr)
342 		goto vmap_error;
343 
344 	flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
345 		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
346 
347 	/* Update MMU table */
348 	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
349 				     num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
350 				     mmu_sync_info, NULL, false);
351 	if (ret)
352 		goto mmu_insert_failed;
353 
354 	kfree(page_list);
355 
356 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
357 	fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr;
358 #else
359 	fw_ring_buf->gpu_dump_base = gpu_va_base;
360 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
361 	fw_ring_buf->cpu_dump_base = cpu_addr;
362 	fw_ring_buf->phys = phys;
363 	fw_ring_buf->num_pages = num_pages;
364 	fw_ring_buf->buf_count = buf_count;
365 	fw_ring_buf->as_nr = MCU_AS_NR;
366 
367 	*cpu_dump_base = fw_ring_buf->cpu_dump_base;
368 	*out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
369 
370 	return 0;
371 
372 mmu_insert_failed:
373 	vunmap(cpu_addr);
374 vmap_error:
375 	kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
376 				  false, false);
377 phys_mem_pool_alloc_error:
378 	kfree(page_list);
379 page_list_alloc_error:
380 	kfree(phys);
381 phys_alloc_error:
382 	kfree(fw_ring_buf);
383 	return -ENOMEM;
384 }
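
/*
 * Allocation example (added commentary with hypothetical numbers): if
 * fw_ctx->buf_bytes were 32 KiB and buf_count were 8, the backing store would
 * be 256 KiB, i.e. num_pages = 64 with 4 KiB pages. Those pages are mapped
 * once into the MCU address space at GPU VA 4 GiB
 * (KBASE_HWC_CSF_RING_BUFFER_VA_START) and vmap()'d into one contiguous
 * kernel mapping for CPU access; the error labels above unwind the allocation
 * steps in reverse order.
 */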
385 
386 static void
387 kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
388 					     struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
389 					     u32 buf_index_first, u32 buf_index_last, bool for_cpu)
390 {
391 	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
392 		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
393 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
394 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
395 	size_t i;
396 	size_t pg_first;
397 	size_t pg_last;
398 	u64 start_address;
399 	u64 stop_address;
400 	u32 ring_buf_index_first;
401 	u32 ring_buf_index_last;
402 
403 	WARN_ON(!ctx);
404 	WARN_ON(!ring_buf);
405 
406 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
407 	/* When using the dummy backend, syncing the ring buffer is unnecessary as
408 	 * the ring buffer is only accessed by the CPU. Syncing may also cause data
409 	 * loss due to cache invalidation, so return early.
410 	 */
411 	return;
412 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
413 
414 	/* The index arguments for this function form a half-open range:
415 	 * inclusive at the start, exclusive at the end.
416 	 * However, when masking back to the available buffers we make this
417 	 * inclusive at both ends so that full flushes are not 0 -> 0.
418 	 */
419 	ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
420 	ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
421 
422 	/* The start address is the offset of the first buffer. */
423 	start_address = fw_ctx->buf_bytes * ring_buf_index_first;
424 	pg_first = start_address >> PAGE_SHIFT;
425 
426 	/* The stop address is the last byte in the final buffer. */
427 	stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
428 	pg_last = stop_address >> PAGE_SHIFT;
429 
430 	/* Check whether the buffer range wraps. */
431 	if (start_address > stop_address) {
432 		/* Sync the first part, up to the end of the ring buffer. */
433 		for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
434 			struct page *pg = as_page(fw_ring_buf->phys[i]);
435 
436 			if (for_cpu) {
437 				kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
438 							  PAGE_SIZE, DMA_BIDIRECTIONAL);
439 			} else {
440 				kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
441 							     PAGE_SIZE, DMA_BIDIRECTIONAL);
442 			}
443 		}
444 
445 		/* The second part starts from page 0. */
446 		pg_first = 0;
447 	}
448 
449 	for (i = pg_first; i <= pg_last; i++) {
450 		struct page *pg = as_page(fw_ring_buf->phys[i]);
451 
452 		if (for_cpu) {
453 			kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
454 						  DMA_BIDIRECTIONAL);
455 		} else {
456 			kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
457 						     DMA_BIDIRECTIONAL);
458 		}
459 	}
460 }
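
/*
 * Worked example of the wrap handling above (added commentary, hypothetical
 * values): with buf_count = 4, buf_index_first = 3 and buf_index_last = 5
 * (exclusive), the masked indices become ring_buf_index_first = 3 and
 * ring_buf_index_last = (5 - 1) & 3 = 0. start_address is then greater than
 * stop_address, so the pages from pg_first to the end of the ring buffer are
 * synced first and the final loop restarts from page 0 up to pg_last. All
 * syncing is done at PAGE_SIZE granularity.
 */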
461 
462 static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
463 {
464 	CSTD_UNUSED(ctx);
465 	return ktime_get_raw_ns();
466 }
467 
468 static void
469 kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
470 					     struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
471 {
472 	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
473 		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
474 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
475 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
476 
477 	if (!fw_ring_buf)
478 		return;
479 
480 	if (fw_ring_buf->phys) {
481 		u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
482 
483 		WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
484 						 gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
485 						 fw_ring_buf->num_pages, fw_ring_buf->num_pages,
486 						 MCU_AS_NR, true));
487 
488 		vunmap(fw_ring_buf->cpu_dump_base);
489 
490 		kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
491 					  fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
492 
493 		kfree(fw_ring_buf->phys);
494 
495 		kfree(fw_ring_buf);
496 	}
497 }
498 
499 static void
500 kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
501 					   struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
502 					   struct kbase_hwcnt_backend_csf_if_enable *enable)
503 {
504 	u32 prfcnt_config;
505 	struct kbase_device *kbdev;
506 	struct kbase_csf_global_iface *global_iface;
507 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
508 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
509 	struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
510 		(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
511 
512 	WARN_ON(!ctx);
513 	WARN_ON(!ring_buf);
514 	WARN_ON(!enable);
515 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
516 
517 	kbdev = fw_ctx->kbdev;
518 	global_iface = &kbdev->csf.global_iface;
519 
520 	/* Configure the ring buffer size (in buffers) and the counter set. */
521 	prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
522 	prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
523 
524 	/* Configure the ring buffer base address */
525 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
526 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
527 					fw_ring_buf->gpu_dump_base & U32_MAX);
528 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
529 					fw_ring_buf->gpu_dump_base >> 32);
530 
531 	/* Set extract position to 0 */
532 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
533 
534 	/* Configure the enable bitmap */
535 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
536 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
537 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
538 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
539 
540 	/* Configure the HWC set and buffer size */
541 	kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
542 
543 	kbdev->csf.hwcnt.enable_pending = true;
544 
545 	/* Unmask the interrupts */
546 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
547 					     GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
548 					     GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
549 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
550 					     GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
551 					     GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
552 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
553 					     GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
554 					     GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
555 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
556 					     GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
557 					     GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
558 
559 	/* Enable the HWC */
560 	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
561 					     (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
562 					     GLB_REQ_PRFCNT_ENABLE_MASK);
563 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
564 
565 	prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
566 
567 	kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
568 }
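
/*
 * Sequencing note (added commentary; the acknowledgement path lives outside
 * this file): the configuration registers are programmed first, the PRFCNT
 * interrupt sources are unmasked, and only then is PRFCNT_ENABLE set in
 * GLB_REQ and the kernel doorbell rung. enable_pending is set here and is
 * presumably cleared once the firmware acknowledges the request via the
 * global interrupt handling code elsewhere in the driver.
 */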
569 
570 static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
571 {
572 	struct kbase_device *kbdev;
573 	struct kbase_csf_global_iface *global_iface;
574 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
575 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
576 
577 	WARN_ON(!ctx);
578 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
579 
580 	kbdev = fw_ctx->kbdev;
581 	global_iface = &kbdev->csf.global_iface;
582 
583 	/* Disable the HWC */
584 	kbdev->csf.hwcnt.enable_pending = true;
585 	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
586 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
587 
588 	/* Mask the interrupts */
589 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
590 					     GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
591 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
592 					     GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
593 	kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
594 					     GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
595 
596 	/* In case we have a previous request in flight when the disable
597 	 * happens.
598 	 */
599 	kbdev->csf.hwcnt.request_pending = false;
600 
601 	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
602 }
603 
604 static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
605 {
606 	u32 glb_req;
607 	struct kbase_device *kbdev;
608 	struct kbase_csf_global_iface *global_iface;
609 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
610 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
611 
612 	WARN_ON(!ctx);
613 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
614 
615 	kbdev = fw_ctx->kbdev;
616 	global_iface = &kbdev->csf.global_iface;
617 
618 	/* Trigger dumping */
619 	kbdev->csf.hwcnt.request_pending = true;
620 	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
621 	glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
622 	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
623 					     GLB_REQ_PRFCNT_SAMPLE_MASK);
624 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
625 }
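
/*
 * Protocol note (added commentary): PRFCNT_SAMPLE in GLB_REQ is treated as a
 * toggle bit, which is why the code XORs it rather than setting it. Each
 * toggle is one sample request, so a 0 -> 1 transition and a later 1 -> 0
 * transition are both requests, and the firmware is expected to signal
 * completion by making the corresponding GLB_ACK bit match GLB_REQ again.
 */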
626 
627 static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
628 						       u32 *extract_index, u32 *insert_index)
629 {
630 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
631 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
632 
633 	WARN_ON(!ctx);
634 	WARN_ON(!extract_index);
635 	WARN_ON(!insert_index);
636 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
637 
638 	*extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
639 							      GLB_PRFCNT_EXTRACT);
640 	*insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
641 							 GLB_PRFCNT_INSERT);
642 }
643 
644 static void
645 kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
646 						 u32 extract_idx)
647 {
648 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
649 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
650 
651 	WARN_ON(!ctx);
652 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
653 
654 	/* Set the raw extract index to release the buffer back to the ring
655 	 * buffer.
656 	 */
657 	kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
658 					extract_idx);
659 }
660 
661 static void
662 kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
663 						   u64 *cycle_counts, u64 clk_enable_map)
664 {
665 	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
666 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
667 	u8 clk;
668 	u64 timestamp_ns = ktime_get_raw_ns();
669 
670 	WARN_ON(!ctx);
671 	WARN_ON(!cycle_counts);
672 	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
673 
674 	for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
675 		if (!(clk_enable_map & (1ull << clk)))
676 			continue;
677 
678 		if (clk == KBASE_CLOCK_DOMAIN_TOP) {
679 			/* Read cycle count for top clock domain. */
680 			kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
681 							     NULL, NULL);
682 		} else {
683 			/* Estimate cycle count for non-top clock domain. */
684 			cycle_counts[clk] =
685 				kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
686 		}
687 	}
688 }
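
/*
 * Usage note (added commentary): cycle_counts must have at least
 * fw_ctx->clk_cnt entries; each set bit in clk_enable_map selects the clock
 * domain with that index. Domain KBASE_CLOCK_DOMAIN_TOP is read from the GPU
 * via kbase_backend_get_gpu_time_norequest(), while any other enabled domain
 * (the shader cores here) is estimated in software with
 * kbase_ccswe_cycle_at() using the frequency history collected by the rate
 * listener.
 */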
689 
690 /**
691  * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
692  *
693  * @fw_ctx: Pointer to context to destroy.
694  */
695 static void
696 kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
697 {
698 	if (!fw_ctx)
699 		return;
700 
701 	kfree(fw_ctx);
702 }
703 
704 /**
705  * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
706  *
707  * @kbdev:   Non-NULL pointer to kbase device.
708  * @out_ctx: Non-NULL pointer to where info is stored on success.
709  * Return: 0 on success, else error code.
710  */
711 static int
712 kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
713 					  struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
714 {
715 	u8 clk;
716 	int errcode = -ENOMEM;
717 	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
718 
719 	WARN_ON(!kbdev);
720 	WARN_ON(!out_ctx);
721 
722 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
723 	if (!ctx)
724 		goto error;
725 
726 	ctx->kbdev = kbdev;
727 
728 	/* Determine the number of available clock domains. */
729 	for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
730 		if (kbdev->pm.clk_rtm.clks[clk] == NULL)
731 			break;
732 	}
733 	ctx->clk_cnt = clk;
734 
735 	ctx->clk_enable_map = 0;
736 	kbase_ccswe_init(&ctx->ccswe_shader_cores);
737 	ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
738 
739 	*out_ctx = ctx;
740 
741 	return 0;
742 error:
743 	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
744 	return errcode;
745 }
746 
747 void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
748 {
749 	if (!if_fw)
750 		return;
751 
752 	kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
753 		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
754 	memset(if_fw, 0, sizeof(*if_fw));
755 }
756 
757 int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
758 					 struct kbase_hwcnt_backend_csf_if *if_fw)
759 {
760 	int errcode;
761 	struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
762 
763 	if (!kbdev || !if_fw)
764 		return -EINVAL;
765 
766 	errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
767 	if (errcode)
768 		return errcode;
769 
770 	if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
771 	if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
772 	if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
773 	if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
774 	if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
775 	if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
776 	if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
777 	if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
778 	if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
779 	if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
780 	if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
781 	if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
782 	if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
783 	if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
784 	if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
785 
786 	return 0;
787 }
788
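
/*
 * Illustrative usage sketch (added commentary, not part of this driver): a
 * CSF counters backend would typically create the firmware interface once
 * per device and destroy it on teardown, roughly as follows. Error handling
 * and the surrounding backend structure are elided.
 *
 *	struct kbase_hwcnt_backend_csf_if if_fw;
 *	int err = kbase_hwcnt_backend_csf_if_fw_create(kbdev, &if_fw);
 *
 *	if (err)
 *		return err;
 *	// ... pass if_fw to the CSF hardware counter backend ...
 *	kbase_hwcnt_backend_csf_if_fw_destroy(&if_fw);
 */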