// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * CSF GPU HWC backend firmware interface APIs.
 */

#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <device/mali_kbase_device.h>
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <csf/mali_kbase_csf_registers.h>

#include "csf/mali_kbase_csf_firmware.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
#include "mali_kbase_hwaccess_time.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <backend/gpu/mali_kbase_model_linux.h>

#include <linux/log2.h>
#include "mali_kbase_ccswe.h"

/* Ring buffer virtual addresses start at 4GB. */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - Ring buffer used by the CSF
 *                                                 interface to store the manual
 *                                                 and automatic HWC samples
 *                                                 produced by firmware.
 * @gpu_dump_base: Starting GPU base address of the ring buffer.
 * @cpu_dump_base: Starting CPU address for the mapping.
 * @buf_count:     Buffer count in the ring buffer, MUST be a power of 2.
 * @as_nr:         Address space number for the memory mapping.
 * @phys:          Physical memory allocation used by the mapping.
 * @num_pages:     Size of the mapping, in memory pages.
 */
struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
        u64 gpu_dump_base;
        void *cpu_dump_base;
        size_t buf_count;
        u32 as_nr;
        struct tagged_addr *phys;
        size_t num_pages;
};

/**
 * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
 *                                            interface, used to communicate
 *                                            with firmware.
 * @kbdev:              KBase device.
 * @buf_bytes:          The size in bytes of each buffer in the ring buffer.
 * @clk_cnt:            The number of clock domains in the system.
 *                      The maximum is 64.
 * @clk_enable_map:     Bitmask of enabled clocks.
 * @rate_listener:      Clock rate listener callback state.
 * @ccswe_shader_cores: Shader cores cycle count software estimator.
 */
struct kbase_hwcnt_backend_csf_if_fw_ctx {
        struct kbase_device *kbdev;
        size_t buf_bytes;
        u8 clk_cnt;
        u64 clk_enable_map;
        struct kbase_clk_rate_listener rate_listener;
        struct kbase_ccswe ccswe_shader_cores;
};

static void
kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
        struct kbase_device *kbdev;

        WARN_ON(!ctx);

        fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        kbdev = fw_ctx->kbdev;

        kbase_csf_scheduler_spin_lock_assert_held(kbdev);
}

static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                                unsigned long *flags)
        __acquires(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
                           ctx->kbdev->csf.scheduler.interrupt_lock)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
        struct kbase_device *kbdev;

        WARN_ON(!ctx);

        fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        kbdev = fw_ctx->kbdev;

        kbase_csf_scheduler_spin_lock(kbdev, flags);
}

static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                                  unsigned long flags)
        __releases(&(struct kbase_hwcnt_backend_csf_if_fw_ctx)
                           ctx->kbdev->csf.scheduler.interrupt_lock)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
        struct kbase_device *kbdev;

        WARN_ON(!ctx);

        fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        kbdev = fw_ctx->kbdev;

        kbase_csf_scheduler_spin_lock_assert_held(kbdev);
        kbase_csf_scheduler_spin_unlock(kbdev, flags);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On frequency change callback
 *
 * @rate_listener: Callback state
 * @clk_index:     Clock index
 * @clk_rate_hz:   Clock frequency (Hz)
 */
static void
kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
                                              u32 clk_index, u32 clk_rate_hz)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
                rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
        u64 timestamp_ns;

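        /* Only the shader cores clock domain is tracked by the software
         * estimator; frequency changes for other domains are ignored here.
         */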
        if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
                return;

        timestamp_ns = ktime_get_raw_ns();
        kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
 *
 * @fw_ctx:         Non-NULL pointer to CSF firmware interface context.
 * @clk_enable_map: Bitmask of clock domains for which cycle counting is enabled.
 */
static void
kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
                                         u64 clk_enable_map)
{
        struct kbase_device *kbdev = fw_ctx->kbdev;

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
                /* software estimation for non-top clock domains */
                struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
                const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
                u32 cur_freq;
                unsigned long flags;
                u64 timestamp_ns;

                timestamp_ns = ktime_get_raw_ns();

                spin_lock_irqsave(&rtm->lock, flags);

                cur_freq = (u32)clk_data->clock_val;
                kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
                kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);

                kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);

                spin_unlock_irqrestore(&rtm->lock, flags);
        }

        fw_ctx->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
 *
 * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
 */
static void
kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
        struct kbase_device *kbdev = fw_ctx->kbdev;
        struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
        u64 clk_enable_map = fw_ctx->clk_enable_map;

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
                kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
        struct kbase_hwcnt_backend_csf_if_ctx *ctx,
        struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
                .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
                .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
                .prfcnt_hw_size =
                        KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
                .prfcnt_fw_size =
                        KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
                .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
                .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
                .clk_cnt = 1,
                .clearing_samples = true,
        };

        fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
#else
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
        struct kbase_device *kbdev;
        u32 prfcnt_size;
        u32 prfcnt_hw_size;
        u32 prfcnt_fw_size;
        u32 prfcnt_block_size =
                KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;

        WARN_ON(!ctx);
        WARN_ON(!prfcnt_info);

        fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        kbdev = fw_ctx->kbdev;
        prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
        prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
        prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
        fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;

        /* Read the block size if the GPU has the register PRFCNT_FEATURES
         * which was introduced in architecture version 11.x.7.
         */
        if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
            GPU_ID2_PRODUCT_TTUX) {
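                /* PRFCNT_FEATURES reports the counter block size; shifting the
                 * field left by 8 converts it to bytes (the field is assumed to
                 * be expressed in units of 256 bytes).
                 */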
                prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
                                            kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
                                    << 8;
        }

        *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
                .prfcnt_hw_size = prfcnt_hw_size,
                .prfcnt_fw_size = prfcnt_fw_size,
                .dump_bytes = fw_ctx->buf_bytes,
                .prfcnt_block_size = prfcnt_block_size,
                .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
                .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
                .clk_cnt = fw_ctx->clk_cnt,
                .clearing_samples = true,
        };

        /* Block size must be a multiple of the counter size. */
        WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
        /* Total size must be a multiple of the block size. */
        WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
#endif
}

static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
        struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
        struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
{
        struct kbase_device *kbdev;
        struct tagged_addr *phys;
        struct page **page_list;
        void *cpu_addr;
        int ret;
        int i;
        size_t num_pages;
        u64 flags;
        struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;

        pgprot_t cpu_map_prot = PAGE_KERNEL;
        u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        /* Calls to this function are inherently asynchronous, with respect to
         * MMU operations.
         */
        const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

        WARN_ON(!ctx);
        WARN_ON(!cpu_dump_base);
        WARN_ON(!out_ring_buf);

        kbdev = fw_ctx->kbdev;

        /* The buffer count must be a power of 2 */
        if (!is_power_of_2(buf_count))
                return -EINVAL;

        /* The ring buffer GPU VA must be 2KB aligned. */
        if (gpu_va_base & (2048 - 1))
                return -EINVAL;

        fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
        if (!fw_ring_buf)
                return -ENOMEM;

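        /* The ring must hold buf_count sample buffers of buf_bytes each;
         * round the total size up to whole pages.
         */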
        num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
        phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
        if (!phys)
                goto phys_alloc_error;

        page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
        if (!page_list)
                goto page_list_alloc_error;

        /* Get physical page for the buffer */
        ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
                                         phys, false, NULL);
        if (ret != num_pages)
                goto phys_mem_pool_alloc_error;

        /* Get the CPU virtual address */
        for (i = 0; i < num_pages; i++)
                page_list[i] = as_page(phys[i]);

        cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
        if (!cpu_addr)
                goto vmap_error;

        flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
                KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);

        /* Update MMU table */
        ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
                                     num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
                                     mmu_sync_info, NULL, false);
        if (ret)
                goto mmu_insert_failed;

        kfree(page_list);

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
        fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr;
#else
        fw_ring_buf->gpu_dump_base = gpu_va_base;
#endif /* CONFIG_MALI_BIFROST_NO_MALI */
        fw_ring_buf->cpu_dump_base = cpu_addr;
        fw_ring_buf->phys = phys;
        fw_ring_buf->num_pages = num_pages;
        fw_ring_buf->buf_count = buf_count;
        fw_ring_buf->as_nr = MCU_AS_NR;

        *cpu_dump_base = fw_ring_buf->cpu_dump_base;
        *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;

        return 0;

mmu_insert_failed:
        vunmap(cpu_addr);
vmap_error:
        kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
                                  false, false);
phys_mem_pool_alloc_error:
        kfree(page_list);
page_list_alloc_error:
        kfree(phys);
phys_alloc_error:
        kfree(fw_ring_buf);
        return -ENOMEM;
}

static void
kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                             struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
                                             u32 buf_index_first, u32 buf_index_last, bool for_cpu)
{
        struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
                (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        size_t i;
        size_t pg_first;
        size_t pg_last;
        u64 start_address;
        u64 stop_address;
        u32 ring_buf_index_first;
        u32 ring_buf_index_last;

        WARN_ON(!ctx);
        WARN_ON(!ring_buf);

#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
        /* When using the dummy backend, syncing the ring buffer is unnecessary
         * as the ring buffer is only accessed by the CPU. It may also cause
         * data loss due to cache invalidation, so return early.
         */
        return;
#endif /* CONFIG_MALI_BIFROST_NO_MALI */

        /* The index arguments for this function form a half-open range
         * [buf_index_first, buf_index_last). However, when masking back to the
         * available buffers we make the range inclusive at both ends so that a
         * full flush does not degenerate to 0 -> 0.
         */
        ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
        ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);

        /* The start address is the offset of the first buffer. */
        start_address = fw_ctx->buf_bytes * ring_buf_index_first;
        pg_first = start_address >> PAGE_SHIFT;

        /* The stop address is the last byte in the final buffer. */
        stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
        pg_last = stop_address >> PAGE_SHIFT;

        /* Check whether the buffer range wraps. */
        if (start_address > stop_address) {
                /* sync the first part to the end of ring buffer. */
                for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
                        struct page *pg = as_page(fw_ring_buf->phys[i]);

                        if (for_cpu) {
                                kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
                                                          PAGE_SIZE, DMA_BIDIRECTIONAL);
                        } else {
                                kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
                                                             PAGE_SIZE, DMA_BIDIRECTIONAL);
                        }
                }

                /* second part starts from page 0. */
                pg_first = 0;
        }

        for (i = pg_first; i <= pg_last; i++) {
                struct page *pg = as_page(fw_ring_buf->phys[i]);

                if (for_cpu) {
                        kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
                                                  DMA_BIDIRECTIONAL);
                } else {
                        kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
                                                     DMA_BIDIRECTIONAL);
                }
        }
}

static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
        CSTD_UNUSED(ctx);
        return ktime_get_raw_ns();
}

static void
kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                             struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
{
        struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
                (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        if (!fw_ring_buf)
                return;

        if (fw_ring_buf->phys) {
                u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;

                WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
                                                 gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
                                                 fw_ring_buf->num_pages, fw_ring_buf->num_pages,
                                                 MCU_AS_NR, true));

                vunmap(fw_ring_buf->cpu_dump_base);

                kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
                                          fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);

                kfree(fw_ring_buf->phys);

                kfree(fw_ring_buf);
        }
}

static void
kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                           struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
                                           struct kbase_hwcnt_backend_csf_if_enable *enable)
{
        u32 prfcnt_config;
        struct kbase_device *kbdev;
        struct kbase_csf_global_iface *global_iface;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
                (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;

        WARN_ON(!ctx);
        WARN_ON(!ring_buf);
        WARN_ON(!enable);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

        kbdev = fw_ctx->kbdev;
        global_iface = &kbdev->csf.global_iface;

        /* Configure */
        prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
        prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);

        /* Configure the ring buffer base address */
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
                                        fw_ring_buf->gpu_dump_base & U32_MAX);
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
                                        fw_ring_buf->gpu_dump_base >> 32);

        /* Set extract position to 0 */
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);

        /* Configure the enable bitmap */
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);

        /* Configure the HWC set and buffer size */
        kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);

        kbdev->csf.hwcnt.enable_pending = true;

        /* Unmask the interrupts */
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
                                             GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);

        /* Enable the HWC */
        kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
                                             (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
                                             GLB_REQ_PRFCNT_ENABLE_MASK);
        kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

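        /* Read back PRFCNT_CONFIG; the value read is not used any further. */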
        prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);

        kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
        struct kbase_device *kbdev;
        struct kbase_csf_global_iface *global_iface;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        WARN_ON(!ctx);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

        kbdev = fw_ctx->kbdev;
        global_iface = &kbdev->csf.global_iface;

        /* Disable the HWC */
        kbdev->csf.hwcnt.enable_pending = true;
        kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
        kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);

        /* Mask the interrupts */
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
                                             GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
                                             GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
        kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
                                             GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);

        /* In case we have a previous request in flight when the disable
         * happens.
         */
        kbdev->csf.hwcnt.request_pending = false;

        kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
}

static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
{
        u32 glb_req;
        struct kbase_device *kbdev;
        struct kbase_csf_global_iface *global_iface;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        WARN_ON(!ctx);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

        kbdev = fw_ctx->kbdev;
        global_iface = &kbdev->csf.global_iface;

        /* Trigger dumping */
        kbdev->csf.hwcnt.request_pending = true;
        glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
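        /* PRFCNT_SAMPLE follows the GLB_REQ/GLB_ACK toggle convention: flipping
         * the bit relative to its current value requests a new sample from the
         * firmware.
         */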
        glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
        kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
                                             GLB_REQ_PRFCNT_SAMPLE_MASK);
        kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}

static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                                       u32 *extract_index, u32 *insert_index)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        WARN_ON(!ctx);
        WARN_ON(!extract_index);
        WARN_ON(!insert_index);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

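        /* The extract index lives in the input (host-written) page, while the
         * insert index is read from the output (firmware-written) page.
         */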
        *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
                                                              GLB_PRFCNT_EXTRACT);
        *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
                                                         GLB_PRFCNT_INSERT);
}

static void
kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                                 u32 extract_idx)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;

        WARN_ON(!ctx);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

        /* Set the raw extract index to release the buffer back to the ring
         * buffer.
         */
        kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
                                        extract_idx);
}

static void
kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
                                                   u64 *cycle_counts, u64 clk_enable_map)
{
        struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
        u8 clk;
        u64 timestamp_ns = ktime_get_raw_ns();

        WARN_ON(!ctx);
        WARN_ON(!cycle_counts);
        kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);

        for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
                if (!(clk_enable_map & (1ull << clk)))
                        continue;

                if (clk == KBASE_CLOCK_DOMAIN_TOP) {
                        /* Read cycle count for top clock domain. */
                        kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
                                                             NULL, NULL);
                } else {
                        /* Estimate cycle count for non-top clock domain. */
                        cycle_counts[clk] =
                                kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
                }
        }
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
 *
 * @fw_ctx: Pointer to context to destroy.
 */
static void
kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
{
        if (!fw_ctx)
                return;

        kfree(fw_ctx);
}

/**
 * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF firmware context.
 *
 * @kbdev:   Non-NULL pointer to kbase device.
 * @out_ctx: Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int
kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
                                          struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
{
        u8 clk;
        int errcode = -ENOMEM;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

        WARN_ON(!kbdev);
        WARN_ON(!out_ctx);

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                goto error;

        ctx->kbdev = kbdev;

        /* Determine the number of available clock domains. */
        for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
                if (kbdev->pm.clk_rtm.clks[clk] == NULL)
                        break;
        }
        ctx->clk_cnt = clk;

        ctx->clk_enable_map = 0;
        kbase_ccswe_init(&ctx->ccswe_shader_cores);
        ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;

        *out_ctx = ctx;

        return 0;
error:
        kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
        return errcode;
}

void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
{
        if (!if_fw)
                return;

        kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
                (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
        memset(if_fw, 0, sizeof(*if_fw));
}

int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
                                         struct kbase_hwcnt_backend_csf_if *if_fw)
{
        int errcode;
        struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;

        if (!kbdev || !if_fw)
                return -EINVAL;

        errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
        if (errcode)
                return errcode;

        if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
        if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
        if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
        if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
        if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
        if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
        if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
        if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
        if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
        if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
        if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
        if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
        if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
        if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
        if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;

        return 0;
}