/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */



/*
 * GPU backend instrumentation APIs.
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>

/**
 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
 *                                 hardware
 * @kbdev: Kbase device
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
        unsigned long flags;
        unsigned long pm_flags;
        u32 irq_mask;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_REQUEST_CLEAN);

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask | CLEAN_CACHES_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                                GPU_COMMAND_CLEAN_INV_CACHES, NULL);
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

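/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
 * @kbdev: Kbase device
 * @kctx:  Kbase context that will own the counter dump
 * @setup: HW counter setup parameters (dump buffer address and bitmasks)
 *
 * Requests the shader cores, cleans and invalidates the GPU caches, requests
 * the L2 cache and then programs the performance counter block for manual
 * sampling.
 *
 * Return: 0 on success, -EINVAL on error
 */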
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
                                        struct kbase_context *kctx,
                                        struct kbase_uk_hwcnt_setup *setup)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        int ret;
        u64 shader_cores_needed;
        u32 prfcnt_config;

        shader_cores_needed = kbase_pm_get_present_cores(kbdev,
                                                        KBASE_PM_CORE_SHADER);

        /* The dump buffer must be non-NULL and 2048-byte aligned */
        if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
                goto out_err;

        /* Override core availability policy to ensure all cores are available
         */
        kbase_pm_ca_instr_enable(kbdev);

        /* Request the cores early on synchronously - we'll release them on any
         * errors (e.g. instrumentation already active) */
        kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                /* Instrumentation is already enabled */
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                goto out_unrequest_cores;
        }

        /* Enable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask | PRFCNT_SAMPLE_COMPLETED, NULL);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

        /* In use, this context is the owner */
        kbdev->hwcnt.kctx = kctx;
        /* Remember the dump address so we can reprogram it later */
        kbdev->hwcnt.addr = setup->dump_buffer;

        /* Request the clean */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
        kbdev->hwcnt.backend.triggered = 0;
        /* Clean & invalidate the caches so we're sure the MMU tables for the
         * dump buffer are valid */
        ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                &kbdev->hwcnt.backend.cache_clean_work);
        KBASE_DEBUG_ASSERT(ret);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        /* Wait for cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                        kbdev->hwcnt.backend.triggered != 0);

        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_IDLE);

        kbase_pm_request_l2_caches(kbdev);

        /* Configure */
        prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
        {
                u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
                u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
                                >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
                int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);

                if (arch_v6)
                        prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
        }
#endif

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                        setup->dump_buffer & 0xFFFFFFFF, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                        setup->dump_buffer >> 32, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
                        setup->jm_bm, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
                        setup->shader_bm, kctx);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
                        setup->mmu_l2_bm, kctx);
        /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
         * HW counter dump. */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
                                kctx);
        else
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                setup->tiler_bm, kctx);

        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
                        prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);

        /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
         */
        if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
                kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
                                setup->tiler_bm, kctx);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        err = 0;

        dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
        return err;
out_unrequest_cores:
        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
out_err:
        return err;
}

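/**
 * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
 * @kctx: Kbase context that owns the counter dump
 *
 * Waits for any ongoing dump or setup to complete, disables the counters and
 * the sample-complete interrupt, then releases the shader cores and the L2
 * cache requested at enable time.
 *
 * Return: 0 on success, -EINVAL if instrumentation is not enabled for @kctx
 */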
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
        unsigned long flags, pm_flags;
        int err = -EINVAL;
        u32 irq_mask;
        struct kbase_device *kbdev = kctx->kbdev;

        while (1) {
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
                        /* Instrumentation is not enabled */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.kctx != kctx) {
                        /* Instrumentation has been set up for another context */
                        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                        goto out;
                }

                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
                        break;

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

                /* Ongoing dump/setup - wait for its completion */
                wait_event(kbdev->hwcnt.backend.wait,
                                kbdev->hwcnt.backend.triggered != 0);
        }

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
        kbdev->hwcnt.backend.triggered = 0;

        /* Disable interrupt */
        spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
        irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);

        /* Disable the counters */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

        kbdev->hwcnt.kctx = NULL;
        kbdev->hwcnt.addr = 0ULL;

        kbase_pm_ca_instr_disable(kbdev);

        kbase_pm_unrequest_cores(kbdev, true,
                kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

        kbase_pm_release_l2_caches(kbdev);

        spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
                        kctx);

        err = 0;

out:
        return err;
}

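/**
 * kbase_instr_hwcnt_request_dump - Trigger a HW counter dump
 * @kctx: Kbase context that owns the counter dump
 *
 * Reprograms the dump buffer address and issues the PRFCNT_SAMPLE command.
 * Completion can be waited for with kbase_instr_hwcnt_wait_for_dump() or
 * polled with kbase_instr_hwcnt_dump_complete().
 *
 * Return: 0 on success, -EINVAL if instrumentation is not set up for @kctx or
 *         a dump is already in progress
 */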
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.kctx != kctx) {
                /* The instrumentation has been set up for another context */
                goto unlock;
        }

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
                /* HW counters are disabled or another dump is ongoing, or
                 * we're resetting */
                goto unlock;
        }

        kbdev->hwcnt.backend.triggered = 0;

        /* Mark that we're dumping - the PF handler can signal that we faulted
         */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

        /* Reconfigure the dump address */
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
                        kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
                        kbdev->hwcnt.addr >> 32, NULL);

        /* Start dumping */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
                        kbdev->hwcnt.addr, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                        GPU_COMMAND_PRFCNT_SAMPLE, kctx);

        dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);

        err = 0;

unlock:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);

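/**
 * kbase_instr_hwcnt_dump_complete - Check whether the HW counter dump has
 *                                   completed
 * @kctx:    Kbase context that owns the counter dump
 * @success: Set to true if the dump completed without faulting
 *
 * Return: true if the dump has completed (with or without a fault)
 */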
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
                                        bool * const success)
{
        unsigned long flags;
        bool complete = false;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
                *success = true;
                complete = true;
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                *success = false;
                complete = true;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);

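/**
 * kbasep_cache_clean_worker - Workqueue worker that performs a cache clean
 * @data: Work item embedded in the device's hwcnt backend
 *
 * Issues the cache clean and invalidate command, waits for it to complete and
 * then marks the instrumentation state as idle.
 */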
void kbasep_cache_clean_worker(struct work_struct *data)
{
        struct kbase_device *kbdev;
        unsigned long flags;

        kbdev = container_of(data, struct kbase_device,
                                hwcnt.backend.cache_clean_work);

        mutex_lock(&kbdev->cacheclean_lock);
        kbasep_instr_hwcnt_cacheclean(kbdev);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        /* Wait for our condition, and any reset to complete */
        while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
                wait_event(kbdev->hwcnt.backend.cache_clean_wait,
                                kbdev->hwcnt.backend.state !=
                                                KBASE_INSTR_STATE_CLEANING);
                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
        }
        KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_CLEANED);

        /* All finished and idle */
        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        kbdev->hwcnt.backend.triggered = 1;
        wake_up(&kbdev->hwcnt.backend.wait);

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        mutex_unlock(&kbdev->cacheclean_lock);
}

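/**
 * kbase_instr_hwcnt_sample_done - Signal completion of a HW counter sample
 * @kbdev: Kbase device
 *
 * Queues a cache clean after a successful dump, or wakes any waiter if the
 * dump faulted.
 */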
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
        unsigned long flags;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                kbdev->hwcnt.backend.triggered = 1;
                wake_up(&kbdev->hwcnt.backend.wait);
        } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
                int ret;
                /* Always clean and invalidate the cache after a successful
                 * dump */
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
                ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
                                &kbdev->hwcnt.backend.cache_clean_work);
                KBASE_DEBUG_ASSERT(ret);
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

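/**
 * kbase_clean_caches_done - Signal completion of a cache clean and invalidate
 * @kbdev: Kbase device
 *
 * Masks the CLEAN_CACHES_COMPLETED interrupt and wakes the cache clean worker
 * if a clean was in progress.
 */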
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
        u32 irq_mask;

        if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
                unsigned long flags;
                unsigned long pm_flags;

                spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
                /* Disable interrupt */
                spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
                irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                NULL);
                kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
                                irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
                spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

                /* Wakeup... */
                if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
                        /* Only wake if we weren't resetting */
                        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
                        wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
                }

                spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        }
}

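/**
 * kbase_instr_hwcnt_wait_for_dump - Wait for a HW counter dump and the
 *                                   following cache clean to complete
 * @kctx: Kbase context that owns the counter dump
 *
 * Return: 0 on success, -EINVAL if the dump faulted
 */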
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
        struct kbase_device *kbdev = kctx->kbdev;
        unsigned long flags;
        int err;

        /* Wait for dump & cacheclean to complete */
        wait_event(kbdev->hwcnt.backend.wait,
                        kbdev->hwcnt.backend.triggered != 0);

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
                err = -EINVAL;
                kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
        } else {
                /* Dump done */
                KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
                                        KBASE_INSTR_STATE_IDLE);
                err = 0;
        }

        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

        return err;
}

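/**
 * kbase_instr_hwcnt_clear - Clear the HW counters
 * @kctx: Kbase context that owns the counter dump
 *
 * Return: 0 on success, -EINVAL if instrumentation is not set up for @kctx or
 *         a dump is in progress
 */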
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
        unsigned long flags;
        int err = -EINVAL;
        struct kbase_device *kbdev = kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

        /* Check it's the context previously set up and we're not already
         * dumping */
        if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
                                                KBASE_INSTR_STATE_IDLE)
                goto out;

        /* Clear the counters */
        KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
                        GPU_COMMAND_PRFCNT_CLEAR, kctx);

        err = 0;

out:
        spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
        return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

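/**
 * kbase_instr_backend_init - Initialise the instrumentation backend
 * @kbdev: Kbase device
 *
 * Return: 0 on success, -EINVAL if the cache clean workqueue could not be
 *         allocated
 */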
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
        int ret = 0;

        kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;

        init_waitqueue_head(&kbdev->hwcnt.backend.wait);
        init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
        INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
                        kbasep_cache_clean_worker);
        kbdev->hwcnt.backend.triggered = 0;

        kbdev->hwcnt.backend.cache_clean_wq =
                        alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
        if (!kbdev->hwcnt.backend.cache_clean_wq)
                ret = -EINVAL;

        return ret;
}

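/**
 * kbase_instr_backend_term - Terminate the instrumentation backend
 * @kbdev: Kbase device
 */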
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
        destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}