xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 *
 * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */

/*
 * GPU backend instrumentation APIs.
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_hwaccess_instr.h>
#include <backend/gpu/mali_kbase_device_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <backend/gpu/mali_kbase_instr_internal.h>

/**
 * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
 * hardware
 *
 * @kbdev: Kbase device
 */
static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
{
	unsigned long flags;
	unsigned long pm_flags;
	u32 irq_mask;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
					KBASE_INSTR_STATE_REQUEST_CLEAN);

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask | CLEAN_CACHES_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

	/* Clean & invalidate the caches so we're sure the MMU tables for the
	 * dump buffer are valid */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_CLEAN_INV_CACHES, NULL);
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

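/**
 * kbase_instr_hwcnt_enable_internal - Enable HW counter collection for a
 * context
 *
 * @kbdev: Kbase device
 * @kctx:  Kbase context that will own the dump
 * @setup: HW counter setup parameters (dump buffer address and enable
 *         bitmasks)
 *
 * Return: 0 on success, -EINVAL otherwise
 */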
int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
					struct kbase_context *kctx,
					struct kbase_uk_hwcnt_setup *setup)
{
	unsigned long flags, pm_flags;
	int err = -EINVAL;
	u32 irq_mask;
	int ret;
	u64 shader_cores_needed;
	u32 prfcnt_config;

	shader_cores_needed = kbase_pm_get_present_cores(kbdev,
							KBASE_PM_CORE_SHADER);

	/* Reject a missing or misaligned dump buffer (must be 2048-byte
	 * aligned) */
	if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
		goto out_err;

	/* Override core availability policy to ensure all cores are available
	 */
	kbase_pm_ca_instr_enable(kbdev);

	/* Request the cores early on synchronously - we'll release them on any
	 * errors (e.g. instrumentation already active) */
	kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
		/* Instrumentation is already enabled */
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		goto out_unrequest_cores;
	}

	/* Enable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
						PRFCNT_SAMPLE_COMPLETED, NULL);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

	/* In use, this context is the owner */
	kbdev->hwcnt.kctx = kctx;
	/* Remember the dump address so we can reprogram it later */
	kbdev->hwcnt.addr = setup->dump_buffer;

	/* Request the clean */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
	kbdev->hwcnt.backend.triggered = 0;
	/* Clean & invalidate the caches so we're sure the MMU tables for the
	 * dump buffer are valid */
	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
					&kbdev->hwcnt.backend.cache_clean_work);
	KBASE_DEBUG_ASSERT(ret);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	/* Wait for cacheclean to complete */
	wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);

	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
							KBASE_INSTR_STATE_IDLE);

	kbase_pm_request_l2_caches(kbdev);

	/* Configure */
	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
	{
		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);

		if (arch_v6)
			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
	}
#endif

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
			prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx);

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
					setup->dump_buffer & 0xFFFFFFFF, kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
					setup->dump_buffer >> 32,        kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
					setup->jm_bm,                    kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
					setup->shader_bm,                kctx);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
					setup->mmu_l2_bm,                kctx);
	/* Due to PRLAM-8186 we need to disable the Tiler before we enable the
	 * HW counter dump. */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
									kctx);
	else
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
							setup->tiler_bm, kctx);

	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
			prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx);

	/* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
	 */
	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
		kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
							setup->tiler_bm, kctx);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	kbdev->hwcnt.backend.triggered = 1;
	wake_up(&kbdev->hwcnt.backend.wait);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	err = 0;

	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
	return err;
 out_unrequest_cores:
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 out_err:
	return err;
}

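/**
 * kbase_instr_hwcnt_disable_internal - Disable HW counter collection
 *
 * @kctx: Kbase context that owns the current HW counter setup
 *
 * Waits for any ongoing dump or setup to complete before disabling.
 *
 * Return: 0 on success, -EINVAL if instrumentation is not enabled or is
 * owned by another context
 */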
int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
{
	unsigned long flags, pm_flags;
	int err = -EINVAL;
	u32 irq_mask;
	struct kbase_device *kbdev = kctx->kbdev;

	while (1) {
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
			/* Instrumentation is not enabled */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.kctx != kctx) {
			/* Instrumentation has been set up for another context */
			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
			goto out;
		}

		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
			break;

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

		/* Ongoing dump/setup - wait for its completion */
		wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);
	}

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
	kbdev->hwcnt.backend.triggered = 0;

	/* Disable interrupt */
	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);

	/* Disable the counters */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);

	kbdev->hwcnt.kctx = NULL;
	kbdev->hwcnt.addr = 0ULL;

	kbase_pm_ca_instr_disable(kbdev);

	kbase_pm_unrequest_cores(kbdev, true,
		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));

	kbase_pm_release_l2_caches(kbdev);

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
									kctx);

	err = 0;

 out:
	return err;
}

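/**
 * kbase_instr_hwcnt_request_dump - Trigger a HW counter dump
 *
 * @kctx: Kbase context that owns the HW counters
 *
 * Reprograms the dump address and issues the PRFCNT_SAMPLE GPU command.
 *
 * Return: 0 on success, -EINVAL if the counters are owned by another context
 * or no dump can be started in the current state
 */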
int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
{
	unsigned long flags;
	int err = -EINVAL;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.kctx != kctx) {
		/* The instrumentation has been set up for another context */
		goto unlock;
	}

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
		/* HW counters are disabled or another dump is ongoing, or we're
		 * resetting */
		goto unlock;
	}

	kbdev->hwcnt.backend.triggered = 0;

	/* Mark that we're dumping - the PF handler can signal that we faulted
	 */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;

	/* Reconfigure the dump address */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
					kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
					kbdev->hwcnt.addr >> 32, NULL);

	/* Start dumping */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
					kbdev->hwcnt.addr, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
					GPU_COMMAND_PRFCNT_SAMPLE, kctx);

	dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);

	err = 0;

 unlock:
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);

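/**
 * kbase_instr_hwcnt_dump_complete - Check whether the HW counter dump has
 * finished
 *
 * @kctx:    Kbase context that owns the HW counters
 * @success: Set to true if the dump completed successfully, false if it
 *           faulted (only valid when the function returns true)
 *
 * Return: true if the dump is complete, false if it is still in progress
 */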
bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
						bool * const success)
{
	unsigned long flags;
	bool complete = false;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
		*success = true;
		complete = true;
	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		*success = false;
		complete = true;
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	return complete;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);

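/**
 * kbasep_cache_clean_worker - Workqueue worker that issues the cache clean
 * and waits for it to complete
 *
 * @data: Work item embedded in the kbase device's hwcnt backend
 */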
void kbasep_cache_clean_worker(struct work_struct *data)
{
	struct kbase_device *kbdev;
	unsigned long flags;

	kbdev = container_of(data, struct kbase_device,
						hwcnt.backend.cache_clean_work);

	mutex_lock(&kbdev->cacheclean_lock);
	kbasep_instr_hwcnt_cacheclean(kbdev);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	/* Wait for our condition, and any reset to complete */
	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
				kbdev->hwcnt.backend.state !=
						KBASE_INSTR_STATE_CLEANING);
		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
	}
	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
						KBASE_INSTR_STATE_CLEANED);

	/* All finished and idle */
	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	kbdev->hwcnt.backend.triggered = 1;
	wake_up(&kbdev->hwcnt.backend.wait);

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	mutex_unlock(&kbdev->cacheclean_lock);
}

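/**
 * kbase_instr_hwcnt_sample_done - Handle completion of a HW counter sample
 *
 * @kbdev: Kbase device
 *
 * On a successful dump this queues the cache clean work; if a fault has been
 * flagged it wakes any waiters instead.
 */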
void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		kbdev->hwcnt.backend.triggered = 1;
		wake_up(&kbdev->hwcnt.backend.wait);
	} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
		int ret;
		/* Always clean and invalidate the cache after a successful dump
		 */
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
		ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
					&kbdev->hwcnt.backend.cache_clean_work);
		KBASE_DEBUG_ASSERT(ret);
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
}

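/**
 * kbase_clean_caches_done - Handle completion of a cache clean & invalidate
 *
 * @kbdev: Kbase device
 *
 * Masks the CLEAN_CACHES_COMPLETED interrupt and, if a clean was in progress,
 * marks it done and wakes the cache clean worker.
 */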
void kbase_clean_caches_done(struct kbase_device *kbdev)
{
	u32 irq_mask;

	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
		unsigned long flags;
		unsigned long pm_flags;

		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
		/* Disable interrupt */
		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
									NULL);
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
				irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);

		/* Wakeup... */
		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
			/* Only wake if we weren't resetting */
			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
		}

		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	}
}

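/**
 * kbase_instr_hwcnt_wait_for_dump - Wait for a HW counter dump (and the
 * following cache clean) to complete
 *
 * @kctx: Kbase context that owns the HW counters
 *
 * Return: 0 if the dump succeeded, -EINVAL if it faulted
 */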
int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	unsigned long flags;
	int err;

	/* Wait for dump & cacheclean to complete */
	wait_event(kbdev->hwcnt.backend.wait,
					kbdev->hwcnt.backend.triggered != 0);

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
		err = -EINVAL;
		kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
	} else {
		/* Dump done */
		KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
							KBASE_INSTR_STATE_IDLE);
		err = 0;
	}

	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);

	return err;
}

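/**
 * kbase_instr_hwcnt_clear - Clear the HW counters
 *
 * @kctx: Kbase context that owns the HW counters
 *
 * Return: 0 on success, -EINVAL if the counters are owned by another context
 * or a dump is in progress
 */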
int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
{
	unsigned long flags;
	int err = -EINVAL;
	struct kbase_device *kbdev = kctx->kbdev;

	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);

	/* Check it's the context previously set up and we're not already
	 * dumping */
	if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
							KBASE_INSTR_STATE_IDLE)
		goto out;

	/* Clear the counters */
	KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
						GPU_COMMAND_PRFCNT_CLEAR, kctx);

	err = 0;

out:
	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
	return err;
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);

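/**
 * kbase_instr_backend_init - Initialise the instrumentation backend
 *
 * @kbdev: Kbase device
 *
 * Return: 0 on success, -EINVAL if the cache clean workqueue could not be
 * allocated
 */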
int kbase_instr_backend_init(struct kbase_device *kbdev)
{
	int ret = 0;

	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;

	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
						kbasep_cache_clean_worker);
	kbdev->hwcnt.backend.triggered = 0;

	kbdev->hwcnt.backend.cache_clean_wq =
			alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
	if (!kbdev->hwcnt.backend.cache_clean_wq)
		ret = -EINVAL;

	return ret;
}

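/**
 * kbase_instr_backend_term - Tear down the instrumentation backend
 *
 * @kbdev: Kbase device
 */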
void kbase_instr_backend_term(struct kbase_device *kbdev)
{
	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}