1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 /*
23 * Base kernel job manager APIs
24 */
25
26 #include <mali_kbase.h>
27 #include <mali_kbase_config.h>
28 #include <gpu/mali_kbase_gpu_regmap.h>
29 #include <tl/mali_kbase_tracepoints.h>
30 #include <mali_linux_trace.h>
31 #include <mali_kbase_hw.h>
32 #include <mali_kbase_hwaccess_jm.h>
33 #include <mali_kbase_reset_gpu.h>
34 #include <mali_kbase_ctx_sched.h>
35 #include <mali_kbase_kinstr_jm.h>
36 #include <mali_kbase_hwaccess_instr.h>
37 #include <hwcnt/mali_kbase_hwcnt_context.h>
38 #include <device/mali_kbase_device.h>
39 #include <backend/gpu/mali_kbase_irq_internal.h>
40 #include <backend/gpu/mali_kbase_jm_internal.h>
41 #include <mali_kbase_regs_history_debugfs.h>
42
43 static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
44 static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
45 const u64 affinity, const u64 limited_core_mask);
46
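/**
 * kbase_job_write_affinity - Compute and program the affinity for a job slot
 * @kbdev: The kbase device
 * @core_req: Core requirements of the atom about to be submitted
 * @js: Job slot number
 * @limited_core_mask: Core mask applied when BASE_JD_REQ_LIMITED_CORE_MASK is set
 *
 * Derives the shader core affinity from the currently available cores, the
 * per-slot debug core mask and (where requested) the coherent group or
 * limited core mask, then writes it to the JS_AFFINITY_NEXT registers.
 *
 * Return: the affinity value that was written to the hardware.
 */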
static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req,
48 unsigned int js, const u64 limited_core_mask)
49 {
50 u64 affinity;
51 bool skip_affinity_check = false;
52
53 if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
54 BASE_JD_REQ_T) {
/* Tiler-only atom, affinity value can be programmed as 0 */
56 affinity = 0;
57 skip_affinity_check = true;
58 } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
59 BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
60 unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
61 struct mali_base_gpu_coherent_group_info *coherency_info =
62 &kbdev->gpu_props.props.coherency_info;
63
64 affinity = kbdev->pm.backend.shaders_avail &
65 kbdev->pm.debug_core_mask[js];
66
67 /* JS2 on a dual core group system targets core group 1. All
68 * other cases target core group 0.
69 */
70 if (js == 2 && num_core_groups > 1)
71 affinity &= coherency_info->group[1].core_mask;
72 else if (num_core_groups > 1)
73 affinity &= coherency_info->group[0].core_mask;
74 else
75 affinity &= kbdev->gpu_props.curr_config.shader_present;
76 } else {
77 /* Use all cores */
78 affinity = kbdev->pm.backend.shaders_avail &
79 kbdev->pm.debug_core_mask[js];
80 }
81
82 if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
83 /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */
84 affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
85 }
86
87 if (unlikely(!affinity && !skip_affinity_check)) {
88 #ifdef CONFIG_MALI_BIFROST_DEBUG
89 u64 shaders_ready =
90 kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
91
92 WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail));
93 #endif
94
95 affinity = kbdev->pm.backend.shaders_avail;
96
97 if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) {
98 /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. */
99 affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask);
100
101 #ifdef CONFIG_MALI_BIFROST_DEBUG
102 /* affinity should never be 0 */
103 WARN_ON(!affinity);
104 #endif
105 }
106 }
107
108 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
109 affinity & 0xFFFFFFFF);
110 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
111 affinity >> 32);
112
113 return affinity;
114 }
115
116 /**
117 * select_job_chain() - Select which job chain to submit to the GPU
118 * @katom: Pointer to the atom about to be submitted to the GPU
119 *
120 * Selects one of the fragment job chains attached to the special atom at the
121 * end of a renderpass, or returns the address of the single job chain attached
122 * to any other type of atom.
123 *
124 * Which job chain is selected depends upon whether the tiling phase of the
125 * renderpass completed normally or was soft-stopped because it used too
126 * much memory. It also depends upon whether one of the fragment job chains
127 * has already been run as part of the same renderpass.
128 *
129 * Return: GPU virtual address of the selected job chain
130 */
static u64 select_job_chain(struct kbase_jd_atom *katom)
132 {
133 struct kbase_context *const kctx = katom->kctx;
134 u64 jc = katom->jc;
135 struct kbase_jd_renderpass *rp;
136
137 lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
138
139 if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS))
140 return jc;
141
142 compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
143 ARRAY_SIZE(kctx->jctx.renderpasses),
144 "Should check invalid access to renderpasses");
145
146 rp = &kctx->jctx.renderpasses[katom->renderpass_id];
147 /* We can read a subset of renderpass state without holding
148 * higher-level locks (but not end_katom, for example).
149 * If the end-of-renderpass atom is running with as-yet indeterminate
150 * OOM state then assume that the start atom was not soft-stopped.
151 */
152 switch (rp->state) {
153 case KBASE_JD_RP_OOM:
154 /* Tiling ran out of memory.
155 * Start of incremental rendering, used once.
156 */
157 jc = katom->jc_fragment.norm_read_forced_write;
158 break;
159 case KBASE_JD_RP_START:
160 case KBASE_JD_RP_PEND_OOM:
161 /* Tiling completed successfully first time.
162 * Single-iteration rendering, used once.
163 */
164 jc = katom->jc_fragment.norm_read_norm_write;
165 break;
166 case KBASE_JD_RP_RETRY_OOM:
167 /* Tiling ran out of memory again.
168 * Continuation of incremental rendering, used as
169 * many times as required.
170 */
171 jc = katom->jc_fragment.forced_read_forced_write;
172 break;
173 case KBASE_JD_RP_RETRY:
174 case KBASE_JD_RP_RETRY_PEND_OOM:
175 /* Tiling completed successfully this time.
176 * End of incremental rendering, used once.
177 */
178 jc = katom->jc_fragment.forced_read_norm_write;
179 break;
180 default:
181 WARN_ON(1);
182 break;
183 }
184
185 dev_dbg(kctx->kbdev->dev,
186 "Selected job chain 0x%llx for end atom %pK in state %d\n",
187 jc, (void *)katom, (int)rp->state);
188
189 katom->jc = jc;
190 return jc;
191 }
192
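/**
 * kbasep_jm_wait_js_free - Wait for a job slot's next command register to clear
 * @kbdev: The kbase device
 * @js: Job slot number
 * @kctx: Context of the atom about to be submitted (used for error reporting)
 *
 * Polls JS_COMMAND_NEXT until it reads back as zero (slot free) or until the
 * configured js_free_wait_time_ms timeout expires.
 *
 * Return: true if the slot became free, false on timeout.
 */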
static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
194 struct kbase_context *kctx)
195 {
196 const ktime_t wait_loop_start = ktime_get_raw();
197 const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
198 s64 diff = 0;
199
/* Wait for the JS_COMMAND_NEXT register to read back as zero, indicating the slot is free */
201 do {
202 if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
203 return true;
204
205 diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
206 } while (diff < max_timeout);
207
208 dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js,
209 kctx->tgid, kctx->id);
210
211 return false;
212 }
213
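/**
 * kbase_job_hw_submit - Submit an atom to a job slot
 * @kbdev: The kbase device
 * @katom: Atom to be submitted
 * @js: Job slot to submit to
 *
 * Programs the JS_HEAD_NEXT, affinity and configuration registers for the
 * slot (plus the flush ID where flush reduction is supported) and then issues
 * JS_COMMAND_START. Must be called with the hwaccess_lock held.
 *
 * Return: 0 on success, or -EPERM if the slot did not become free in time.
 */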
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
215 {
216 struct kbase_context *kctx;
217 u32 cfg;
218 u64 const jc_head = select_job_chain(katom);
219 u64 affinity;
220 struct slot_rb *ptr_slot_rb = &kbdev->hwaccess.backend.slot_rb[js];
221
222 lockdep_assert_held(&kbdev->hwaccess_lock);
223
224 kctx = katom->kctx;
225
226 /* Command register must be available */
227 if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
228 return -EPERM;
229
230 dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
231 jc_head, (void *)katom);
232
233 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
234 jc_head & 0xFFFFFFFF);
235 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
236 jc_head >> 32);
237
238 affinity = kbase_job_write_affinity(kbdev, katom->core_req, js,
239 kctx->limited_core_mask);
240
241 /* start MMU, medium priority, cache clean/flush on end, clean/flush on
242 * start
243 */
244 cfg = kctx->as_nr;
245
246 if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) &&
247 !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
248 cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
249
250 if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) {
251 /* Force a cache maintenance operation if the newly submitted
252 * katom to the slot is from a different kctx. For a JM GPU
253 * that has the feature BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
254 * applies a FLUSH_INV_SHADER_OTHER. Otherwise, do a
255 * FLUSH_CLEAN_INVALIDATE.
256 */
257 u64 tagged_kctx = ptr_slot_rb->last_kctx_tagged;
258
259 if (tagged_kctx != SLOT_RB_NULL_TAG_VAL && tagged_kctx != SLOT_RB_TAG_KCTX(kctx)) {
260 if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER))
261 cfg |= JS_CONFIG_START_FLUSH_INV_SHADER_OTHER;
262 else
263 cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
264 } else
265 cfg |= JS_CONFIG_START_FLUSH_NO_ACTION;
266 } else
267 cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
268
269 if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
270 !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
271 cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
272 else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
273 cfg |= JS_CONFIG_END_FLUSH_CLEAN;
274 else
275 cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
276
277 cfg |= JS_CONFIG_THREAD_PRI(8);
278
279 if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) ||
280 (katom->core_req & BASE_JD_REQ_END_RENDERPASS))
281 cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK;
282
283 if (!ptr_slot_rb->job_chain_flag) {
284 cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
285 katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
286 ptr_slot_rb->job_chain_flag = true;
287 } else {
288 katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
289 ptr_slot_rb->job_chain_flag = false;
290 }
291
292 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg);
293
294 if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION))
295 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT),
296 katom->flush_id);
297
298 /* Write an approximate start timestamp.
299 * It's approximate because there might be a job in the HEAD register.
300 */
301 katom->start_timestamp = ktime_get_raw();
302
303 /* GO ! */
304 dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx",
305 katom, kctx, js, jc_head);
306
307 KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
308 (u32)affinity);
309
310 KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx,
311 js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START);
312
313 KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head,
314 affinity, cfg);
315 KBASE_TLSTREAM_TL_RET_CTX_LPU(
316 kbdev,
317 kctx,
318 &kbdev->gpu_props.props.raw_props.js_features[
319 katom->slot_nr]);
320 KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
321 KBASE_TLSTREAM_TL_RET_ATOM_LPU(
322 kbdev,
323 katom,
324 &kbdev->gpu_props.props.raw_props.js_features[js],
325 "ctx_nr,atom_nr");
326 kbase_kinstr_jm_atom_hw_submit(katom);
327
328 /* Update the slot's last katom submission kctx */
329 ptr_slot_rb->last_kctx_tagged = SLOT_RB_TAG_KCTX(kctx);
330
331 #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
332 if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
333 /* If this is the only job on the slot, trace it as starting */
334 char js_string[16];
335
336 trace_gpu_sched_switch(
337 kbasep_make_job_slot_string(js, js_string,
338 sizeof(js_string)),
339 ktime_to_ns(katom->start_timestamp),
340 (u32)katom->kctx->id, 0, katom->work_id);
341 }
342 #endif
343
344 trace_sysgraph_gpu(SGR_SUBMIT, kctx->id,
345 kbase_jd_atom_id(kctx, katom), js);
346
347 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
348 JS_COMMAND_START);
349
350 return 0;
351 }
352
353 /**
354 * kbasep_job_slot_update_head_start_timestamp - Update timestamp
355 * @kbdev: kbase device
356 * @js: job slot
357 * @end_timestamp: timestamp
358 *
359 * Update the start_timestamp of the job currently in the HEAD, based on the
360 * fact that we got an IRQ for the previous set of completed jobs.
361 *
 * The estimate also takes into account the time the job was submitted, to
 * work out the best estimate (which might still result in an over-estimate of
 * the calculated time spent).
365 */
static void kbasep_job_slot_update_head_start_timestamp(struct kbase_device *kbdev, unsigned int js,
367 ktime_t end_timestamp)
368 {
369 ktime_t timestamp_diff;
370 struct kbase_jd_atom *katom;
371
372 /* Checking the HEAD position for the job slot */
373 katom = kbase_gpu_inspect(kbdev, js, 0);
374 if (katom != NULL) {
375 timestamp_diff = ktime_sub(end_timestamp,
376 katom->start_timestamp);
377 if (ktime_to_ns(timestamp_diff) >= 0) {
378 /* Only update the timestamp if it's a better estimate
379 * than what's currently stored. This is because our
380 * estimate that accounts for the throttle time may be
381 * too much of an overestimate
382 */
383 katom->start_timestamp = end_timestamp;
384 }
385 }
386 }
387
388 /**
389 * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline
390 * tracepoint
391 * @kbdev: kbase device
392 * @js: job slot
393 *
394 * Make a tracepoint call to the instrumentation module informing that
395 * softstop happened on given lpu (job slot).
396 */
static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, unsigned int js)
398 {
399 KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
400 kbdev,
401 &kbdev->gpu_props.props.raw_props.js_features[js]);
402 }
403
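/**
 * kbase_job_done - Handle completed jobs signalled by the job IRQ
 * @kbdev: The kbase device
 * @done: Snapshot of JOB_IRQ_RAWSTAT; the low 16 bits flag finished slots and
 *        the high 16 bits flag failed slots
 *
 * For each signalled slot, reads back the completion status, works out how
 * many atoms completed and reports them to the upper layers, then retries an
 * early GPU reset if one has been committed. Must be called with the
 * hwaccess_lock held.
 */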
void kbase_job_done(struct kbase_device *kbdev, u32 done)
405 {
406 u32 count = 0;
407 ktime_t end_timestamp;
408
409 lockdep_assert_held(&kbdev->hwaccess_lock);
410
411 KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done);
412
413 end_timestamp = ktime_get_raw();
414
415 while (done) {
416 unsigned int i;
417 u32 failed = done >> 16;
418
419 /* treat failed slots as finished slots */
420 u32 finished = (done & 0xFFFF) | failed;
421
422 /* Note: This is inherently unfair, as we always check for lower
423 * numbered interrupts before the higher numbered ones.
424 */
425 i = ffs(finished) - 1;
426
427 do {
428 int nr_done;
429 u32 active;
430 u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
431 u64 job_tail = 0;
432
433 if (failed & (1u << i)) {
434 /* read out the job slot status code if the job
435 * slot reported failure
436 */
437 completion_code = kbase_reg_read(kbdev,
438 JOB_SLOT_REG(i, JS_STATUS));
439
440 if (completion_code == BASE_JD_EVENT_STOPPED) {
441 u64 job_head;
442
443 KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
444 kbdev, NULL,
445 i, 0, TL_JS_EVENT_SOFT_STOP);
446
447 kbasep_trace_tl_event_lpu_softstop(
448 kbdev, i);
449
450 /* Soft-stopped job - read the value of
451 * JS<n>_TAIL so that the job chain can
452 * be resumed
453 */
454 job_tail = (u64)kbase_reg_read(kbdev,
455 JOB_SLOT_REG(i, JS_TAIL_LO)) |
456 ((u64)kbase_reg_read(kbdev,
457 JOB_SLOT_REG(i, JS_TAIL_HI))
458 << 32);
459 job_head = (u64)kbase_reg_read(kbdev,
460 JOB_SLOT_REG(i, JS_HEAD_LO)) |
461 ((u64)kbase_reg_read(kbdev,
462 JOB_SLOT_REG(i, JS_HEAD_HI))
463 << 32);
/* For a soft-stopped job chain, js_tail should be the
 * same as js_head, but if not then the job chain was
 * incorrectly marked as soft-stopped. In such a case we
 * should not resume the job chain from js_tail and
 * should report the completion_code as UNKNOWN.
 */
471 if (job_tail != job_head)
472 completion_code = BASE_JD_EVENT_UNKNOWN;
473
474 } else if (completion_code ==
475 BASE_JD_EVENT_NOT_STARTED) {
476 /* PRLAM-10673 can cause a TERMINATED
477 * job to come back as NOT_STARTED,
478 * but the error interrupt helps us
479 * detect it
480 */
481 completion_code =
482 BASE_JD_EVENT_TERMINATED;
483 }
484
485 kbase_gpu_irq_evict(kbdev, i, completion_code);
486
487 /* Some jobs that encounter a BUS FAULT may
488 * result in corrupted state causing future
489 * jobs to hang. Reset GPU before allowing
490 * any other jobs on the slot to continue.
491 */
492 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
493 if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
494 if (kbase_prepare_to_reset_gpu_locked(
495 kbdev,
496 RESET_FLAGS_NONE))
497 kbase_reset_gpu_locked(kbdev);
498 }
499 }
500 }
501
502 kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
503 done & ((1 << i) | (1 << (i + 16))));
504 active = kbase_reg_read(kbdev,
505 JOB_CONTROL_REG(JOB_IRQ_JS_STATE));
506
507 if (((active >> i) & 1) == 0 &&
508 (((done >> (i + 16)) & 1) == 0)) {
509 /* There is a potential race we must work
510 * around:
511 *
512 * 1. A job slot has a job in both current and
513 * next registers
514 * 2. The job in current completes
515 * successfully, the IRQ handler reads
516 * RAWSTAT and calls this function with the
517 * relevant bit set in "done"
518 * 3. The job in the next registers becomes the
519 * current job on the GPU
520 * 4. Sometime before the JOB_IRQ_CLEAR line
521 * above the job on the GPU _fails_
522 * 5. The IRQ_CLEAR clears the done bit but not
523 * the failed bit. This atomically sets
524 * JOB_IRQ_JS_STATE. However since both jobs
525 * have now completed the relevant bits for
526 * the slot are set to 0.
527 *
528 * If we now did nothing then we'd incorrectly
529 * assume that _both_ jobs had completed
530 * successfully (since we haven't yet observed
531 * the fail bit being set in RAWSTAT).
532 *
533 * So at this point if there are no active jobs
534 * left we check to see if RAWSTAT has a failure
535 * bit set for the job slot. If it does we know
536 * that there has been a new failure that we
537 * didn't previously know about, so we make sure
538 * that we record this in active (but we wait
539 * for the next loop to deal with it).
540 *
541 * If we were handling a job failure (i.e. done
542 * has the relevant high bit set) then we know
543 * that the value read back from
544 * JOB_IRQ_JS_STATE is the correct number of
545 * remaining jobs because the failed job will
 * have prevented any further jobs from starting
547 * execution.
548 */
549 u32 rawstat = kbase_reg_read(kbdev,
550 JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
551
552 if ((rawstat >> (i + 16)) & 1) {
553 /* There is a failed job that we've
554 * missed - add it back to active
555 */
556 active |= (1u << i);
557 }
558 }
559
560 dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
561 completion_code);
562
563 nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
564 nr_done -= (active >> i) & 1;
565 nr_done -= (active >> (i + 16)) & 1;
566
567 if (nr_done <= 0) {
568 dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
569 i);
570
571 goto spurious;
572 }
573
574 count += nr_done;
575
576 while (nr_done) {
577 if (nr_done == 1) {
578 kbase_gpu_complete_hw(kbdev, i,
579 completion_code,
580 job_tail,
581 &end_timestamp);
582 kbase_jm_try_kick_all(kbdev);
583 } else {
584 /* More than one job has completed.
585 * Since this is not the last job being
586 * reported this time it must have
587 * passed. This is because the hardware
588 * will not allow further jobs in a job
589 * slot to complete until the failed job
590 * is cleared from the IRQ status.
591 */
592 kbase_gpu_complete_hw(kbdev, i,
593 BASE_JD_EVENT_DONE,
594 0,
595 &end_timestamp);
596 }
597 nr_done--;
598 }
599 spurious:
600 done = kbase_reg_read(kbdev,
601 JOB_CONTROL_REG(JOB_IRQ_RAWSTAT));
602
603 failed = done >> 16;
604 finished = (done & 0xFFFF) | failed;
605 if (done)
606 end_timestamp = ktime_get_raw();
607 } while (finished & (1 << i));
608
609 kbasep_job_slot_update_head_start_timestamp(kbdev, i,
610 end_timestamp);
611 }
612
613 if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
614 KBASE_RESET_GPU_COMMITTED) {
615 /* If we're trying to reset the GPU then we might be able to do
616 * it early (without waiting for a timeout) because some jobs
617 * have completed
618 */
619 kbasep_try_reset_gpu_early_locked(kbdev);
620 }
621 KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
622 }
623
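/**
 * kbasep_job_slot_soft_or_hard_stop_do_action - Issue a soft-stop or hard-stop command
 * @kbdev: The kbase device
 * @js: Job slot to act on
 * @action: JS_COMMAND_SOFT_STOP or JS_COMMAND_HARD_STOP
 * @core_reqs: Core requirements of the target atom (used for diagnostics)
 * @target_katom: Atom that the stop is aimed at
 *
 * Converts the generic stop action into the job-chain-specific variant
 * (_0 or _1), flags the atom as having been stopped and writes the command to
 * the JS_COMMAND register. Protected atoms are never soft-stopped.
 */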
void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, unsigned int js,
625 u32 action, base_jd_core_req core_reqs,
626 struct kbase_jd_atom *target_katom)
627 {
628 #if KBASE_KTRACE_ENABLE
629 u32 status_reg_before;
630 u64 job_in_head_before;
631 u32 status_reg_after;
632
633 WARN_ON(action & (~JS_COMMAND_MASK));
634
635 /* Check the head pointer */
636 job_in_head_before = ((u64) kbase_reg_read(kbdev,
637 JOB_SLOT_REG(js, JS_HEAD_LO)))
638 | (((u64) kbase_reg_read(kbdev,
639 JOB_SLOT_REG(js, JS_HEAD_HI)))
640 << 32);
641 status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
642 #endif
643
644 if (action == JS_COMMAND_SOFT_STOP) {
645 if (kbase_jd_katom_is_protected(target_katom)) {
646 #ifdef CONFIG_MALI_BIFROST_DEBUG
647 dev_dbg(kbdev->dev,
648 "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%x",
649 (unsigned int)core_reqs);
650 #endif /* CONFIG_MALI_BIFROST_DEBUG */
651 return;
652 }
653
654 /* We are about to issue a soft stop, so mark the atom as having
655 * been soft stopped
656 */
657 target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED;
658
659 /* Mark the point where we issue the soft-stop command */
660 KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
661
662 action = (target_katom->atom_flags &
663 KBASE_KATOM_FLAGS_JOBCHAIN) ?
664 JS_COMMAND_SOFT_STOP_1 :
665 JS_COMMAND_SOFT_STOP_0;
666 } else if (action == JS_COMMAND_HARD_STOP) {
667 target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
668
669 action = (target_katom->atom_flags &
670 KBASE_KATOM_FLAGS_JOBCHAIN) ?
671 JS_COMMAND_HARD_STOP_1 :
672 JS_COMMAND_HARD_STOP_0;
673 }
674
675 kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action);
676
677 #if KBASE_KTRACE_ENABLE
678 status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS));
679 if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
680 struct kbase_jd_atom *head;
681 struct kbase_context *head_kctx;
682
683 head = kbase_gpu_inspect(kbdev, js, 0);
684 if (unlikely(!head)) {
685 dev_err(kbdev->dev, "Can't get a katom from js(%d)\n", js);
686 return;
687 }
688 head_kctx = head->kctx;
689
690 if (status_reg_before == BASE_JD_EVENT_ACTIVE)
691 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js);
692 else
693 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
694
695 switch (action) {
696 case JS_COMMAND_SOFT_STOP:
697 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js);
698 break;
699 case JS_COMMAND_SOFT_STOP_0:
700 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js);
701 break;
702 case JS_COMMAND_SOFT_STOP_1:
703 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js);
704 break;
705 case JS_COMMAND_HARD_STOP:
706 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js);
707 break;
708 case JS_COMMAND_HARD_STOP_0:
709 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js);
710 break;
711 case JS_COMMAND_HARD_STOP_1:
712 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js);
713 break;
714 default:
715 WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
716 (void *)target_katom, (void *)target_katom->kctx);
717 break;
718 }
719 } else {
720 if (status_reg_before == BASE_JD_EVENT_ACTIVE)
721 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js);
722 else
723 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js);
724
725 switch (action) {
726 case JS_COMMAND_SOFT_STOP:
727 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js);
728 break;
729 case JS_COMMAND_SOFT_STOP_0:
730 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js);
731 break;
732 case JS_COMMAND_SOFT_STOP_1:
733 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js);
734 break;
735 case JS_COMMAND_HARD_STOP:
736 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js);
737 break;
738 case JS_COMMAND_HARD_STOP_0:
739 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js);
740 break;
741 case JS_COMMAND_HARD_STOP_1:
742 KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js);
743 break;
744 default:
745 WARN(1, "Unknown action %d on atom %pK in kctx %pK\n", action,
746 (void *)target_katom, (void *)target_katom->kctx);
747 break;
748 }
749 }
750 #endif
751 }
752
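/**
 * kbase_backend_jm_kill_running_jobs_from_kctx - Hard-stop all running jobs of a context
 * @kctx: Context whose jobs should be killed
 *
 * Issues a hard-stop on every job slot for the given context. Must be called
 * with the hwaccess_lock held.
 */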
void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
754 {
755 struct kbase_device *kbdev = kctx->kbdev;
756 unsigned int i;
757
758 lockdep_assert_held(&kbdev->hwaccess_lock);
759
760 for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
761 kbase_job_slot_hardstop(kctx, i, NULL);
762 }
763
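/**
 * kbase_job_slot_ctx_priority_check_locked - Soft-stop atoms that should run later
 * @kctx: Context of the atom being prioritized
 * @target_katom: Atom that should run ahead of those currently on the slot
 *
 * Walks the atoms currently on the target atom's slot and soft-stops any that
 * should run after @target_katom, so that the higher priority work can be
 * submitted sooner. Must be called with the hwaccess_lock held.
 */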
void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
765 struct kbase_jd_atom *target_katom)
766 {
767 struct kbase_device *kbdev;
768 unsigned int target_js = target_katom->slot_nr;
769 int i;
770 bool stop_sent = false;
771
772 kbdev = kctx->kbdev;
773
774 lockdep_assert_held(&kbdev->hwaccess_lock);
775
776 for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, target_js); i++) {
777 struct kbase_jd_atom *slot_katom;
778
779 slot_katom = kbase_gpu_inspect(kbdev, target_js, i);
780 if (!slot_katom)
781 continue;
782
783 if (kbase_js_atom_runs_before(kbdev, target_katom, slot_katom,
784 KBASE_ATOM_ORDERING_FLAG_SEQNR)) {
785 if (!stop_sent)
786 KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
787 kbdev,
788 target_katom);
789
790 kbase_job_slot_softstop(kbdev, target_js, slot_katom);
791 stop_sent = true;
792 }
793 }
794 }
795
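/**
 * softstop_start_rp_nolock - Soft-stop the start-of-renderpass atom after an OOM
 * @kctx: Context owning the renderpass
 * @reg: Region that triggered the out-of-memory condition
 *
 * Records the OOM region against the renderpass, moves the renderpass into a
 * pending-OOM state and soft-stops the start-of-renderpass atom on job slot 1
 * so that incremental rendering can take over. Caller must hold hwaccess_lock.
 *
 * Return: 0 on success, or a negative error code if there is no atom on the
 * slot or it is not a start-of-renderpass atom in a suitable state.
 */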
static int softstop_start_rp_nolock(
797 struct kbase_context *kctx, struct kbase_va_region *reg)
798 {
799 struct kbase_device *const kbdev = kctx->kbdev;
800 struct kbase_jd_atom *katom;
801 struct kbase_jd_renderpass *rp;
802
803 lockdep_assert_held(&kbdev->hwaccess_lock);
804
805 katom = kbase_gpu_inspect(kbdev, 1, 0);
806
807 if (!katom) {
808 dev_dbg(kctx->kbdev->dev, "No atom on job slot\n");
809 return -ESRCH;
810 }
811
812 if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) {
813 dev_dbg(kctx->kbdev->dev,
814 "Atom %pK on job slot is not start RP\n", (void *)katom);
815 return -EPERM;
816 }
817
818 compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <=
819 ARRAY_SIZE(kctx->jctx.renderpasses),
820 "Should check invalid access to renderpasses");
821
822 rp = &kctx->jctx.renderpasses[katom->renderpass_id];
823 if (WARN_ON(rp->state != KBASE_JD_RP_START &&
824 rp->state != KBASE_JD_RP_RETRY))
825 return -EINVAL;
826
827 dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n",
828 (int)rp->state, (void *)reg);
829
830 if (WARN_ON(katom != rp->start_katom))
831 return -EINVAL;
832
833 dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n",
834 (void *)reg, (void *)&rp->oom_reg_list);
list_move_tail(&reg->link, &rp->oom_reg_list);
836 dev_dbg(kctx->kbdev->dev, "Added region to list\n");
837
838 rp->state = (rp->state == KBASE_JD_RP_START ?
839 KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM);
840
841 kbase_job_slot_softstop(kbdev, 1, katom);
842
843 return 0;
844 }
845
int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx,
847 struct kbase_va_region *const reg)
848 {
849 struct kbase_device *const kbdev = kctx->kbdev;
850 int err;
851 unsigned long flags;
852
853 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
854 err = softstop_start_rp_nolock(kctx, reg);
855 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
856
857 return err;
858 }
859
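/**
 * kbase_jm_wait_for_zero_jobs - Wait for a context to have no jobs in flight
 * @kctx: Context being zapped
 *
 * Waits (up to ZAP_TIMEOUT) for the context's job count to drop to zero and
 * for the context to be descheduled. If the jobs cannot be killed in time, a
 * GPU reset is issued and waited for instead.
 */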
void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
861 {
862 struct kbase_device *kbdev = kctx->kbdev;
863 unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT);
864
865 timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait,
866 kctx->jctx.job_nr == 0, timeout);
867
868 if (timeout != 0)
869 timeout = wait_event_timeout(
870 kctx->jctx.sched_info.ctx.is_scheduled_wait,
871 !kbase_ctx_flag(kctx, KCTX_SCHEDULED),
872 timeout);
873
874 /* Neither wait timed out; all done! */
875 if (timeout != 0)
876 goto exit;
877
878 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) {
879 dev_err(kbdev->dev,
880 "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
881 ZAP_TIMEOUT);
882 kbase_reset_gpu(kbdev);
883 }
884
885 /* Wait for the reset to complete */
886 kbase_reset_gpu_wait(kbdev);
887 exit:
888 dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx);
889
890 /* Ensure that the signallers of the waitqs have finished */
891 mutex_lock(&kctx->jctx.lock);
892 mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
893 mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
894 mutex_unlock(&kctx->jctx.lock);
895 }
896
u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev)
898 {
899 u32 flush_id = 0;
900
901 if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) {
902 mutex_lock(&kbdev->pm.lock);
903 if (kbdev->pm.backend.gpu_powered)
904 flush_id = kbase_reg_read(kbdev,
905 GPU_CONTROL_REG(LATEST_FLUSH));
906 mutex_unlock(&kbdev->pm.lock);
907 }
908
909 return flush_id;
910 }
911
int kbase_job_slot_init(struct kbase_device *kbdev)
913 {
914 CSTD_UNUSED(kbdev);
915 return 0;
916 }
917 KBASE_EXPORT_TEST_API(kbase_job_slot_init);
918
void kbase_job_slot_halt(struct kbase_device *kbdev)
920 {
921 CSTD_UNUSED(kbdev);
922 }
923
void kbase_job_slot_term(struct kbase_device *kbdev)
925 {
926 CSTD_UNUSED(kbdev);
927 }
928 KBASE_EXPORT_TEST_API(kbase_job_slot_term);
929
930
931 /**
932 * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
933 * @kbdev: The kbase device
934 * @js: The job slot to soft-stop
935 * @target_katom: The job that should be soft-stopped (or NULL for any job)
936 * @sw_flags: Flags to pass in about the soft-stop
937 *
938 * Context:
939 * The job slot lock must be held when calling this function.
940 * The job slot must not already be in the process of being soft-stopped.
941 *
942 * Soft-stop the specified job slot, with extra information about the stop
943 *
944 * Where possible any job in the next register is evicted before the soft-stop.
945 */
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, unsigned int js,
947 struct kbase_jd_atom *target_katom, u32 sw_flags)
948 {
949 dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n",
950 target_katom, sw_flags, js);
951
952 if (sw_flags & JS_COMMAND_MASK) {
953 WARN(true, "Atom %pK in kctx %pK received non-NOP flags %d\n", (void *)target_katom,
954 target_katom ? (void *)target_katom->kctx : NULL, sw_flags);
955 sw_flags &= ~((u32)JS_COMMAND_MASK);
956 }
957 kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
958 JS_COMMAND_SOFT_STOP | sw_flags);
959 }
960
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
962 struct kbase_jd_atom *target_katom)
963 {
964 kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
965 }
966
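/**
 * kbase_job_slot_hardstop - Hard-stop the specified job slot
 * @kctx: The kbase context that contains the job(s) to be hard-stopped
 * @js: The job slot to hard-stop
 * @target_katom: The job that should be hard-stopped (or NULL for all jobs
 *                from the context on that slot)
 *
 * Issues the hard-stop via kbase_backend_soft_hard_stop_slot().
 */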
void kbase_job_slot_hardstop(struct kbase_context *kctx, unsigned int js,
968 struct kbase_jd_atom *target_katom)
969 {
970 struct kbase_device *kbdev = kctx->kbdev;
971 bool stopped;
972
973 stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
974 target_katom,
975 JS_COMMAND_HARD_STOP);
976 CSTD_UNUSED(stopped);
977 }
978
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
980 base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
981 {
982 u32 hw_action = action & JS_COMMAND_MASK;
983
/* For a soft-stop, don't enter the disjoint state if soft-stopping is not
 * allowed for this atom, or if the stop isn't causing disjoint.
 */
987 if (hw_action == JS_COMMAND_SOFT_STOP &&
988 (kbase_jd_katom_is_protected(target_katom) ||
989 (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT))))
990 return;
991
992 /* Nothing to do if already logged disjoint state on this atom */
993 if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
994 return;
995
996 target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
997 kbase_disjoint_state_up(kbdev);
998 }
999
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
1001 struct kbase_jd_atom *target_katom)
1002 {
1003 if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
1004 target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
1005 kbase_disjoint_state_down(kbdev);
1006 }
1007 }
1008
int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
1010 {
1011 WARN(true, "%s Not implemented for JM GPUs", __func__);
1012 return -EINVAL;
1013 }
1014
int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
1016 {
1017 WARN(true, "%s Not implemented for JM GPUs", __func__);
1018 return -EINVAL;
1019 }
1020
void kbase_reset_gpu_allow(struct kbase_device *kbdev)
1022 {
1023 WARN(true, "%s Not implemented for JM GPUs", __func__);
1024 }
1025
void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev)
1027 {
1028 WARN(true, "%s Not implemented for JM GPUs", __func__);
1029 }
1030
void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
1032 {
1033 WARN(true, "%s Not implemented for JM GPUs", __func__);
1034 }
1035
static void kbase_debug_dump_registers(struct kbase_device *kbdev)
1037 {
1038 int i;
1039
1040 kbase_io_history_dump(kbdev);
1041
1042 dev_err(kbdev->dev, "Register state:");
1043 dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
1044 kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
1045 kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)));
1046 dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x",
1047 kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
1048 kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE)));
1049 for (i = 0; i < 3; i++) {
1050 dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
1051 i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)),
1052 i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
1053 }
1054 dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
1055 kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
1056 kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
1057 dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
1058 kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
1059 kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
1060 kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
1061 dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
1062 kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
1063 kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
1064 dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
1065 kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
1066 kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)));
1067 dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x",
1068 kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)),
1069 kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)));
1070 }
1071
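/**
 * kbasep_reset_timeout_worker - Work item that performs the actual GPU reset
 * @data: Embedded reset_work member of the kbase device
 *
 * Disables hardware counters and interrupts, flushes outstanding IRQ handlers
 * and MMU work, completes any jobs that were left on the GPU, re-initialises
 * the hardware and then restarts job submission. Runs on the dedicated reset
 * workqueue.
 */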
static void kbasep_reset_timeout_worker(struct work_struct *data)
1073 {
1074 unsigned long flags;
1075 struct kbase_device *kbdev;
1076 ktime_t end_timestamp = ktime_get_raw();
1077 struct kbasep_js_device_data *js_devdata;
1078 bool silent = false;
1079 u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
1080
1081 kbdev = container_of(data, struct kbase_device,
1082 hwaccess.backend.reset_work);
1083
1084 js_devdata = &kbdev->js_data;
1085
1086 if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
1087 KBASE_RESET_GPU_SILENT)
1088 silent = true;
1089
1090 if (kbase_is_quick_reset_enabled(kbdev))
1091 silent = true;
1092
1093 KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
1094
1095 /* Disable GPU hardware counters.
1096 * This call will block until counters are disabled.
1097 */
1098 kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
1099
1100 /* Make sure the timer has completed - this cannot be done from
1101 * interrupt context, so this cannot be done within
1102 * kbasep_try_reset_gpu_early.
1103 */
1104 hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
1105
1106 if (kbase_pm_context_active_handle_suspend(kbdev,
1107 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
1108 /* This would re-activate the GPU. Since it's already idle,
1109 * there's no need to reset it
1110 */
1111 atomic_set(&kbdev->hwaccess.backend.reset_gpu,
1112 KBASE_RESET_GPU_NOT_PENDING);
1113 kbase_disjoint_state_down(kbdev);
1114 wake_up(&kbdev->hwaccess.backend.reset_wait);
1115 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1116 kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
1117 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1118 return;
1119 }
1120
1121 WARN(kbdev->irq_reset_flush, "%s: GPU reset already in flight\n", __func__);
1122
1123 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1124 spin_lock(&kbdev->mmu_mask_change);
1125 kbase_pm_reset_start_locked(kbdev);
1126
/* We're about to flush out the IRQs and their bottom halves */
1128 kbdev->irq_reset_flush = true;
1129
1130 /* Disable IRQ to avoid IRQ handlers to kick in after releasing the
1131 * spinlock; this also clears any outstanding interrupts
1132 */
1133 kbase_pm_disable_interrupts_nolock(kbdev);
1134
1135 spin_unlock(&kbdev->mmu_mask_change);
1136 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1137
/* Ensure that any IRQ handlers have finished.
 * This must be done without holding any locks that IRQ handlers may take.
 */
1141 kbase_synchronize_irqs(kbdev);
1142
1143 /* Flush out any in-flight work items */
1144 kbase_flush_mmu_wqs(kbdev);
1145
1146 /* The flush has completed so reset the active indicator */
1147 kbdev->irq_reset_flush = false;
1148
1149 if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
1150 /* Ensure that L2 is not transitioning when we send the reset
1151 * command
1152 */
1153 while (--max_loops && kbase_pm_get_trans_cores(kbdev,
1154 KBASE_PM_CORE_L2))
1155 ;
1156
1157 WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
1158 }
1159
1160 mutex_lock(&kbdev->pm.lock);
1161 /* We hold the pm lock, so there ought to be a current policy */
1162 if (unlikely(!kbdev->pm.backend.pm_current_policy))
1163 dev_warn(kbdev->dev, "No power policy set!");
1164
/* All slots have been soft-stopped and we've waited
 * SOFT_STOP_RESET_TIMEOUT for the slots to clear. At this point we
 * assume that anything still left on the GPU is stuck there and
 * we'll kill it when we reset the GPU.
 */
1170
1171 if (!silent)
1172 dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
1173 RESET_TIMEOUT);
1174
1175 /* Output the state of some interesting registers to help in the
1176 * debugging of GPU resets
1177 */
1178 if (!silent)
1179 kbase_debug_dump_registers(kbdev);
1180
1181 /* Complete any jobs that were still on the GPU */
1182 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1183 kbdev->protected_mode = false;
1184 if (!kbdev->pm.backend.protected_entry_transition_override)
1185 kbase_backend_reset(kbdev, &end_timestamp);
1186 kbase_pm_metrics_update(kbdev, NULL);
1187 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1188
1189 /* Tell hardware counters a reset is about to occur.
1190 * If the instr backend is in an unrecoverable error state (e.g. due to
1191 * HW being unresponsive), this will transition the backend out of
1192 * it, on the assumption a reset will fix whatever problem there was.
1193 */
1194 kbase_instr_hwcnt_on_before_reset(kbdev);
1195
1196 /* Reset the GPU */
1197 kbase_pm_init_hw(kbdev, 0);
1198
1199 mutex_unlock(&kbdev->pm.lock);
1200
1201 mutex_lock(&js_devdata->runpool_mutex);
1202
1203 mutex_lock(&kbdev->mmu_hw_mutex);
1204 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1205 kbase_ctx_sched_restore_all_as(kbdev);
1206 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1207 mutex_unlock(&kbdev->mmu_hw_mutex);
1208
1209 kbase_pm_enable_interrupts(kbdev);
1210
1211 kbase_disjoint_state_down(kbdev);
1212
1213 mutex_unlock(&js_devdata->runpool_mutex);
1214
1215 mutex_lock(&kbdev->pm.lock);
1216
1217 kbase_pm_reset_complete(kbdev);
1218
1219 /* Find out what cores are required now */
1220 kbase_pm_update_cores_state(kbdev);
1221
1222 /* Synchronously request and wait for those cores, because if
1223 * instrumentation is enabled it would need them immediately.
1224 */
1225 kbase_pm_wait_for_desired_state(kbdev);
1226
1227 mutex_unlock(&kbdev->pm.lock);
1228
1229 atomic_set(&kbdev->hwaccess.backend.reset_gpu,
1230 KBASE_RESET_GPU_NOT_PENDING);
1231
1232 wake_up(&kbdev->hwaccess.backend.reset_wait);
1233 if (!silent)
1234 dev_err(kbdev->dev, "Reset complete");
1235
1236 /* Try submitting some jobs to restart processing */
1237 KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
1238 kbase_js_sched_all(kbdev);
1239
1240 /* Process any pending slot updates */
1241 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1242 kbase_backend_slot_update(kbdev);
1243 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1244
1245 kbase_pm_context_idle(kbdev);
1246
1247 /* Re-enable GPU hardware counters */
1248 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1249 kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
1250 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1251
1252 KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
1253 }
1254
static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
1256 {
1257 struct kbase_device *kbdev = container_of(timer, struct kbase_device,
1258 hwaccess.backend.reset_timer);
1259
1260 /* Reset still pending? */
1261 if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
1262 KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
1263 KBASE_RESET_GPU_COMMITTED)
1264 queue_work(kbdev->hwaccess.backend.reset_workq,
1265 &kbdev->hwaccess.backend.reset_work);
1266
1267 return HRTIMER_NORESTART;
1268 }
1269
1270 /*
1271 * If all jobs are evicted from the GPU then we can reset the GPU
1272 * immediately instead of waiting for the timeout to elapse
1273 */
1274
static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
1276 {
1277 unsigned int i;
1278 int pending_jobs = 0;
1279
1280 /* Count the number of jobs */
1281 for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
1282 pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
1283
1284 if (pending_jobs > 0) {
1285 /* There are still jobs on the GPU - wait */
1286 return;
1287 }
1288
/* To prevent reading incorrect register values when dumping a failed job,
 * skip the early reset.
 */
1292 if (atomic_read(&kbdev->job_fault_debug) > 0)
1293 return;
1294
1295 /* Check that the reset has been committed to (i.e. kbase_reset_gpu has
1296 * been called), and that no other thread beat this thread to starting
1297 * the reset
1298 */
1299 if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
1300 KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
1301 KBASE_RESET_GPU_COMMITTED) {
1302 /* Reset has already occurred */
1303 return;
1304 }
1305
1306 queue_work(kbdev->hwaccess.backend.reset_workq,
1307 &kbdev->hwaccess.backend.reset_work);
1308 }
1309
static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
1311 {
1312 unsigned long flags;
1313
1314 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
1315 kbasep_try_reset_gpu_early_locked(kbdev);
1316 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
1317 }
1318
1319 /**
1320 * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
1321 * @kbdev: kbase device
1322 * @flags: Bitfield indicating impact of reset (see flag defines)
1323 *
1324 * This function soft-stops all the slots to ensure that as many jobs as
1325 * possible are saved.
1326 *
1327 * Return: boolean which should be interpreted as follows:
1328 * true - Prepared for reset, kbase_reset_gpu_locked should be called.
1329 * false - Another thread is performing a reset, kbase_reset_gpu should
1330 * not be called.
1331 */
bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
1333 unsigned int flags)
1334 {
1335 int i;
1336
1337 #ifdef CONFIG_MALI_ARBITER_SUPPORT
1338 if (kbase_pm_is_gpu_lost(kbdev)) {
1339 /* GPU access has been removed, reset will be done by
1340 * Arbiter instead
1341 */
1342 return false;
1343 }
1344 #endif
1345
1346 if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
1347 kbase_instr_hwcnt_on_unrecoverable_error(kbdev);
1348
1349 if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
1350 KBASE_RESET_GPU_NOT_PENDING,
1351 KBASE_RESET_GPU_PREPARED) !=
1352 KBASE_RESET_GPU_NOT_PENDING) {
1353 /* Some other thread is already resetting the GPU */
1354 return false;
1355 }
1356
1357 kbase_disjoint_state_up(kbdev);
1358
1359 for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
1360 kbase_job_slot_softstop(kbdev, i, NULL);
1361
1362 return true;
1363 }
1364
bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
1366 {
1367 unsigned long lock_flags;
1368 bool ret;
1369
1370 spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags);
1371 ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags);
1372 spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags);
1373
1374 return ret;
1375 }
1376 KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
1377
1378 /*
1379 * This function should be called after kbase_prepare_to_reset_gpu if it
1380 * returns true. It should never be called without a corresponding call to
1381 * kbase_prepare_to_reset_gpu.
1382 *
1383 * After this function is called (or not called if kbase_prepare_to_reset_gpu
1384 * returned false), the caller should wait for
 * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset
1386 * has completed.
1387 */
void kbase_reset_gpu(struct kbase_device *kbdev)
1389 {
1390 /* Note this is an assert/atomic_set because it is a software issue for
1391 * a race to be occurring here
1392 */
1393 if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
1394 return;
1395 atomic_set(&kbdev->hwaccess.backend.reset_gpu,
1396 KBASE_RESET_GPU_COMMITTED);
1397
1398 if (!kbase_is_quick_reset_enabled(kbdev))
dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
1400 kbdev->reset_timeout_ms);
1401
1402 hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
1403 HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
1404 HRTIMER_MODE_REL);
1405
1406 /* Try resetting early */
1407 kbasep_try_reset_gpu_early(kbdev);
1408 }
1409 KBASE_EXPORT_TEST_API(kbase_reset_gpu);
1410
void kbase_reset_gpu_locked(struct kbase_device *kbdev)
1412 {
1413 /* Note this is an assert/atomic_set because it is a software issue for
1414 * a race to be occurring here
1415 */
1416 if (WARN_ON(atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_PREPARED))
1417 return;
1418 atomic_set(&kbdev->hwaccess.backend.reset_gpu,
1419 KBASE_RESET_GPU_COMMITTED);
1420
1421 if (!kbase_is_quick_reset_enabled(kbdev))
dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
1423 kbdev->reset_timeout_ms);
1424 hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
1425 HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
1426 HRTIMER_MODE_REL);
1427
1428 /* Try resetting early */
1429 kbasep_try_reset_gpu_early_locked(kbdev);
1430 }
1431
int kbase_reset_gpu_silent(struct kbase_device *kbdev)
1433 {
1434 if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
1435 KBASE_RESET_GPU_NOT_PENDING,
1436 KBASE_RESET_GPU_SILENT) !=
1437 KBASE_RESET_GPU_NOT_PENDING) {
1438 /* Some other thread is already resetting the GPU */
1439 return -EAGAIN;
1440 }
1441
1442 kbase_disjoint_state_up(kbdev);
1443
1444 queue_work(kbdev->hwaccess.backend.reset_workq,
1445 &kbdev->hwaccess.backend.reset_work);
1446
1447 return 0;
1448 }
1449
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
1451 {
1452 if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
1453 KBASE_RESET_GPU_NOT_PENDING)
1454 return false;
1455
1456 return true;
1457 }
1458
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev)
1460 {
1461 return atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_NOT_PENDING;
1462 }
1463
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
1465 {
1466 wait_event(kbdev->hwaccess.backend.reset_wait,
1467 atomic_read(&kbdev->hwaccess.backend.reset_gpu)
1468 == KBASE_RESET_GPU_NOT_PENDING);
1469
1470 return 0;
1471 }
1472 KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
1473
int kbase_reset_gpu_init(struct kbase_device *kbdev)
1475 {
1476 kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
1477 "Mali reset workqueue", 0, 1);
1478 if (kbdev->hwaccess.backend.reset_workq == NULL)
1479 return -ENOMEM;
1480
1481 INIT_WORK(&kbdev->hwaccess.backend.reset_work,
1482 kbasep_reset_timeout_worker);
1483
1484 hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
1485 HRTIMER_MODE_REL);
1486 kbdev->hwaccess.backend.reset_timer.function =
1487 kbasep_reset_timer_callback;
1488
1489 return 0;
1490 }
1491
void kbase_reset_gpu_term(struct kbase_device *kbdev)
1493 {
1494 destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
1495 }
1496
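/**
 * kbasep_apply_limited_core_mask - Apply the limited core mask to an affinity
 * @kbdev: The kbase device (only used for debug logging)
 * @affinity: The affinity as originally calculated
 * @limited_core_mask: The limited core mask for the context
 *
 * Return: the affinity restricted to the cores allowed by the limited core
 * mask.
 */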
static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
1498 const u64 affinity, const u64 limited_core_mask)
1499 {
1500 const u64 result = affinity & limited_core_mask;
1501
1502 #ifdef CONFIG_MALI_BIFROST_DEBUG
1503 dev_dbg(kbdev->dev,
1504 "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
1505 (unsigned long)affinity,
1506 (unsigned long)result,
1507 (unsigned long)limited_core_mask);
1508 #else
1509 CSTD_UNUSED(kbdev);
1510 #endif
1511
1512 return result;
1513 }
1514