1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 #include <mali_kbase.h>
23 #include "mali_kbase_config_defaults.h"
24 #include <mali_kbase_ctx_sched.h>
25 #include <mali_kbase_reset_gpu.h>
26 #include <mali_kbase_as_fault_debugfs.h>
27 #include "mali_kbase_csf.h"
28 #include <tl/mali_kbase_tracepoints.h>
29 #include <backend/gpu/mali_kbase_pm_internal.h>
30 #include <linux/export.h>
31 #include <csf/mali_kbase_csf_registers.h>
32 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
33 #include <mali_kbase_hwaccess_time.h>
34 #include "mali_kbase_csf_tiler_heap_reclaim.h"
35 #include "mali_kbase_csf_mcu_shared_reg.h"
36
37 /* Value to indicate that a queue group is not in the groups_to_schedule list */
38 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
39
40 /* This decides the upper limit on the waiting time for the Scheduler
41 * to exit the sleep state. Usually the value of autosuspend_delay is
42 * expected to be around 100 milliseconds.
43 */
44 #define MAX_AUTO_SUSPEND_DELAY_MS (5000)
45
46 /* Maximum number of endpoints which may run tiler jobs. */
47 #define CSG_TILER_MAX ((u8)1)
48
49 /* Maximum dynamic CSG slot priority value */
50 #define MAX_CSG_SLOT_PRIORITY ((u8)15)
51
52 /* CSF scheduler time slice value */
53 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
54
55 /* A GPU address space slot is reserved for MCU. */
56 #define NUM_RESERVED_AS_SLOTS (1)
57
58 /* Time to wait for completion of PING req before considering MCU as hung */
59 #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
60
61 /* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
62 #define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
63
64 static int scheduler_group_schedule(struct kbase_queue_group *group);
65 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
66 static
67 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
68 struct kbase_queue_group *const group,
69 enum kbase_csf_group_state run_state);
70 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
71 struct kbase_device *const kbdev,
72 struct kbase_queue_group *const group);
73 static struct kbase_queue_group *get_tock_top_group(
74 struct kbase_csf_scheduler *const scheduler);
75 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
76 static int suspend_active_queue_groups(struct kbase_device *kbdev,
77 unsigned long *slot_mask);
78 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
79 bool system_suspend);
80 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
81 static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
82
83 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
84
85 /**
86 * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
87 * scheduling tick/tock to complete before the group deschedule.
88 *
89 * @group: Pointer to the group that is being descheduled.
90 *
91 * This function blocks the descheduling of the group until the dump on fault is
92 * completed and scheduling tick/tock has completed.
93 * To deschedule an on-slot group, a CSG termination request would be sent, which
94 * might time out if a fault has occurred and could also affect the state
95 * being dumped. Moreover the scheduler lock would be held, so access to the debugfs
96 * files would get blocked.
97 * Scheduler lock and 'kctx->csf.lock' are released before this function starts
98 * to wait. When a request sent by the Scheduler to the FW times out, Scheduler
99 * would also wait for the dumping to complete and release the Scheduler lock
100 * before the wait. Meanwhile Userspace can try to delete the group; this function
101 * ensures that the group doesn't exit the Scheduler until the scheduling
102 * tick/tock has completed. Though very unlikely, group deschedule can be triggered
103 * from multiple threads around the same time, and after the wait the Userspace thread
104 * can win the race, get the group descheduled and free the memory for the group
105 * pointer before the other threads wake up and notice that the group has already been
106 * descheduled. To avoid the freeing in such a case, a sort of refcount is used
107 * for the group which is incremented & decremented across the wait.
108 */
109 static
110 void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
111 {
112 #if IS_ENABLED(CONFIG_DEBUG_FS)
113 struct kbase_device *kbdev = group->kctx->kbdev;
114 struct kbase_context *kctx = group->kctx;
115 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
116
117 lockdep_assert_held(&kctx->csf.lock);
118 lockdep_assert_held(&scheduler->lock);
119
120 if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
121 return;
122
123 while ((!kbase_debug_csf_fault_dump_complete(kbdev) ||
124 (scheduler->state == SCHED_BUSY)) &&
125 queue_group_scheduled_locked(group)) {
126 group->deschedule_deferred_cnt++;
127 mutex_unlock(&scheduler->lock);
128 mutex_unlock(&kctx->csf.lock);
129 kbase_debug_csf_fault_wait_completion(kbdev);
130 mutex_lock(&kctx->csf.lock);
131 mutex_lock(&scheduler->lock);
132 group->deschedule_deferred_cnt--;
133 }
134 #endif
135 }
136
137 /**
138 * schedule_actions_trigger_df() - Notify the client about the fault and
139 * wait for the dumping to complete.
140 *
141 * @kbdev: Pointer to the device
142 * @kctx: Pointer to the context associated with the CSG slot for which
143 * the timeout was seen.
144 * @error: Error code indicating the type of timeout that occurred.
145 *
146 * This function notifies the Userspace client waiting for the faults and waits
147 * for the Client to complete the dumping.
148 * The function is called only from Scheduling tick/tock when a request sent by
149 * the Scheduler to FW times out or from the protm event work item of the group
150 * when the protected mode entry request times out.
151 * In the latter case there is no wait done as the scheduler lock would be released
152 * immediately. In the former case the function waits, releasing the scheduler
153 * lock before the wait. It has been ensured that the Scheduler view of the groups
154 * won't change meanwhile, so no group can enter/exit the Scheduler, become
155 * runnable or go off slot.
156 */
157 static void schedule_actions_trigger_df(struct kbase_device *kbdev,
158 struct kbase_context *kctx, enum dumpfault_error_type error)
159 {
160 #if IS_ENABLED(CONFIG_DEBUG_FS)
161 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
162
163 lockdep_assert_held(&scheduler->lock);
164
165 if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
166 return;
167
168 if (unlikely(scheduler->state != SCHED_BUSY)) {
169 WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
170 return;
171 }
172
173 mutex_unlock(&scheduler->lock);
174 kbase_debug_csf_fault_wait_completion(kbdev);
175 mutex_lock(&scheduler->lock);
176 WARN_ON(scheduler->state != SCHED_BUSY);
177 #endif
178 }
179
180 #ifdef KBASE_PM_RUNTIME
181 /**
182 * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
183 * sleeping state.
184 *
185 * @kbdev: Pointer to the device
186 *
187 * This function waits until the Scheduler has exited the sleep state and
188 * it is called when an on-slot group is terminated or when the suspend
189 * buffer of an on-slot group needs to be captured.
190 *
191 * Return: 0 when the wait is successful, otherwise an error code.
192 */
193 static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
194 {
195 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
196 int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
197 unsigned int sleep_exit_wait_time;
198 long remaining;
199 int ret = 0;
200
201 lockdep_assert_held(&scheduler->lock);
202 WARN_ON(scheduler->state != SCHED_SLEEPING);
203
204 /* No point in waiting if the autosuspend_delay value is negative.
205 * For a negative value of autosuspend_delay the Driver will directly
206 * go for the suspend of the Scheduler, but the autosuspend_delay value
207 * could have been changed after the sleep was initiated.
208 */
209 if (autosuspend_delay < 0)
210 return -EINVAL;
211
212 if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
213 autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
214
215 /* Usually Scheduler would remain in sleeping state until the
216 * auto-suspend timer expires and all active CSGs are suspended.
217 */
218 sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
219
220 remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
221
222 while ((scheduler->state == SCHED_SLEEPING) && !ret) {
223 mutex_unlock(&scheduler->lock);
224 remaining = wait_event_timeout(
225 kbdev->csf.event_wait,
226 (scheduler->state != SCHED_SLEEPING),
227 remaining);
228 mutex_lock(&scheduler->lock);
229 if (!remaining && (scheduler->state == SCHED_SLEEPING))
230 ret = -ETIMEDOUT;
231 }
232
233 return ret;
234 }
235
236 /**
237 * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
238 *
239 * @kbdev: Pointer to the device
240 *
241 * This function will force the Scheduler to exit the sleep state by doing the
242 * wake up of MCU and suspension of on-slot groups. It is called at the time of
243 * system suspend.
244 *
245 * Return: 0 on success.
246 */
247 static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
248 {
249 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
250 unsigned long flags;
251 int ret = 0;
252
253 lockdep_assert_held(&scheduler->lock);
254 WARN_ON(scheduler->state != SCHED_SLEEPING);
255 WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
256
257 kbase_pm_lock(kbdev);
258 ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
259 kbase_pm_unlock(kbdev);
260 if (ret) {
261 dev_warn(kbdev->dev,
262 "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
263 kbase_backend_get_cycle_cnt(kbdev));
264 goto out;
265 }
266
267 ret = suspend_active_groups_on_powerdown(kbdev, true);
268 if (ret)
269 goto out;
270
271 kbase_pm_lock(kbdev);
272 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
273 kbdev->pm.backend.gpu_sleep_mode_active = false;
274 kbdev->pm.backend.gpu_wakeup_override = false;
275 kbase_pm_update_state(kbdev);
276 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
277 ret = kbase_pm_wait_for_desired_state(kbdev);
278 kbase_pm_unlock(kbdev);
279 if (ret) {
280 dev_warn(kbdev->dev,
281 "[%llu] Wait for pm state change failed on forced scheduler suspend",
282 kbase_backend_get_cycle_cnt(kbdev));
283 goto out;
284 }
285
286 scheduler->state = SCHED_SUSPENDED;
287 KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
288
289 return 0;
290
291 out:
292 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
293 kbdev->pm.backend.exit_gpu_sleep_mode = true;
294 kbdev->pm.backend.gpu_wakeup_override = false;
295 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
296 kbase_csf_scheduler_invoke_tick(kbdev);
297
298 return ret;
299 }
300 #endif
301
302 /**
303 * tick_timer_callback() - Callback function for the scheduling tick hrtimer
304 *
305 * @timer: Pointer to the scheduling tick hrtimer
306 *
307 * This function will enqueue the scheduling tick work item for immediate
308 * execution, if it has not been queued already.
309 *
310 * Return: enum value to indicate that timer should not be restarted.
311 */
312 static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
313 {
314 struct kbase_device *kbdev = container_of(timer, struct kbase_device,
315 csf.scheduler.tick_timer);
316
317 kbase_csf_scheduler_tick_advance(kbdev);
318 return HRTIMER_NORESTART;
319 }
320
321 /**
322 * start_tick_timer() - Start the scheduling tick hrtimer.
323 *
324 * @kbdev: Pointer to the device
325 *
326 * This function will start the scheduling tick hrtimer and is supposed to
327 * be called only from the tick work item function. The tick hrtimer should
328 * not be active already.
329 */
330 static void start_tick_timer(struct kbase_device *kbdev)
331 {
332 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
333 unsigned long flags;
334
335 lockdep_assert_held(&scheduler->lock);
336
337 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
338 WARN_ON(scheduler->tick_timer_active);
339 if (likely(!work_pending(&scheduler->tick_work))) {
340 scheduler->tick_timer_active = true;
341
342 hrtimer_start(&scheduler->tick_timer,
343 HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
344 HRTIMER_MODE_REL);
345 }
346 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
347 }
348
349 /**
350 * cancel_tick_timer() - Cancel the scheduling tick hrtimer
351 *
352 * @kbdev: Pointer to the device
353 */
354 static void cancel_tick_timer(struct kbase_device *kbdev)
355 {
356 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
357 unsigned long flags;
358
359 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
360 scheduler->tick_timer_active = false;
361 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
362 hrtimer_cancel(&scheduler->tick_timer);
363 }
364
365 /**
366 * enqueue_tick_work() - Enqueue the scheduling tick work item
367 *
368 * @kbdev: Pointer to the device
369 *
370 * This function will queue the scheduling tick work item for immediate
371 * execution. This shall only be called when both the tick hrtimer and tick
372 * work item are not active/pending.
373 */
374 static void enqueue_tick_work(struct kbase_device *kbdev)
375 {
376 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
377
378 lockdep_assert_held(&scheduler->lock);
379
380 kbase_csf_scheduler_invoke_tick(kbdev);
381 }
382
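/* Return a doorbell page to the Scheduler's free pool by clearing its bit in
 * the in-use bitmap. Caller must hold the scheduler lock.
 */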
383 static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
384 {
385 WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
386
387 lockdep_assert_held(&kbdev->csf.scheduler.lock);
388 clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
389 }
390
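/* Reserve the first free doorbell page found in the in-use bitmap. Returns the
 * doorbell number, or KBASEP_USER_DB_NR_INVALID if all doorbells are taken.
 * Caller must hold the scheduler lock.
 */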
391 static int acquire_doorbell(struct kbase_device *kbdev)
392 {
393 int doorbell_nr;
394
395 lockdep_assert_held(&kbdev->csf.scheduler.lock);
396
397 doorbell_nr = find_first_zero_bit(
398 kbdev->csf.scheduler.doorbell_inuse_bitmap,
399 CSF_NUM_DOORBELL);
400
401 if (doorbell_nr >= CSF_NUM_DOORBELL)
402 return KBASEP_USER_DB_NR_INVALID;
403
404 set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
405
406 return doorbell_nr;
407 }
408
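/* Release the doorbell page assigned to the queue group, if it has one. */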
409 static void unassign_user_doorbell_from_group(struct kbase_device *kbdev,
410 struct kbase_queue_group *group)
411 {
412 lockdep_assert_held(&kbdev->csf.scheduler.lock);
413
414 if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
415 release_doorbell(kbdev, group->doorbell_nr);
416 group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
417 }
418 }
419
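/* Detach the doorbell from the queue and zap its user-space mapping, so that
 * the dummy page gets mapped in on the next access.
 */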
420 static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev,
421 struct kbase_queue *queue)
422 {
423 lockdep_assert_held(&kbdev->csf.scheduler.lock);
424
425 mutex_lock(&kbdev->csf.reg_lock);
426
427 if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
428 queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
429 /* After this the dummy page would be mapped in */
430 unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping,
431 queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
432 }
433
434 mutex_unlock(&kbdev->csf.reg_lock);
435 }
436
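/* Assign a doorbell page to the queue group if it does not have one yet. */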
437 static void assign_user_doorbell_to_group(struct kbase_device *kbdev,
438 struct kbase_queue_group *group)
439 {
440 lockdep_assert_held(&kbdev->csf.scheduler.lock);
441
442 if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
443 group->doorbell_nr = acquire_doorbell(kbdev);
444 }
445
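/* Make a bound queue inherit its group's doorbell page and zap the existing
 * user-space mapping, so that the real HW doorbell page gets mapped in on the
 * next access.
 */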
446 static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
447 struct kbase_queue *const queue)
448 {
449 lockdep_assert_held(&kbdev->csf.scheduler.lock);
450
451 mutex_lock(&kbdev->csf.reg_lock);
452
453 /* If bind operation for the queue hasn't completed yet, then the
454 * CSI can't be programmed for the queue
455 * (even in stopped state) and so the doorbell also can't be assigned
456 * to it.
457 */
458 if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) &&
459 (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) {
460 WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID);
461 queue->doorbell_nr = queue->group->doorbell_nr;
462
463 /* After this the real Hw doorbell page would be mapped in */
464 unmap_mapping_range(
465 kbdev->csf.db_filp->f_inode->i_mapping,
466 queue->db_file_offset << PAGE_SHIFT,
467 PAGE_SIZE, 1);
468 }
469
470 mutex_unlock(&kbdev->csf.reg_lock);
471 }
472
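/* One-time initialisation of the doorbell in-use bitmap; doorbell 0 is
 * reserved for use by the kernel driver.
 */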
473 static void scheduler_doorbell_init(struct kbase_device *kbdev)
474 {
475 int doorbell_nr;
476
477 bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap,
478 CSF_NUM_DOORBELL);
479
480 mutex_lock(&kbdev->csf.scheduler.lock);
481 /* Reserve doorbell 0 for use by kernel driver */
482 doorbell_nr = acquire_doorbell(kbdev);
483 mutex_unlock(&kbdev->csf.scheduler.lock);
484
485 WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
486 }
487
488 /**
489 * update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
490 *
491 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
492 *
493 * This function updates the EXTRACT offset for all queues whose groups have
494 * been assigned a physical slot. These values could be used to detect a
495 * queue's true idleness status. This is intended to be an additional check
496 * on top of the GPU idle notification to account for race conditions.
497 * This function is supposed to be called only when GPU idle notification
498 * interrupt is received.
499 */
500 static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
501 {
502 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
503 /* All CSGs have the same number of CSs */
504 size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
505 size_t i;
506
507 lockdep_assert_held(&scheduler->interrupt_lock);
508
509 /* csg_slots_idle_mask is not used here for the looping, as it could get
510 * updated concurrently when Scheduler re-evaluates the idle status of
511 * the CSGs for which idle notification was received previously.
512 */
513 for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
514 struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
515 size_t j;
516
517 if (WARN_ON(!group))
518 continue;
519
520 for (j = 0; j < max_streams; ++j) {
521 struct kbase_queue *const queue = group->bound_queues[j];
522
523 if (queue) {
524 if (queue->user_io_addr) {
525 u64 const *const output_addr =
526 (u64 const *)(queue->user_io_addr + PAGE_SIZE);
527
528 queue->extract_ofs =
529 output_addr[CS_EXTRACT_LO / sizeof(u64)];
530 } else {
531 dev_warn(kbdev->dev,
532 "%s(): queue->user_io_addr is NULL, queue: %p",
533 __func__,
534 queue);
535 }
536 }
537 }
538 }
539 }
540
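/* Clear the 'GPU no longer idle' flag and queue the GPU idle worker. */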
541 static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
542 {
543 atomic_set(&scheduler->gpu_no_longer_idle, false);
544 queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
545 }
546
547 void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
548 {
549 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
550 int non_idle_offslot_grps;
551 bool can_suspend_on_idle;
552
553 lockdep_assert_held(&kbdev->hwaccess_lock);
554 lockdep_assert_held(&scheduler->interrupt_lock);
555
556 non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
557 can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
558 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
559 ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
560
561 if (!non_idle_offslot_grps) {
562 if (can_suspend_on_idle) {
563 /* fast_gpu_idle_handling is protected by the
564 * interrupt_lock, which would prevent this from being
565 * updated whilst gpu_idle_worker() is executing.
566 */
567 scheduler->fast_gpu_idle_handling =
568 (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
569 !kbase_csf_scheduler_all_csgs_idle(kbdev);
570
571 /* The GPU idle worker relies on update_on_slot_queues_offsets() to have
572 * finished. It is queued beforehand to reduce the time till execution,
573 * but it will eventually be blocked by the scheduler->interrupt_lock.
574 */
575 enqueue_gpu_idle_work(scheduler);
576
577 /* The extract offsets are unused in fast GPU idle handling */
578 if (!scheduler->fast_gpu_idle_handling)
579 update_on_slot_queues_offsets(kbdev);
580 }
581 } else {
582 /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
583 kbase_csf_scheduler_tick_advance_nolock(kbdev);
584 }
585 }
586
587 u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
588 {
589 u32 nr_active_csgs;
590
591 lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
592
593 nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
594 kbdev->csf.global_iface.group_num);
595
596 return nr_active_csgs;
597 }
598
599 u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
600 {
601 u32 nr_active_csgs;
602 unsigned long flags;
603
604 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
605 nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
606 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
607
608 return nr_active_csgs;
609 }
610
611 /**
612 * csg_slot_in_use - returns true if a queue group has been programmed on a
613 * given CSG slot.
614 *
615 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
616 * @slot: Index/number of the CSG slot in question.
617 *
618 * Return: true if the CSG slot is actively engaged by a resident queue group.
619 *
620 * Note: Caller must hold the scheduler lock.
621 */
622 static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot)
623 {
624 lockdep_assert_held(&kbdev->csf.scheduler.lock);
625
626 return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL);
627 }
628
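/* Check whether the group is in one of the suspended states.
 * Caller must hold the scheduler lock.
 */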
629 static bool queue_group_suspended_locked(struct kbase_queue_group *group)
630 {
631 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
632
633 return (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
634 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE ||
635 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
636 }
637
638 static bool queue_group_idle_locked(struct kbase_queue_group *group)
639 {
640 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
641
642 return (group->run_state == KBASE_CSF_GROUP_IDLE ||
643 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
644 }
645
646 static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
647 {
648 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
649
650 return (group->run_state == KBASE_CSF_GROUP_IDLE);
651 }
652
653 static bool can_schedule_idle_group(struct kbase_queue_group *group)
654 {
655 return (on_slot_group_idle_locked(group) ||
656 (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
657 }
658
659 static bool queue_group_scheduled(struct kbase_queue_group *group)
660 {
661 return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
662 group->run_state != KBASE_CSF_GROUP_TERMINATED &&
663 group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
664 }
665
666 static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
667 {
668 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
669
670 return queue_group_scheduled(group);
671 }
672
673 /**
674 * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
675 *
676 * @kbdev: Pointer to the GPU device
677 *
678 * This function waits for the GPU to exit protected mode which is confirmed
679 * when active_protm_grp is set to NULL.
680 *
681 * Return: true on success, false otherwise.
682 */
683 static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
684 {
685 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
686 long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
687 long remaining;
688 bool success = true;
689
690 lockdep_assert_held(&scheduler->lock);
691
692 KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
693
694 remaining = wait_event_timeout(kbdev->csf.event_wait,
695 !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
696
697 if (unlikely(!remaining)) {
698 struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
699 struct kbase_context *kctx = group ? group->kctx : NULL;
700
701 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
702 kbase_backend_get_cycle_cnt(kbdev),
703 kbdev->csf.fw_timeout_ms);
704 schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
705 success = false;
706 }
707
708 KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
709
710 return success;
711 }
712
713 /**
714 * scheduler_force_protm_exit() - Force GPU to exit protected mode.
715 *
716 * @kbdev: Pointer to the GPU device
717 *
718 * This function sends a ping request to the firmware and waits for the GPU
719 * to exit protected mode.
720 *
721 * If the GPU does not exit protected mode, it is considered as hang.
722 * A GPU reset would then be triggered.
723 */
724 static void scheduler_force_protm_exit(struct kbase_device *kbdev)
725 {
726 unsigned long flags;
727
728 lockdep_assert_held(&kbdev->csf.scheduler.lock);
729
730 kbase_csf_firmware_ping(kbdev);
731
732 if (scheduler_protm_wait_quit(kbdev))
733 return;
734
735 dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
736
737 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
738 if (kbdev->csf.scheduler.active_protm_grp) {
739 dev_err(kbdev->dev,
740 "Group-%d of context %d_%d ran in protected mode for too long on slot %d",
741 kbdev->csf.scheduler.active_protm_grp->handle,
742 kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
743 kbdev->csf.scheduler.active_protm_grp->kctx->id,
744 kbdev->csf.scheduler.active_protm_grp->csg_nr);
745 }
746 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
747
748 /* The GPU could be stuck in Protected mode. To prevent a hang,
749 * a GPU reset is performed.
750 */
751 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
752 kbase_reset_gpu(kbdev);
753 }
754
755 /**
756 * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
757 * automatically for periodic tasks.
758 *
759 * @kbdev: Pointer to the device
760 *
761 * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
762 * CSF scheduler lock to already have been held.
763 *
764 * Return: true if the scheduler is configured to wake up periodically
765 */
766 static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
767 {
768 lockdep_assert_held(&kbdev->csf.scheduler.lock);
769
770 return kbdev->csf.scheduler.timer_enabled;
771 }
772
773 /**
774 * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
775 * Scheduler
776 *
777 * @kbdev: Pointer to the device
778 * @suspend_handler: Handler code for how to handle a suspend that might occur.
779 *
780 * This function is usually called when Scheduler needs to be activated.
781 * The PM reference count is acquired for the Scheduler and the power on
782 * of GPU is initiated.
783 *
784 * Return: 0 if successful or a negative error code on failure.
785 */
786 static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
787 enum kbase_pm_suspend_handler suspend_handler)
788 {
789 unsigned long flags;
790 u32 prev_count;
791 int ret = 0;
792
793 lockdep_assert_held(&kbdev->csf.scheduler.lock);
794
795 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
796 prev_count = kbdev->csf.scheduler.pm_active_count;
797 if (!WARN_ON(prev_count == U32_MAX))
798 kbdev->csf.scheduler.pm_active_count++;
799 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
800
801 /* On 0 => 1, make a pm_ctx_active request */
802 if (!prev_count) {
803 ret = kbase_pm_context_active_handle_suspend(kbdev,
804 suspend_handler);
805 /* Invoke the PM state machines again as the change in MCU
806 * desired status, due to the update of scheduler.pm_active_count,
807 * may be missed by the thread that called pm_wait_for_desired_state()
808 */
809 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
810 if (ret)
811 kbdev->csf.scheduler.pm_active_count--;
812 kbase_pm_update_state(kbdev);
813 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
814 }
815
816 return ret;
817 }
818
819 #ifdef KBASE_PM_RUNTIME
820 /**
821 * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
822 * Scheduler
823 *
824 * @kbdev: Pointer to the device
825 * @flags: Pointer to the flags variable containing the interrupt state
826 * when hwaccess lock was acquired.
827 *
828 * This function is called when Scheduler needs to be activated from the
829 * sleeping state.
830 * The PM reference count is acquired for the Scheduler and the wake up of
831 * MCU is initiated. It resets the flag that indicates to the MCU state
832 * machine that MCU needs to be put in sleep state.
833 *
834 * Note: This function shall be called with hwaccess lock held and it may
835 * release that lock and reacquire it.
836 *
837 * Return: zero when the PM reference was taken and non-zero when the
838 * system is being suspended or has already been suspended.
839 */
840 static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
841 unsigned long *flags)
842 {
843 u32 prev_count;
844 int ret = 0;
845
846 lockdep_assert_held(&kbdev->csf.scheduler.lock);
847 lockdep_assert_held(&kbdev->hwaccess_lock);
848
849 prev_count = kbdev->csf.scheduler.pm_active_count;
850 if (!WARN_ON(prev_count == U32_MAX))
851 kbdev->csf.scheduler.pm_active_count++;
852
853 /* On 0 => 1, make a pm_ctx_active request */
854 if (!prev_count) {
855 spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
856
857 ret = kbase_pm_context_active_handle_suspend(kbdev,
858 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
859
860 spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
861 if (ret)
862 kbdev->csf.scheduler.pm_active_count--;
863 else
864 kbdev->pm.backend.gpu_sleep_mode_active = false;
865 kbase_pm_update_state(kbdev);
866 }
867
868 return ret;
869 }
870 #endif
871
872 /**
873 * scheduler_pm_idle() - Release the PM reference count held by Scheduler
874 *
875 * @kbdev: Pointer to the device
876 *
877 * This function is usually called after Scheduler is suspended.
878 * The PM reference count held by the Scheduler is released to trigger the
879 * power down of GPU.
880 */
881 static void scheduler_pm_idle(struct kbase_device *kbdev)
882 {
883 unsigned long flags;
884 u32 prev_count;
885
886 lockdep_assert_held(&kbdev->csf.scheduler.lock);
887
888 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
889 prev_count = kbdev->csf.scheduler.pm_active_count;
890 if (!WARN_ON(prev_count == 0))
891 kbdev->csf.scheduler.pm_active_count--;
892 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
893
894 if (prev_count == 1) {
895 kbase_pm_context_idle(kbdev);
896 /* Invoke the PM state machines again as the change in MCU
897 * desired status, due to the update of scheduler.pm_active_count,
898 * may be missed by the thread that called pm_wait_for_desired_state()
899 */
900 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
901 kbase_pm_update_state(kbdev);
902 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
903 }
904 }
905
906 #ifdef KBASE_PM_RUNTIME
907 /**
908 * scheduler_pm_idle_before_sleep() - Release the PM reference count and
909 * trigger the transition to the sleep state.
910 *
911 * @kbdev: Pointer to the device
912 *
913 * This function is called on the GPU idle notification. It releases the
914 * Scheduler's PM reference count and sets the flag to indicate to the
915 * MCU state machine that MCU needs to be put in sleep state.
916 */
917 static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
918 {
919 unsigned long flags;
920 u32 prev_count;
921
922 lockdep_assert_held(&kbdev->csf.scheduler.lock);
923
924 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
925 prev_count = kbdev->csf.scheduler.pm_active_count;
926 if (!WARN_ON(prev_count == 0))
927 kbdev->csf.scheduler.pm_active_count--;
928 kbdev->pm.backend.gpu_sleep_mode_active = true;
929 kbdev->pm.backend.exit_gpu_sleep_mode = false;
930 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
931
932 if (prev_count == 1) {
933 kbase_pm_context_idle(kbdev);
934 /* Invoke the PM state machines again as the change in MCU
935 * desired status, due to the update of scheduler.pm_active_count,
936 * may be missed by the thread that called pm_wait_for_desired_state()
937 */
938 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
939 kbase_pm_update_state(kbdev);
940 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
941 }
942 }
943 #endif
944
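/* Activate the Scheduler from the suspended or sleeping state, taking the PM
 * reference count, and optionally kick the scheduling tick timer.
 */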
945 static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
946 {
947 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
948 int ret;
949
950 lockdep_assert_held(&scheduler->lock);
951
952 if ((scheduler->state != SCHED_SUSPENDED) &&
953 (scheduler->state != SCHED_SLEEPING))
954 return;
955
956 if (scheduler->state == SCHED_SUSPENDED) {
957 dev_dbg(kbdev->dev,
958 "Re-activating the Scheduler after suspend");
959 ret = scheduler_pm_active_handle_suspend(kbdev,
960 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
961 } else {
962 #ifdef KBASE_PM_RUNTIME
963 unsigned long flags;
964
965 dev_dbg(kbdev->dev,
966 "Re-activating the Scheduler out of sleep");
967
968 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
969 ret = scheduler_pm_active_after_sleep(kbdev, &flags);
970 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
971 #endif
972 }
973
974 if (ret) {
975 /* GPUCORE-29850 would add the handling for the case where
976 * Scheduler could not be activated due to system suspend.
977 */
978 dev_info(kbdev->dev,
979 "Couldn't wakeup Scheduler due to system suspend");
980 return;
981 }
982
983 scheduler->state = SCHED_INACTIVE;
984 KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
985
986 if (kick)
987 scheduler_enable_tick_timer_nolock(kbdev);
988 }
989
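/* Move the Scheduler to the suspended state and drop its PM reference count. */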
990 static void scheduler_suspend(struct kbase_device *kbdev)
991 {
992 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
993
994 lockdep_assert_held(&scheduler->lock);
995
996 if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
997 dev_dbg(kbdev->dev, "Suspending the Scheduler");
998 scheduler_pm_idle(kbdev);
999 scheduler->state = SCHED_SUSPENDED;
1000 KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
1001 }
1002 }
1003
1004 /**
1005 * update_idle_suspended_group_state() - Move the queue group to a non-idle
1006 * suspended state.
1007 * @group: Pointer to the queue group.
1008 *
1009 * This function is called to change the state of queue group to non-idle
1010 * suspended state, if the group was suspended when all the queues bound to it
1011 * became empty or when some queues got blocked on a sync wait & others became
1012 * empty. The group is also moved to the runnable list from idle wait list in
1013 * the latter case.
1014 * So the function gets called when a queue is kicked or the sync wait condition
1015 * gets satisfied.
1016 */
1017 static void update_idle_suspended_group_state(struct kbase_queue_group *group)
1018 {
1019 struct kbase_csf_scheduler *scheduler =
1020 &group->kctx->kbdev->csf.scheduler;
1021 int new_val;
1022
1023 lockdep_assert_held(&scheduler->lock);
1024
1025 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) {
1026 remove_group_from_idle_wait(group);
1027 insert_group_to_runnable(scheduler, group,
1028 KBASE_CSF_GROUP_SUSPENDED);
1029 } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
1030 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
1031 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
1032 group->run_state);
1033
1034 /* If scheduler is not suspended and the given group's
1035 * static priority (reflected by the scan_seq_num) is inside
1036 * the current tick slot-range, or there are some on_slot
1037 * idle groups, schedule an async tock.
1038 */
1039 if (scheduler->state != SCHED_SUSPENDED) {
1040 unsigned long flags;
1041 int n_idle;
1042 int n_used;
1043 int n_slots =
1044 group->kctx->kbdev->csf.global_iface.group_num;
1045
1046 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1047 n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
1048 n_slots);
1049 n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
1050 n_slots);
1051 spin_unlock_irqrestore(&scheduler->interrupt_lock,
1052 flags);
1053
1054 if (n_idle ||
1055 n_used < scheduler->num_csg_slots_for_tick ||
1056 group->scan_seq_num <
1057 scheduler->num_csg_slots_for_tick)
1058 schedule_in_cycle(group, true);
1059 }
1060 } else
1061 return;
1062
1063 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
1064 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
1065 new_val);
1066 }
1067
1068 int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
1069 {
1070 struct kbase_csf_scheduler *scheduler =
1071 &group->kctx->kbdev->csf.scheduler;
1072 int slot_num = group->csg_nr;
1073
1074 lockdep_assert_held(&scheduler->interrupt_lock);
1075
1076 if (slot_num >= 0) {
1077 if (WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
1078 group))
1079 return -1;
1080 }
1081
1082 return slot_num;
1083 }
1084
1085 int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
1086 {
1087 struct kbase_csf_scheduler *scheduler =
1088 &group->kctx->kbdev->csf.scheduler;
1089 unsigned long flags;
1090 int slot_num;
1091
1092 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1093 slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
1094 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
1095
1096 return slot_num;
1097 }
1098
1099 /* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
1100 *
1101 * @group: GPU queue group to be checked
1102 *
1103 * This function needs to be called with scheduler's lock held
1104 *
1105 * Return: true if @group is on slot.
1106 */
1107 static bool kbasep_csf_scheduler_group_is_on_slot_locked(
1108 struct kbase_queue_group *group)
1109 {
1110 struct kbase_csf_scheduler *scheduler =
1111 &group->kctx->kbdev->csf.scheduler;
1112 int slot_num = group->csg_nr;
1113
1114 lockdep_assert_held(&scheduler->lock);
1115
1116 if (slot_num >= 0) {
1117 if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
1118 group))
1119 return true;
1120 }
1121
1122 return false;
1123 }
1124
1125 bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
1126 struct kbase_queue_group *group)
1127 {
1128 struct kbase_csf_scheduler *scheduler =
1129 &group->kctx->kbdev->csf.scheduler;
1130 int slot_num = group->csg_nr;
1131
1132 lockdep_assert_held(&scheduler->interrupt_lock);
1133
1134 if (WARN_ON(slot_num < 0))
1135 return false;
1136
1137 return test_bit(slot_num, scheduler->csgs_events_enable_mask);
1138 }
1139
1140 struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
1141 struct kbase_device *kbdev, int slot)
1142 {
1143 lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
1144
1145 return kbdev->csf.scheduler.csg_slots[slot].resident_group;
1146 }
1147
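/* Stop a queue whose group is resident on a CSG slot in the running state:
 * wait for any pending START request to be acknowledged, then issue a STOP
 * request on the CSI and wait for its acknowledgment. The GPU is reset if the
 * firmware does not respond in time.
 */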
1148 static int halt_stream_sync(struct kbase_queue *queue)
1149 {
1150 struct kbase_queue_group *group = queue->group;
1151 struct kbase_device *kbdev = queue->kctx->kbdev;
1152 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1153 struct kbase_csf_cmd_stream_group_info *ginfo;
1154 struct kbase_csf_cmd_stream_info *stream;
1155 int csi_index = queue->csi_index;
1156 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1157 unsigned long flags;
1158
1159 if (WARN_ON(!group) ||
1160 WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1161 return -EINVAL;
1162
1163 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1164 ginfo = &global_iface->groups[group->csg_nr];
1165 stream = &ginfo->streams[csi_index];
1166
1167 if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
1168 CS_REQ_STATE_START) {
1169
1170 remaining = wait_event_timeout(kbdev->csf.event_wait,
1171 (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1172 == CS_ACK_STATE_START), remaining);
1173
1174 if (!remaining) {
1175 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
1176 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1177 csi_index, group->handle, group->csg_nr);
1178 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1179 kbase_reset_gpu(kbdev);
1180
1181
1182 return -ETIMEDOUT;
1183 }
1184
1185 remaining =
1186 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1187 }
1188
1189 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1190 /* Set state to STOP */
1191 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
1192 CS_REQ_STATE_MASK);
1193
1194 kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
1195 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
1196
1197 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
1198
1199 /* Timed wait */
1200 remaining = wait_event_timeout(kbdev->csf.event_wait,
1201 (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1202 == CS_ACK_STATE_STOP), remaining);
1203
1204 if (!remaining) {
1205 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
1206 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1207 queue->csi_index, group->handle, group->csg_nr);
1208
1209 /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
1210 * will be reset as a work-around.
1211 */
1212 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1213 kbase_reset_gpu(kbdev);
1214
1215
1216 }
1217 return (remaining) ? 0 : -ETIMEDOUT;
1218 }
1219
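/* Condition checked whilst waiting in sched_halt_stream(): true once the
 * group is no longer scheduled, or is resident on a CSG slot that has reached
 * the running state.
 */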
1220 static bool can_halt_stream(struct kbase_device *kbdev,
1221 struct kbase_queue_group *group)
1222 {
1223 struct kbase_csf_csg_slot *const csg_slot =
1224 kbdev->csf.scheduler.csg_slots;
1225 unsigned long flags;
1226 bool can_halt;
1227 int slot;
1228
1229 if (!queue_group_scheduled(group))
1230 return true;
1231
1232 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1233 slot = kbase_csf_scheduler_group_get_slot_locked(group);
1234 can_halt = (slot >= 0) &&
1235 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1236 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1237 flags);
1238
1239 return can_halt;
1240 }
1241
1242 /**
1243 * sched_halt_stream() - Stop a GPU queue when its queue group is not running
1244 * on a CSG slot.
1245 * @queue: Pointer to the GPU queue to stop.
1246 *
1247 * This function handles stopping gpu queues for groups that are either not on
1248 * a CSG slot or are on the slot but undergoing transition to
1249 * resume or suspend states.
1250 * It waits until the queue group is scheduled on a slot and starts running,
1251 * which is needed as groups that were suspended may need to resume all queues
1252 * that were enabled and running at the time of suspension.
1253 *
1254 * Return: 0 on success, or negative on failure.
1255 */
1256 static int sched_halt_stream(struct kbase_queue *queue)
1257 {
1258 struct kbase_queue_group *group = queue->group;
1259 struct kbase_device *kbdev = queue->kctx->kbdev;
1260 struct kbase_csf_scheduler *const scheduler =
1261 &kbdev->csf.scheduler;
1262 struct kbase_csf_csg_slot *const csg_slot =
1263 kbdev->csf.scheduler.csg_slots;
1264 bool retry_needed = false;
1265 bool retried = false;
1266 long remaining;
1267 int slot;
1268 int err = 0;
1269 const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
1270
1271 if (WARN_ON(!group))
1272 return -EINVAL;
1273
1274 lockdep_assert_held(&queue->kctx->csf.lock);
1275 lockdep_assert_held(&scheduler->lock);
1276
1277 slot = kbase_csf_scheduler_group_get_slot(group);
1278
1279 if (slot >= 0) {
1280 WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1281
1282 if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1283 dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
1284 queue->csi_index, group->handle);
1285 retry_needed = true;
1286 }
1287 }
1288 retry:
1289 /* Update the group state so that it can get scheduled soon */
1290 update_idle_suspended_group_state(group);
1291
1292 mutex_unlock(&scheduler->lock);
1293
1294 /* This function is called when the queue group is either not on a CSG
1295 * slot or is on the slot but undergoing transition.
1296 *
1297 * To stop the queue, the function needs to wait either for the queue
1298 * group to be assigned a CSG slot (and that slot has to reach the
1299 * running state) or for the eviction of the queue group from the
1300 * scheduler's list.
1301 *
1302 * In order to evaluate the latter condition, the function doesn't
1303 * really need to lock the scheduler, as any update to the run_state
1304 * of the queue group by sched_evict_group() would be visible due
1305 * to implicit barriers provided by the kernel waitqueue macros.
1306 *
1307 * The group pointer cannot disappear meanwhile, as the high level
1308 * CSF context is locked. Therefore, the scheduler would be
1309 * the only one to update the run_state of the group.
1310 */
1311 remaining = wait_event_timeout(
1312 kbdev->csf.event_wait, can_halt_stream(kbdev, group),
1313 kbase_csf_timeout_in_jiffies(group_schedule_timeout));
1314
1315 mutex_lock(&scheduler->lock);
1316
1317 if (remaining && queue_group_scheduled_locked(group)) {
1318 slot = kbase_csf_scheduler_group_get_slot(group);
1319
1320 /* If the group is still on slot and slot is in running state
1321 * then explicitly stop the CSI of the
1322 * queue. Otherwise there are different cases to consider
1323 *
1324 * - If the queue group was already undergoing transition to
1325 * resume/start state when this function was entered then it
1326 * would not have disabled the CSI of the
1327 * queue being stopped and the previous wait would have ended
1328 * once the slot was in a running state with CS
1329 * interface still enabled.
1330 * Now the group is going through another transition either
1331 * to a suspend state or to a resume state (it could have
1332 * been suspended before the scheduler lock was grabbed).
1333 * In both scenarios need to wait again for the group to
1334 * come on a slot and that slot to reach the running state,
1335 * as that would guarantee that firmware will observe the
1336 * CSI as disabled.
1337 *
1338 * - If the queue group was either off the slot or was
1339 * undergoing transition to suspend state on entering this
1340 * function, then the group would have been resumed with the
1341 * queue's CSI in disabled state.
1342 * So now if the group is undergoing another transition
1343 * (after the resume) then just need to wait for the state
1344 * bits in the ACK register of CSI to be
1345 * set to STOP value. It is expected that firmware will
1346 * process the stop/disable request of the CS
1347 * interface after resuming the group before it processes
1348 * another state change request of the group.
1349 */
1350 if ((slot >= 0) &&
1351 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) {
1352 err = halt_stream_sync(queue);
1353 } else if (retry_needed && !retried) {
1354 retried = true;
1355 goto retry;
1356 } else if (slot >= 0) {
1357 struct kbase_csf_global_iface *global_iface =
1358 &kbdev->csf.global_iface;
1359 struct kbase_csf_cmd_stream_group_info *ginfo =
1360 &global_iface->groups[slot];
1361 struct kbase_csf_cmd_stream_info *stream =
1362 &ginfo->streams[queue->csi_index];
1363 u32 cs_req =
1364 kbase_csf_firmware_cs_input_read(stream, CS_REQ);
1365
1366 if (!WARN_ON(CS_REQ_STATE_GET(cs_req) !=
1367 CS_REQ_STATE_STOP)) {
1368 /* Timed wait */
1369 remaining = wait_event_timeout(
1370 kbdev->csf.event_wait,
1371 (CS_ACK_STATE_GET(
1372 kbase_csf_firmware_cs_output(
1373 stream, CS_ACK)) ==
1374 CS_ACK_STATE_STOP),
1375 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
1376
1377 if (!remaining) {
1378 dev_warn(kbdev->dev,
1379 "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
1380 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1381 queue->csi_index,
1382 group->handle, group->csg_nr);
1383
1384
1385 err = -ETIMEDOUT;
1386 }
1387 }
1388 }
1389 } else if (!remaining) {
1390 dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
1391 kbase_backend_get_cycle_cnt(kbdev),
1392 group->handle, queue->csi_index,
1393 group_schedule_timeout);
1394
1395
1396 err = -ETIMEDOUT;
1397 }
1398
1399 return err;
1400 }
1401
1402 /**
1403 * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
1404 * queue needs to be stopped.
1405 *
1406 * @queue: Pointer the GPU command queue
1407 *
1408 * This function is called when the CSI to which GPU queue is bound needs to
1409 * be stopped. For that the corresponding queue group needs to be resident on
1410 * the CSG slot and MCU firmware should be running. So this function makes the
1411 * Scheduler exit the sleeping or suspended state.
1412 */
1413 static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
1414 {
1415 struct kbase_device *kbdev = queue->kctx->kbdev;
1416
1417 scheduler_wakeup(kbdev, true);
1418
1419 /* Wait for MCU firmware to start running */
1420 if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
1421 dev_warn(
1422 kbdev->dev,
1423 "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
1424 kbase_backend_get_cycle_cnt(kbdev),
1425 queue->csi_index, queue->group->handle,
1426 queue->kctx->tgid, queue->kctx->id,
1427 queue->group->csg_nr);
1428 }
1429 }
1430
1431 int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
1432 {
1433 struct kbase_device *kbdev = queue->kctx->kbdev;
1434 struct kbase_queue_group *group = queue->group;
1435 bool const cs_enabled = queue->enabled;
1436 int err = 0;
1437
1438 if (WARN_ON(!group))
1439 return -EINVAL;
1440
1441 kbase_reset_gpu_assert_failed_or_prevented(kbdev);
1442 lockdep_assert_held(&queue->kctx->csf.lock);
1443 mutex_lock(&kbdev->csf.scheduler.lock);
1444
1445 queue->enabled = false;
1446 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
1447
1448 if (cs_enabled && queue_group_scheduled_locked(group)) {
1449 struct kbase_csf_csg_slot *const csg_slot =
1450 kbdev->csf.scheduler.csg_slots;
1451 int slot = kbase_csf_scheduler_group_get_slot(group);
1452
1453 /* Since the group needs to be resumed in order to stop the queue,
1454 * check if GPU needs to be powered up.
1455 */
1456 scheduler_activate_on_queue_stop(queue);
1457
1458 if ((slot >= 0) &&
1459 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
1460 err = halt_stream_sync(queue);
1461 else
1462 err = sched_halt_stream(queue);
1463
1464 unassign_user_doorbell_from_queue(kbdev, queue);
1465 kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
1466 }
1467
1468 mutex_unlock(&kbdev->csf.scheduler.lock);
1469 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state);
1470 return err;
1471 }
1472
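/* In the NO_MALI (model) build, mirror the given active state into the
 * CS_ACTIVE field of the queue's user output page, provided the queue is
 * enabled; a no-op otherwise.
 */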
1473 static void update_hw_active(struct kbase_queue *queue, bool active)
1474 {
1475 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
1476 if (queue && queue->enabled) {
1477 u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
1478
1479 output_addr[CS_ACTIVE / sizeof(u32)] = active;
1480 }
1481 #else
1482 CSTD_UNUSED(queue);
1483 CSTD_UNUSED(active);
1484 #endif
1485 }
1486
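/* Seed CS_EXTRACT_INIT in the queue's user input page with the current
 * CS_EXTRACT value from the output page.
 */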
1487 static void program_cs_extract_init(struct kbase_queue *queue)
1488 {
1489 u64 *input_addr = (u64 *)queue->user_io_addr;
1490 u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
1491
1492 input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
1493 output_addr[CS_EXTRACT_LO / sizeof(u64)];
1494 }
1495
1496 static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
1497 struct kbase_queue *queue)
1498 {
1499 struct kbase_device *kbdev = queue->kctx->kbdev;
1500 u32 const glb_version = kbdev->csf.global_iface.version;
1501
1502 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1503
1504 /* If cs_trace_command not supported, nothing to program */
1505 if (glb_version < kbase_csf_interface_version(1, 1, 0))
1506 return;
1507
1508 /* Program for cs_trace if enabled. In the current arrangement, it is
1509 * possible for the context to enable the cs_trace after some queues
1510 * have been registered in cs_trace in the disabled state. This is tracked by
1511 * the queue's trace buffer base address, which had been validated at the
1512 * queue's register_ex call.
1513 */
1514 if (kbase_csf_scheduler_queue_has_trace(queue)) {
1515 u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(
1516 queue->trace_cfg, queue->kctx->as_nr);
1517
1518 kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg);
1519 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE,
1520 queue->trace_buffer_size);
1521
1522 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO,
1523 queue->trace_buffer_base & U32_MAX);
1524 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI,
1525 queue->trace_buffer_base >> 32);
1526
1527 kbase_csf_firmware_cs_input(
1528 stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO,
1529 queue->trace_offset_ptr & U32_MAX);
1530 kbase_csf_firmware_cs_input(
1531 stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI,
1532 queue->trace_offset_ptr >> 32);
1533 } else {
1534 /* Place the configuration to the disabled condition */
1535 kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0);
1536 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0);
1537 }
1538 }
1539
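/**
 * program_cs() - Program a GPU command queue on the CS interface of its
 *                group's CSG slot.
 *
 * @kbdev:             Pointer to the GPU device.
 * @queue:             Pointer to the GPU command queue.
 * @ring_csg_doorbell: Flag to indicate whether the CSG doorbell should be
 *                     rung when issuing the request.
 *
 * This writes the ring buffer base/size, the user I/O pages, the doorbell and
 * priority configuration and the cs_trace setup to the CS interface, then
 * requests the START or STOP state depending on whether the queue is enabled.
 */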
1540 static void program_cs(struct kbase_device *kbdev,
1541 struct kbase_queue *queue, bool ring_csg_doorbell)
1542 {
1543 struct kbase_queue_group *group = queue->group;
1544 struct kbase_csf_cmd_stream_group_info *ginfo;
1545 struct kbase_csf_cmd_stream_info *stream;
1546 int csi_index = queue->csi_index;
1547 unsigned long flags;
1548 u64 user_input;
1549 u64 user_output;
1550
1551 if (WARN_ON(!group))
1552 return;
1553
1554 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1555
1556 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1557 return;
1558
1559 ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
1560
1561 if (WARN_ON(csi_index < 0) ||
1562 WARN_ON(csi_index >= ginfo->stream_num))
1563 return;
1564
1565 if (queue->enabled) {
1566 assign_user_doorbell_to_queue(kbdev, queue);
1567 if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
1568 return;
1569
1570 WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1571 }
1572
1573 if (queue->enabled && queue_group_suspended_locked(group))
1574 program_cs_extract_init(queue);
1575
1576 stream = &ginfo->streams[csi_index];
1577
1578 kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
1579 queue->base_addr & 0xFFFFFFFF);
1580 kbase_csf_firmware_cs_input(stream, CS_BASE_HI,
1581 queue->base_addr >> 32);
1582 kbase_csf_firmware_cs_input(stream, CS_SIZE,
1583 queue->size);
1584
1585 user_input = queue->user_io_gpu_va;
1586 WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
1587
1588 kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
1589 kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
1590
1591 user_output = user_input + PAGE_SIZE;
1592 kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
1593 kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
1594
1595 kbase_csf_firmware_cs_input(stream, CS_CONFIG,
1596 (queue->doorbell_nr << 8) | (queue->priority & 0xF));
1597
1598 /* Program the queue's cs_trace configuration */
1599 program_cs_trace_cfg(stream, queue);
1600
1601 /* Enable all interrupts for now */
1602 kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
1603
1604 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1605
1606 /* The fault bit could be misaligned between CS_REQ and CS_ACK if the
1607 * acknowledgment was deferred due to dump on fault and the group was
1608 * removed from the CSG slot before the fault could be acknowledged.
1609 */
1610 if (queue->enabled) {
1611 u32 const cs_ack =
1612 kbase_csf_firmware_cs_output(stream, CS_ACK);
1613
1614 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
1615 CS_REQ_FAULT_MASK);
1616 }
1617
1618 /*
1619 * Enable the CSG idle notification once the CS's ringbuffer
1620 * becomes empty or the CS becomes sync_idle, waiting for a sync
1621 * update or a protected mode switch.
1622 */
1623 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1624 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1625 CS_REQ_IDLE_SHARED_SB_DEC_MASK,
1626 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1627 CS_REQ_IDLE_SHARED_SB_DEC_MASK);
1628
1629 /* Set state to START/STOP */
1630 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1631 queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
1632 CS_REQ_STATE_MASK);
1633 kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1634 ring_csg_doorbell);
1635 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
1636
1637 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
1638
1639 update_hw_active(queue, true);
1640 }
1641
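/**
 * onslot_csg_add_new_queue() - Add a new queue to a CSG that is already
 *                              resident on a slot.
 *
 * @queue: Pointer to the GPU command queue to be added.
 *
 * The queue is first added to the MCU shared region; only if that succeeds
 * is the queue programmed on its CS interface.
 *
 * Return: 0 on success, or the error from kbase_csf_mcu_shared_add_queue().
 */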
1642 static int onslot_csg_add_new_queue(struct kbase_queue *queue)
1643 {
1644 struct kbase_device *kbdev = queue->kctx->kbdev;
1645 int err;
1646
1647 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1648
1649 err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
1650 if (!err)
1651 program_cs(kbdev, queue, true);
1652
1653 return err;
1654 }
1655
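/**
 * kbase_csf_scheduler_queue_start() - Enable the running of a GPU command
 *                                     queue on firmware.
 *
 * @queue: Pointer to the GPU command queue to be started.
 *
 * This marks the queue as enabled and schedules the group it is bound to.
 * If the group is already resident on a CSG slot, the queue is either
 * programmed directly or its user doorbell is kicked.
 *
 * Return: 0 on success, or negative on failure.
 */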
1656 int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
1657 {
1658 struct kbase_queue_group *group = queue->group;
1659 struct kbase_device *kbdev = queue->kctx->kbdev;
1660 bool const cs_enabled = queue->enabled;
1661 int err = 0;
1662 bool evicted = false;
1663
1664 kbase_reset_gpu_assert_prevented(kbdev);
1665 lockdep_assert_held(&queue->kctx->csf.lock);
1666
1667 if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
1668 return -EINVAL;
1669
1670 mutex_lock(&kbdev->csf.scheduler.lock);
1671
1672 #if IS_ENABLED(CONFIG_DEBUG_FS)
1673 if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
1674 mutex_unlock(&kbdev->csf.scheduler.lock);
1675 return -EBUSY;
1676 }
1677 #endif
1678
1679 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
1680 group->run_state);
1681 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
1682 queue->status_wait);
1683
1684 if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
1685 err = -EIO;
1686 evicted = true;
1687 } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
1688 && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
1689 dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked",
1690 queue->csi_index, group->handle);
1691 } else {
1692 err = scheduler_group_schedule(group);
1693
1694 if (!err) {
1695 queue->enabled = true;
1696 if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
1697 if (cs_enabled) {
1698 /* Normally, when a queue is already
1699 * running, a queue update is just a
1700 * doorbell kick from the user side.
1701 * However, if such a kick shortly
1702 * follows a start or resume, the queue
1703 * may still be in transition, so the
1704 * kick enters the kernel while the
1705 * hw_active flag is yet to be set. The
1706 * scheduler needs to kick the
1707 * corresponding user doorbell in such
1708 * a case.
1709 */
1710 kbase_csf_ring_cs_user_doorbell(kbdev, queue);
1711 } else {
1712 err = onslot_csg_add_new_queue(queue);
1713 /* For an on-slot CSG, the only error in adding a new
1714 * queue to run is that the scheduler could not map
1715 * the required userio pages, most likely due to a
1716 * resource issue. In such a case, and if the group has
1717 * yet to enter its fatal error state, we return -EBUSY
1718 * to the submitter for another kick. The queue itself
1719 * has yet to be programmed and hence needs to remain in
1720 * its previous (disabled) state. If the error persists,
1721 * the group will eventually report a fatal error via
1722 * the group's error reporting mechanism, when the MCU
1723 * shared region map retry limit of the group is
1724 * exceeded. For such a case, the expected error value
1725 * is -EIO.
1726 */
1727 if (unlikely(err)) {
1728 queue->enabled = cs_enabled;
1729 mutex_unlock(&kbdev->csf.scheduler.lock);
1730 return (err != -EIO) ? -EBUSY : err;
1731 }
1732 }
1733 }
1734 queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
1735 msecs_to_jiffies(kbase_get_timeout_ms(
1736 kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
1737 }
1738 }
1739
1740 mutex_unlock(&kbdev->csf.scheduler.lock);
1741
1742 if (evicted)
1743 kbase_csf_term_descheduled_queue_group(group);
1744
1745 return err;
1746 }
1747
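/**
 * update_csg_slot_status() - Update the tracked state of a CSG slot from the
 *                            latest CSG_ACK value.
 *
 * @kbdev: Pointer to the GPU device.
 * @slot:  The CSG slot number.
 *
 * Transitions a slot from READY2RUN to RUNNING once the firmware has
 * acknowledged the start/resume request, and from DOWN2STOP to STOPPED once
 * the suspend/terminate request has been acknowledged.
 *
 * Return: the (possibly updated) state of the CSG slot.
 */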
1748 static enum kbase_csf_csg_slot_state update_csg_slot_status(
1749 struct kbase_device *kbdev, s8 slot)
1750 {
1751 struct kbase_csf_csg_slot *csg_slot =
1752 &kbdev->csf.scheduler.csg_slots[slot];
1753 struct kbase_csf_cmd_stream_group_info *ginfo =
1754 &kbdev->csf.global_iface.groups[slot];
1755 u32 state;
1756 enum kbase_csf_csg_slot_state slot_state;
1757
1758 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1759
1760 state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1761 CSG_ACK));
1762 slot_state = atomic_read(&csg_slot->state);
1763
1764 switch (slot_state) {
1765 case CSG_SLOT_READY2RUN:
1766 if ((state == CSG_ACK_STATE_START) ||
1767 (state == CSG_ACK_STATE_RESUME)) {
1768 slot_state = CSG_SLOT_RUNNING;
1769 atomic_set(&csg_slot->state, slot_state);
1770 csg_slot->trigger_jiffies = jiffies;
1771 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
1772 state);
1773 dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
1774 csg_slot->resident_group->handle, slot);
1775 }
1776 break;
1777 case CSG_SLOT_DOWN2STOP:
1778 if ((state == CSG_ACK_STATE_SUSPEND) ||
1779 (state == CSG_ACK_STATE_TERMINATE)) {
1780 slot_state = CSG_SLOT_STOPPED;
1781 atomic_set(&csg_slot->state, slot_state);
1782 csg_slot->trigger_jiffies = jiffies;
1783 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state);
1784 dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
1785 csg_slot->resident_group->handle, slot);
1786 }
1787 break;
1788 case CSG_SLOT_DOWN2STOP_TIMEDOUT:
1789 case CSG_SLOT_READY2RUN_TIMEDOUT:
1790 case CSG_SLOT_READY:
1791 case CSG_SLOT_RUNNING:
1792 case CSG_SLOT_STOPPED:
1793 break;
1794 default:
1795 dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state);
1796 break;
1797 }
1798
1799 return slot_state;
1800 }
1801
1802 static bool csg_slot_running(struct kbase_device *kbdev, s8 slot)
1803 {
1804 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1805
1806 return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING);
1807 }
1808
1809 static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot)
1810 {
1811 enum kbase_csf_csg_slot_state slot_state;
1812
1813 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1814
1815 slot_state = update_csg_slot_status(kbdev, slot);
1816
1817 return (slot_state == CSG_SLOT_STOPPED ||
1818 slot_state == CSG_SLOT_READY);
1819 }
1820
1821 static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot)
1822 {
1823 struct kbase_csf_cmd_stream_group_info *ginfo =
1824 &kbdev->csf.global_iface.groups[slot];
1825 u32 state;
1826
1827 state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1828 CSG_ACK));
1829
1830 if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) {
1831 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state);
1832 dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot);
1833 return true;
1834 }
1835
1836 return false;
1837 }
1838
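/**
 * halt_csg_slot() - Request the firmware to suspend or terminate the CSG
 *                   resident on a slot.
 *
 * @group:   Pointer to the queue group occupying the slot.
 * @suspend: True to request a suspend, false to request a terminate.
 *
 * If the slot is still transitioning to the running state, this first waits
 * for that transition to complete (within the firmware timeout) before
 * issuing the SUSPEND or TERMINATE request and marking the slot as
 * DOWN2STOP.
 */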
1839 static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
1840 {
1841 struct kbase_device *kbdev = group->kctx->kbdev;
1842 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1843 struct kbase_csf_csg_slot *csg_slot =
1844 kbdev->csf.scheduler.csg_slots;
1845 s8 slot;
1846
1847 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1848
1849 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1850 return;
1851
1852 slot = group->csg_nr;
1853
1854 /* When in transition, wait for it to complete */
1855 if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1856 long remaining =
1857 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1858
1859 dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
1860 remaining = wait_event_timeout(kbdev->csf.event_wait,
1861 csg_slot_running(kbdev, slot), remaining);
1862 if (!remaining)
1863 dev_warn(kbdev->dev,
1864 "[%llu] slot %d timeout (%d ms) on up-running\n",
1865 kbase_backend_get_cycle_cnt(kbdev),
1866 slot, kbdev->csf.fw_timeout_ms);
1867 }
1868
1869 if (csg_slot_running(kbdev, slot)) {
1870 unsigned long flags;
1871 struct kbase_csf_cmd_stream_group_info *ginfo =
1872 &global_iface->groups[slot];
1873
1874 u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
1875 CSG_REQ_STATE_TERMINATE;
1876
1877 dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d",
1878 suspend, group->handle, group->kctx->tgid, group->kctx->id, slot);
1879
1880 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1881 /* Set state to SUSPEND/TERMINATE */
1882 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
1883 CSG_REQ_STATE_MASK);
1884 kbase_csf_ring_csg_doorbell(kbdev, slot);
1885 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1886 flags);
1887 atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
1888 csg_slot[slot].trigger_jiffies = jiffies;
1889 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
1890
1891 KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
1892 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
1893 }
1894 }
1895
1896 static void term_csg_slot(struct kbase_queue_group *group)
1897 {
1898 halt_csg_slot(group, false);
1899 }
1900
1901 static void suspend_csg_slot(struct kbase_queue_group *group)
1902 {
1903 halt_csg_slot(group, true);
1904 }
1905
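/**
 * csf_wait_ge_condition_supported() - Check if the firmware supports the
 *                                     greater-than-or-equal (GE) condition
 *                                     for SYNC_WAIT evaluation.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Return: true if the global interface version indicates GE support.
 */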
1906 static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev)
1907 {
1908 const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version);
1909 const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version);
1910
1911 switch (glb_major) {
1912 case 0:
1913 break;
1914 case 1:
1915 if (glb_minor >= 4)
1916 return true;
1917 break;
1918 case 2:
1919 if (glb_minor >= 6)
1920 return true;
1921 break;
1922 case 3:
1923 if (glb_minor >= 6)
1924 return true;
1925 break;
1926 default:
1927 return true;
1928 }
1929 return false;
1930 }
1931 /**
1932 * evaluate_sync_update() - Evaluate the sync wait condition the GPU command
1933 * queue has been blocked on.
1934 *
1935 * @queue: Pointer to the GPU command queue
1936 *
1937 * Return: true if sync wait condition is satisfied.
1938 */
1939 static bool evaluate_sync_update(struct kbase_queue *queue)
1940 {
1941 struct kbase_vmap_struct *mapping;
1942 bool updated = false;
1943 u32 *sync_ptr;
1944 u32 sync_wait_size;
1945 u32 sync_wait_align_mask;
1946 u32 sync_wait_cond;
1947 u32 sync_current_val;
1948 struct kbase_device *kbdev;
1949 bool sync_wait_align_valid = false;
1950 bool sync_wait_cond_valid = false;
1951
1952 if (WARN_ON(!queue))
1953 return false;
1954
1955 kbdev = queue->kctx->kbdev;
1956
1957 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1958
1959 sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
1960 sync_wait_align_mask =
1961 (sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
1962 sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
1963 if (!sync_wait_align_valid) {
1964 dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
1965 queue->sync_ptr);
1966 goto out;
1967 }
1968
1969 sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
1970 &mapping);
1971
1972 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
1973 queue->sync_ptr);
1974 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
1975 queue->blocked_reason);
1976
1977 if (!sync_ptr) {
1978 dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
1979 queue->sync_ptr);
1980 goto out;
1981 }
1982
1983 sync_wait_cond =
1984 CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
1985 sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) ||
1986 (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) ||
1987 ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
1988 csf_wait_ge_condition_supported(kbdev));
1989
1990 WARN_ON(!sync_wait_cond_valid);
1991
1992 sync_current_val = READ_ONCE(*sync_ptr);
1993 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
1994 sync_current_val);
1995
1996 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
1997 queue->sync_value);
1998
1999 if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
2000 (sync_current_val > queue->sync_value)) ||
2001 ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
2002 (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) ||
2003 ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
2004 (sync_current_val <= queue->sync_value))) {
2005 /* The sync wait condition is satisfied so the group to which
2006 * queue is bound can be re-scheduled.
2007 */
2008 updated = true;
2009 } else {
2010 dev_dbg(queue->kctx->kbdev->dev,
2011 "sync memory not updated yet(%u)", sync_current_val);
2012 }
2013
2014 kbase_phy_alloc_mapping_put(queue->kctx, mapping);
2015 out:
2016 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
2017 return updated;
2018 }
2019
2020 /**
2021 * save_slot_cs() - Save the state for blocked GPU command queue.
2022 *
2023 * @ginfo: Pointer to the CSG interface used by the group
2024 * the queue is bound to.
2025 * @queue: Pointer to the GPU command queue.
2026 *
2027 * This function will check if the GPU command queue is blocked on a sync wait
2028 * and evaluate the wait condition. If the wait condition isn't satisfied it
2029 * saves the state needed to reevaluate the condition in the future.
2030 * The group to which the queue is bound shall be in the idle state.
2031 *
2032 * Return: true if the queue is blocked on a sync wait operation.
2033 */
2034 static
2035 bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
2036 struct kbase_queue *queue)
2037 {
2038 struct kbase_csf_cmd_stream_info *const stream =
2039 &ginfo->streams[queue->csi_index];
2040 u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
2041 bool is_waiting = false;
2042
2043 #if IS_ENABLED(CONFIG_DEBUG_FS)
2044 u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
2045
2046 cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
2047 queue->saved_cmd_ptr = cmd_ptr;
2048 #endif
2049
2050 KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
2051 queue, status);
2052
2053 if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
2054 queue->status_wait = status;
2055 queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
2056 CS_STATUS_WAIT_SYNC_POINTER_LO);
2057 queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
2058 CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
2059 queue->sync_value = kbase_csf_firmware_cs_output(stream,
2060 CS_STATUS_WAIT_SYNC_VALUE);
2061
2062 queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2063 kbase_csf_firmware_cs_output(stream,
2064 CS_STATUS_SCOREBOARDS));
2065 queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
2066 kbase_csf_firmware_cs_output(stream,
2067 CS_STATUS_BLOCKED_REASON));
2068
2069 if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
2070 !evaluate_sync_update(queue)) {
2071 is_waiting = true;
2072 } else {
2073 /* The sync object has already been updated and the
2074 * condition met, thus it doesn't need to be reevaluated
2075 * and so 'status_wait' can be cleared here.
2076 */
2077 queue->status_wait = 0;
2078 }
2079 } else {
2080 /* Invalidate the wait status info that would have been recorded
2081 * if this queue was blocked when the group (in idle state) was
2082 * suspended previously. After that the group could have been
2083 * unblocked due to the kicking of another queue bound to it, and
2084 * so the stale wait status info would have stuck with this queue.
2085 */
2086 queue->status_wait = 0;
2087 }
2088
2089 return is_waiting;
2090 }
2091
2092 static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
2093 {
2094 struct kbase_context *kctx = group->kctx;
2095 struct kbase_device *kbdev = kctx->kbdev;
2096 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2097
2098 lockdep_assert_held(&scheduler->lock);
2099
2100 /* Only try to schedule work for this event if no requests are pending,
2101 * otherwise the function will end up canceling previous work requests,
2102 * and the scheduler is configured to wake up periodically (or the
2103 * schedule of work needs to be enforced in a situation such as entering
2104 * protected mode).
2105 */
2106 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
2107 dev_dbg(kbdev->dev, "Kicking async for group %d\n",
2108 group->handle);
2109 kbase_csf_scheduler_invoke_tock(kbdev);
2110 }
2111 }
2112
2113 static void ktrace_log_group_state(struct kbase_queue_group *const group)
2114 {
2115 switch (group->run_state) {
2116 case KBASE_CSF_GROUP_INACTIVE:
2117 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
2118 group->run_state);
2119 break;
2120 case KBASE_CSF_GROUP_RUNNABLE:
2121 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group,
2122 group->run_state);
2123 break;
2124 case KBASE_CSF_GROUP_IDLE:
2125 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group,
2126 group->run_state);
2127 break;
2128 case KBASE_CSF_GROUP_SUSPENDED:
2129 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
2130 group->run_state);
2131 break;
2132 case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE:
2133 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2134 group->run_state);
2135 break;
2136 case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC:
2137 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
2138 group, group->run_state);
2139 break;
2140 case KBASE_CSF_GROUP_FAULT_EVICTED:
2141 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group,
2142 group->run_state);
2143 break;
2144 case KBASE_CSF_GROUP_TERMINATED:
2145 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group,
2146 group->run_state);
2147 break;
2148 }
2149 }
2150
2151 static
2152 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
2153 struct kbase_queue_group *const group,
2154 enum kbase_csf_group_state run_state)
2155 {
2156 struct kbase_context *const kctx = group->kctx;
2157 struct kbase_device *const kbdev = kctx->kbdev;
2158
2159 lockdep_assert_held(&scheduler->lock);
2160
2161 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2162
2163 if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
2164 return;
2165
2166 group->run_state = run_state;
2167
2168 ktrace_log_group_state(group);
2169
2170 if (run_state == KBASE_CSF_GROUP_RUNNABLE)
2171 group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2172
2173 list_add_tail(&group->link,
2174 &kctx->csf.sched.runnable_groups[group->priority]);
2175 kctx->csf.sched.num_runnable_grps++;
2176 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
2177 kctx->csf.sched.num_runnable_grps);
2178
2179 /* Add the kctx if not yet in runnable kctxs */
2180 if (kctx->csf.sched.num_runnable_grps == 1) {
2181 /* First runnable csg, adds to the runnable_kctxs */
2182 INIT_LIST_HEAD(&kctx->csf.link);
2183 list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
2184 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
2185 }
2186
2187 scheduler->total_runnable_grps++;
2188
2189 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
2190 (scheduler->total_runnable_grps == 1 ||
2191 scheduler->state == SCHED_SUSPENDED ||
2192 scheduler->state == SCHED_SLEEPING)) {
2193 dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
2194 /* Fire a scheduling to start the time-slice */
2195 enqueue_tick_work(kbdev);
2196 } else
2197 schedule_in_cycle(group, false);
2198
2199 /* Since a new group has become runnable, check if GPU needs to be
2200 * powered up.
2201 */
2202 scheduler_wakeup(kbdev, false);
2203 }
2204
2205 static
2206 void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
2207 struct kbase_queue_group *group,
2208 enum kbase_csf_group_state run_state)
2209 {
2210 struct kbase_context *kctx = group->kctx;
2211 struct kbase_queue_group *new_head_grp;
2212 struct list_head *list =
2213 &kctx->csf.sched.runnable_groups[group->priority];
2214 unsigned long flags;
2215
2216 lockdep_assert_held(&scheduler->lock);
2217
2218 WARN_ON(!queue_group_scheduled_locked(group));
2219
2220 group->run_state = run_state;
2221
2222 ktrace_log_group_state(group);
2223
2224 list_del_init(&group->link);
2225
2226 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
2227 /* The below condition will be true when the group running in protected
2228 * mode is being terminated but the protected mode exit interrupt wasn't
2229 * received. This can happen if the FW got stuck in protected mode
2230 * for some reason (like a GPU page fault or some internal error).
2231 * In normal cases the FW is expected to send the protected mode exit
2232 * interrupt before it handles the CSG termination request.
2233 */
2234 if (unlikely(scheduler->active_protm_grp == group)) {
2235 /* CSG slot cleanup should have happened for the pmode group */
2236 WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
2237 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2238 /* Initiate a GPU reset, in case it wasn't initiated yet,
2239 * in order to rectify the anomaly.
2240 */
2241 if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
2242 kbase_reset_gpu(kctx->kbdev);
2243
2244 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
2245 scheduler->active_protm_grp, 0u);
2246 scheduler->active_protm_grp = NULL;
2247 }
2248 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
2249
2250 if (scheduler->top_grp == group) {
2251 /*
2252 * Note: this disables explicit rotation in the next scheduling
2253 * cycle. However, removing the top_grp is the same as an
2254 * implicit rotation (e.g. if we instead rotated the top_ctx
2255 * and then remove top_grp)
2256 *
2257 * This implicit rotation is assumed by the scheduler rotate
2258 * functions.
2259 */
2260 scheduler->top_grp = NULL;
2261
2262 /*
2263 * Trigger a scheduling tock for a CSG containing protected
2264 * content in case there has been any in order to minimise
2265 * latency.
2266 */
2267 group = scheduler_get_protm_enter_async_group(kctx->kbdev,
2268 NULL);
2269 if (group)
2270 schedule_in_cycle(group, true);
2271 }
2272
2273 kctx->csf.sched.num_runnable_grps--;
2274 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
2275 kctx->csf.sched.num_runnable_grps);
2276 new_head_grp = (!list_empty(list)) ?
2277 list_first_entry(list, struct kbase_queue_group, link) :
2278 NULL;
2279 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
2280
2281 if (kctx->csf.sched.num_runnable_grps == 0) {
2282 struct kbase_context *new_head_kctx;
2283 struct list_head *kctx_list = &scheduler->runnable_kctxs;
2284 /* drop the kctx */
2285 list_del_init(&kctx->csf.link);
2286 if (scheduler->top_ctx == kctx)
2287 scheduler->top_ctx = NULL;
2288 KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
2289 new_head_kctx = (!list_empty(kctx_list)) ?
2290 list_first_entry(kctx_list, struct kbase_context, csf.link) :
2291 NULL;
2292 KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
2293 }
2294
2295 WARN_ON(scheduler->total_runnable_grps == 0);
2296 scheduler->total_runnable_grps--;
2297 if (!scheduler->total_runnable_grps) {
2298 dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
2299 cancel_tick_timer(kctx->kbdev);
2300 WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
2301 if (scheduler->state != SCHED_SUSPENDED)
2302 enqueue_gpu_idle_work(scheduler);
2303 }
2304 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
2305 scheduler->num_active_address_spaces |
2306 (((u64)scheduler->total_runnable_grps) << 32));
2307 }
2308
2309 static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
2310 {
2311 struct kbase_context *kctx = group->kctx;
2312
2313 lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
2314
2315 WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE);
2316
2317 list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
2318 kctx->csf.sched.num_idle_wait_grps++;
2319 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
2320 kctx->csf.sched.num_idle_wait_grps);
2321 group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
2322 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group,
2323 group->run_state);
2324 dev_dbg(kctx->kbdev->dev,
2325 "Group-%d suspended on sync_wait, total wait_groups: %u\n",
2326 group->handle, kctx->csf.sched.num_idle_wait_grps);
2327 }
2328
2329 static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
2330 {
2331 struct kbase_context *kctx = group->kctx;
2332 struct list_head *list = &kctx->csf.sched.idle_wait_groups;
2333 struct kbase_queue_group *new_head_grp;
2334
2335 lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
2336
2337 WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
2338
2339 list_del_init(&group->link);
2340 WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
2341 kctx->csf.sched.num_idle_wait_grps--;
2342 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
2343 kctx->csf.sched.num_idle_wait_grps);
2344 new_head_grp = (!list_empty(list)) ?
2345 list_first_entry(list, struct kbase_queue_group, link) :
2346 NULL;
2347 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
2348 group->run_state = KBASE_CSF_GROUP_INACTIVE;
2349 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state);
2350 }
2351
2352 static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
2353 struct kbase_queue_group *group)
2354 {
2355 lockdep_assert_held(&scheduler->lock);
2356
2357 if (WARN_ON(!group))
2358 return;
2359
2360 remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE);
2361 insert_group_to_idle_wait(group);
2362 }
2363
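/**
 * update_offslot_non_idle_cnt() - Decrement the non-idle off-slot group count
 *                                 for a group that goes off-slot.
 *
 * @group: Pointer to the queue group.
 *
 * The counter is only decremented if the group's prepared sequence number
 * placed it within the non-idle scan-out range of the latest scheduling
 * operation.
 */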
2364 static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
2365 {
2366 struct kbase_device *kbdev = group->kctx->kbdev;
2367 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2368
2369 lockdep_assert_held(&scheduler->lock);
2370
2371 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2372 int new_val =
2373 atomic_dec_return(&scheduler->non_idle_offslot_grps);
2374 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
2375 }
2376 }
2377
2378 static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
2379 {
2380 struct kbase_device *kbdev = group->kctx->kbdev;
2381 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2382
2383 lockdep_assert_held(&scheduler->lock);
2384
2385 WARN_ON(group->csg_nr < 0);
2386
2387 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2388 int new_val =
2389 atomic_dec_return(&scheduler->non_idle_offslot_grps);
2390 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
2391 }
2392 }
2393
2394 static void update_offslot_non_idle_cnt_on_grp_suspend(
2395 struct kbase_queue_group *group)
2396 {
2397 struct kbase_device *kbdev = group->kctx->kbdev;
2398 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2399
2400 lockdep_assert_held(&scheduler->lock);
2401
2402 if (scheduler->state == SCHED_BUSY) {
2403 /* active phase, or asynchronously entering protected mode */
2404 if (group->prepared_seq_num >=
2405 scheduler->non_idle_scanout_grps) {
2406 /* At scanout, it was tagged as on-slot idle */
2407 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2408 int new_val = atomic_inc_return(
2409 &scheduler->non_idle_offslot_grps);
2410 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
2411 group, new_val);
2412 }
2413 } else {
2414 if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
2415 int new_val = atomic_dec_return(
2416 &scheduler->non_idle_offslot_grps);
2417 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
2418 group, new_val);
2419 }
2420 }
2421 } else {
2422 /* async phases */
2423 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2424 int new_val = atomic_inc_return(
2425 &scheduler->non_idle_offslot_grps);
2426 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
2427 new_val);
2428 }
2429 }
2430 }
2431
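/**
 * confirm_cmd_buf_empty() - Check whether the ring buffer of a GPU command
 *                           queue is empty and its scoreboard is idle.
 *
 * @queue: Pointer to the GPU command queue.
 *
 * Return: true if CS_INSERT equals CS_EXTRACT and (for CSF 1.0 onwards) no
 *         scoreboard entries are in use.
 */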
2432 static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
2433 {
2434 bool cs_empty;
2435 bool cs_idle;
2436 u32 sb_status = 0;
2437
2438 struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
2439 struct kbase_csf_global_iface const *const iface =
2440 &kbdev->csf.global_iface;
2441
2442 u32 glb_version = iface->version;
2443
2444 u64 const *input_addr = (u64 const *)queue->user_io_addr;
2445 u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
2446
2447 if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
2448 /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
2449 struct kbase_csf_cmd_stream_group_info const *const ginfo =
2450 &kbdev->csf.global_iface.groups[queue->group->csg_nr];
2451 struct kbase_csf_cmd_stream_info const *const stream =
2452 &ginfo->streams[queue->csi_index];
2453
2454 sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2455 kbase_csf_firmware_cs_output(stream,
2456 CS_STATUS_SCOREBOARDS));
2457 }
2458
2459 cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
2460 output_addr[CS_EXTRACT_LO / sizeof(u64)]);
2461 cs_idle = cs_empty && (!sb_status);
2462
2463 return cs_idle;
2464 }
2465
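/**
 * save_csg_slot() - Update the state of a queue group that has just been
 *                   suspended or terminated on its CSG slot.
 *
 * @group: Pointer to the queue group.
 *
 * Based on the CSG's idle status and the state of its bound queues, the
 * group is moved to the suspended, suspended-on-idle or
 * suspended-on-wait-sync state, and the off-slot non-idle group count and
 * tiler heap reclaim manager are updated accordingly.
 */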
2466 static void save_csg_slot(struct kbase_queue_group *group)
2467 {
2468 struct kbase_device *kbdev = group->kctx->kbdev;
2469 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2470 struct kbase_csf_cmd_stream_group_info *ginfo;
2471 u32 state;
2472
2473 lockdep_assert_held(&scheduler->lock);
2474
2475 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2476 return;
2477
2478 ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
2479
2480 state =
2481 CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK));
2482
2483 if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
2484 (state != CSG_ACK_STATE_TERMINATE))) {
2485 u32 max_streams = ginfo->stream_num;
2486 u32 i;
2487 bool sync_wait = false;
2488 bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
2489 CSG_STATUS_STATE_IDLE_MASK;
2490 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
2491 for (i = 0; i < max_streams; i++)
2492 update_hw_active(group->bound_queues[i], false);
2493 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
2494 for (i = 0; idle && i < max_streams; i++) {
2495 struct kbase_queue *const queue =
2496 group->bound_queues[i];
2497
2498 if (!queue || !queue->enabled)
2499 continue;
2500
2501 if (save_slot_cs(ginfo, queue)) {
2502 /* sync_wait is only true if the queue is blocked on
2503 * a CQS and not a scoreboard.
2504 */
2505 if (queue->blocked_reason !=
2506 CS_STATUS_BLOCKED_ON_SB_WAIT)
2507 sync_wait = true;
2508 } else {
2509 /* Need to confirm if ringbuffer of the GPU
2510 * queue is empty or not. A race can arise
2511 * between the flush of GPU queue and suspend
2512 * of CSG. If a queue is flushed after FW has
2513 * set the IDLE bit in CSG_STATUS_STATE, then
2514 * Scheduler will incorrectly consider CSG
2515 * as idle. And there may not be any further
2516 * flush call for the GPU queue, which would
2517 * have de-idled the CSG.
2518 */
2519 idle = confirm_cmd_buf_empty(queue);
2520 }
2521 }
2522
2523 if (idle) {
2524 /* Take the suspended group out of the runnable_groups
2525 * list of the context and move it to the
2526 * idle_wait_groups list.
2527 */
2528 if (sync_wait)
2529 deschedule_idle_wait_group(scheduler, group);
2530 else {
2531 group->run_state =
2532 KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
2533 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2534 group->run_state);
2535 dev_dbg(kbdev->dev, "Group-%d suspended: idle",
2536 group->handle);
2537 }
2538 } else {
2539 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
2540 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group,
2541 group->run_state);
2542 }
2543
2544 update_offslot_non_idle_cnt_on_grp_suspend(group);
2545 kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
2546 }
2547 }
2548
2549 /* Clean up the CSG slot after it has been vacated, ready for the next CSG
2550 * run. Return whether there is a kctx address fault associated with the
2551 * group for which the clean-up is done.
2552 */
2553 static bool cleanup_csg_slot(struct kbase_queue_group *group)
2554 {
2555 struct kbase_context *kctx = group->kctx;
2556 struct kbase_device *kbdev = kctx->kbdev;
2557 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2558 struct kbase_csf_cmd_stream_group_info *ginfo;
2559 s8 slot;
2560 struct kbase_csf_csg_slot *csg_slot;
2561 unsigned long flags;
2562 u32 i;
2563 bool as_fault = false;
2564
2565 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2566
2567 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2568 return as_fault;
2569
2570 slot = group->csg_nr;
2571 csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2572 ginfo = &global_iface->groups[slot];
2573
2574 /* Now loop through all the bound CSs, and clean them via a stop */
2575 for (i = 0; i < ginfo->stream_num; i++) {
2576 struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i];
2577
2578 if (group->bound_queues[i]) {
2579 if (group->bound_queues[i]->enabled) {
2580 kbase_csf_firmware_cs_input_mask(stream,
2581 CS_REQ, CS_REQ_STATE_STOP,
2582 CS_REQ_STATE_MASK);
2583 }
2584
2585 unassign_user_doorbell_from_queue(kbdev,
2586 group->bound_queues[i]);
2587 }
2588 }
2589
2590 unassign_user_doorbell_from_group(kbdev, group);
2591
2592 /* The CSG does not need cleanup other than dropping its AS */
2593 spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2594 as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
2595 kbase_ctx_sched_release_ctx(kctx);
2596 if (unlikely(group->faulted))
2597 as_fault = true;
2598 spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2599
2600 /* Now mark the slot as vacant */
2601 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2602 kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
2603 clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
2604 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2605 kbdev->csf.scheduler.csg_slots_idle_mask[0]);
2606
2607 group->csg_nr = KBASEP_CSG_NR_INVALID;
2608 set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask);
2609 clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2610 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2611
2612 csg_slot->trigger_jiffies = jiffies;
2613 atomic_set(&csg_slot->state, CSG_SLOT_READY);
2614
2615 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot);
2616 dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n",
2617 group->handle, slot);
2618
2619 KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
2620 kbdev->gpu_props.props.raw_props.gpu_id, slot);
2621
2622 /* Notify that the group is off-slot and the csg_reg might be available
2623 * for reuse with other groups in a 'lazy unbinding' style.
2624 */
2625 kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2626
2627 return as_fault;
2628 }
2629
2630 static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
2631 {
2632 struct kbase_device *kbdev = group->kctx->kbdev;
2633 struct kbase_csf_csg_slot *csg_slot;
2634 struct kbase_csf_cmd_stream_group_info *ginfo;
2635 s8 slot;
2636 u8 prev_prio;
2637 u32 ep_cfg;
2638 u32 csg_req;
2639 unsigned long flags;
2640
2641 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2642
2643 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2644 return;
2645
2646 slot = group->csg_nr;
2647 csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2648 ginfo = &kbdev->csf.global_iface.groups[slot];
2649
2650 /* CSGs remaining on-slot can be either idle or runnable.
2651 * This also applies in protected mode.
2652 */
2653 WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
2654 (group->run_state == KBASE_CSF_GROUP_IDLE)));
2655
2656 /* Update consumes a group from scanout */
2657 update_offslot_non_idle_cnt_for_onslot_grp(group);
2658
2659 if (csg_slot->priority == prio)
2660 return;
2661
2662 /* Read the csg_ep_cfg back for updating the priority field */
2663 ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
2664 prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
2665 ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2666 kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2667
2668 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2669 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2670 csg_req ^= CSG_REQ_EP_CFG_MASK;
2671 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2672 CSG_REQ_EP_CFG_MASK);
2673 kbase_csf_ring_csg_doorbell(kbdev, slot);
2674 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2675
2676 csg_slot->priority = prio;
2677
2678 dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n",
2679 group->handle, group->kctx->tgid, group->kctx->id, slot,
2680 prev_prio, prio);
2681
2682 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
2683
2684 set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
2685 }
2686
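/**
 * program_csg_slot() - Program a CSG slot with a queue group and start or
 *                      resume it.
 *
 * @group: Pointer to the queue group to be loaded on the slot.
 * @slot:  The CSG slot number to program.
 * @prio:  The dynamic priority to assign to the slot.
 *
 * This binds the group's MCU shared csg_reg, retains an address space for
 * the context, programs all bound queues and the CSG endpoint configuration,
 * then issues a START or RESUME request to the firmware.
 */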
2687 static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
2688 u8 prio)
2689 {
2690 struct kbase_context *kctx = group->kctx;
2691 struct kbase_device *kbdev = kctx->kbdev;
2692 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2693 const u64 shader_core_mask =
2694 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
2695 const u64 tiler_core_mask =
2696 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER);
2697 const u64 compute_mask = shader_core_mask & group->compute_mask;
2698 const u64 fragment_mask = shader_core_mask & group->fragment_mask;
2699 const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
2700 const u8 num_cores = kbdev->gpu_props.num_cores;
2701 const u8 compute_max = min(num_cores, group->compute_max);
2702 const u8 fragment_max = min(num_cores, group->fragment_max);
2703 const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
2704 struct kbase_csf_cmd_stream_group_info *ginfo;
2705 u32 ep_cfg = 0;
2706 u32 csg_req;
2707 u32 state;
2708 int i;
2709 unsigned long flags;
2710 u64 normal_suspend_buf;
2711 u64 protm_suspend_buf;
2712 struct kbase_csf_csg_slot *csg_slot =
2713 &kbdev->csf.scheduler.csg_slots[slot];
2714
2715 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2716
2717 if (WARN_ON(slot < 0) &&
2718 WARN_ON(slot >= global_iface->group_num))
2719 return;
2720
2721 WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
2722
2723 if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
2724 dev_warn(kbdev->dev,
2725 "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
2726 group->handle, group->kctx->tgid, kctx->id, slot);
2727 kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2728 return;
2729 }
2730
2731 /* The suspend buf has already been mapped through binding to csg_reg */
2732 normal_suspend_buf = group->normal_suspend_buf.gpu_va;
2733 protm_suspend_buf = group->protected_suspend_buf.gpu_va;
2734 WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
2735
2736 ginfo = &global_iface->groups[slot];
2737
2738 /* Pick an available address space for this context */
2739 mutex_lock(&kbdev->mmu_hw_mutex);
2740 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
2741 kbase_ctx_sched_retain_ctx(kctx);
2742 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
2743 mutex_unlock(&kbdev->mmu_hw_mutex);
2744
2745 if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
2746 dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
2747 group->handle, kctx->tgid, kctx->id, slot);
2748 kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2749 return;
2750 }
2751
2752 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2753 set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2754 kbdev->csf.scheduler.csg_slots[slot].resident_group = group;
2755 group->csg_nr = slot;
2756 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2757
2758 assign_user_doorbell_to_group(kbdev, group);
2759
2760 /* Now loop through all the bound & kicked CSs, and program them */
2761 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2762 struct kbase_queue *queue = group->bound_queues[i];
2763
2764 if (queue)
2765 program_cs(kbdev, queue, false);
2766 }
2767
2768
2769 /* Endpoint programming for CSG */
2770 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO,
2771 compute_mask & U32_MAX);
2772 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI,
2773 compute_mask >> 32);
2774 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO,
2775 fragment_mask & U32_MAX);
2776 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
2777 fragment_mask >> 32);
2778 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
2779 tiler_mask & U32_MAX);
2780
2781 /* Register group UID with firmware */
2782 kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
2783 group->group_uid);
2784
2785 ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
2786 ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
2787 ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
2788 ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2789 kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2790
2791 /* Program the address space number assigned to the context */
2792 kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
2793
2794 kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO,
2795 normal_suspend_buf & U32_MAX);
2796 kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
2797 normal_suspend_buf >> 32);
2798
2799 /* Note, we program the P-mode buffer pointer here, but actual entry
2800 * into P-mode execution also requires the P-mode phy pages to have been
2801 * allocated and mapped with the bound csg_reg, which has a specific flag
2802 * indicating this P-mode runnable condition before a group is
2803 * granted its P-mode section entry. Without a P-mode entry, the buffer
2804 * pointed to is not going to be accessed at all.
2805 */
2806 kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
2807 kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
2808
2809 if (group->dvs_buf) {
2810 kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
2811 group->dvs_buf & U32_MAX);
2812 kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI,
2813 group->dvs_buf >> 32);
2814 }
2815
2816 /* Enable all interrupts for now */
2817 kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
2818
2819 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2820 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2821 csg_req ^= CSG_REQ_EP_CFG_MASK;
2822 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2823 CSG_REQ_EP_CFG_MASK);
2824
2825 /* Set state to START/RESUME */
2826 if (queue_group_suspended_locked(group)) {
2827 state = CSG_REQ_STATE_RESUME;
2828 } else {
2829 WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
2830 state = CSG_REQ_STATE_START;
2831 }
2832
2833 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
2834 state, CSG_REQ_STATE_MASK);
2835 kbase_csf_ring_csg_doorbell(kbdev, slot);
2836 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2837
2838 /* Update status before ringing the doorbell, marking ready => run */
2839 atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
2840 csg_slot->trigger_jiffies = jiffies;
2841 csg_slot->priority = prio;
2842
2843 /* Trace the programming of the CSG on the slot */
2844 KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
2845 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
2846 group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
2847
2848 dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
2849 group->handle, kctx->tgid, kctx->id, slot, prio);
2850
2851 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
2852 (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
2853 (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
2854
2855 /* Update the heap reclaim manager */
2856 kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
2857
2858 /* Programming a slot consumes a group from scanout */
2859 update_offslot_non_idle_cnt_for_onslot_grp(group);
2860
2861 /* Notify the group's bound csg_reg is now in active use */
2862 kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
2863 }
2864
2865 static void remove_scheduled_group(struct kbase_device *kbdev,
2866 struct kbase_queue_group *group)
2867 {
2868 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2869
2870 lockdep_assert_held(&scheduler->lock);
2871
2872 WARN_ON(group->prepared_seq_num ==
2873 KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID);
2874 WARN_ON(list_empty(&group->link_to_schedule));
2875
2876 list_del_init(&group->link_to_schedule);
2877 scheduler->ngrp_to_schedule--;
2878 group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2879 group->kctx->csf.sched.ngrp_to_schedule--;
2880 }
2881
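/**
 * sched_evict_group() - Evict a queue group from the scheduler.
 *
 * @group: Pointer to the queue group to be evicted.
 * @fault: True if the eviction is due to a fault, in which case the group is
 *         placed in the FAULT_EVICTED state.
 * @update_non_idle_offslot_grps_cnt_from_run_state: Whether the non-idle
 *         off-slot group counter should be adjusted based on the group's
 *         current run state.
 *
 * Disables all bound queues, removes the group from the runnable or
 * idle-wait lists, wakes up waiters and notifies the tiler heap reclaim
 * manager and the MCU shared region management of the eviction.
 */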
2882 static void sched_evict_group(struct kbase_queue_group *group, bool fault,
2883 bool update_non_idle_offslot_grps_cnt_from_run_state)
2884 {
2885 struct kbase_context *kctx = group->kctx;
2886 struct kbase_device *kbdev = kctx->kbdev;
2887 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2888
2889 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2890
2891 if (queue_group_scheduled_locked(group)) {
2892 u32 i;
2893
2894 if (update_non_idle_offslot_grps_cnt_from_run_state &&
2895 (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
2896 group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
2897 int new_val = atomic_dec_return(
2898 &scheduler->non_idle_offslot_grps);
2899 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
2900 new_val);
2901 }
2902
2903 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2904 if (group->bound_queues[i])
2905 group->bound_queues[i]->enabled = false;
2906 }
2907
2908 if (group->prepared_seq_num !=
2909 KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
2910 if (!update_non_idle_offslot_grps_cnt_from_run_state)
2911 update_offslot_non_idle_cnt(group);
2912 remove_scheduled_group(kbdev, group);
2913 }
2914
2915 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2916 remove_group_from_idle_wait(group);
2917 else {
2918 remove_group_from_runnable(scheduler, group,
2919 KBASE_CSF_GROUP_INACTIVE);
2920 }
2921
2922 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2923
2924 if (fault) {
2925 group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
2926 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group,
2927 scheduler->total_runnable_grps);
2928 }
2929
2930 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
2931 (((u64)scheduler->total_runnable_grps) << 32) |
2932 ((u32)group->run_state));
2933 dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
2934 group->handle, scheduler->total_runnable_grps);
2935 /* Notify a group has been evicted */
2936 wake_up_all(&kbdev->csf.event_wait);
2937 }
2938
2939 kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
2940
2941 /* Clear all the bound shared regions and unmap any in-place MMU maps */
2942 kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
2943 }
2944
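/**
 * term_group_sync() - Terminate the CSG slot of a group and wait for the
 *                     firmware to acknowledge it.
 *
 * @group: Pointer to the queue group being terminated.
 *
 * If the firmware does not respond within the timeout, a dump-on-fault is
 * notified and a GPU reset is initiated.
 *
 * Return: 0 on success, or -ETIMEDOUT if the termination request timed out.
 */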
2945 static int term_group_sync(struct kbase_queue_group *group)
2946 {
2947 struct kbase_device *kbdev = group->kctx->kbdev;
2948 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
2949 int err = 0;
2950
2951 term_csg_slot(group);
2952
2953 remaining = wait_event_timeout(kbdev->csf.event_wait,
2954 group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
2955 remaining);
2956
2957 if (unlikely(!remaining)) {
2958 enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
2959
2960 dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
2961 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
2962 group->handle, group->kctx->tgid,
2963 group->kctx->id, group->csg_nr);
2964 if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
2965 error_type = DF_PING_REQUEST_TIMEOUT;
2966 kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
2967 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
2968 kbase_reset_gpu(kbdev);
2969
2970
2971 err = -ETIMEDOUT;
2972 }
2973
2974 return err;
2975 }
2976
2977 void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
2978 {
2979 struct kbase_device *kbdev = group->kctx->kbdev;
2980 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2981 bool wait_for_termination = true;
2982 bool on_slot;
2983
2984 kbase_reset_gpu_assert_failed_or_prevented(kbdev);
2985 lockdep_assert_held(&group->kctx->csf.lock);
2986 mutex_lock(&scheduler->lock);
2987
2988 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
2989 wait_for_dump_complete_on_group_deschedule(group);
2990 if (!queue_group_scheduled_locked(group))
2991 goto unlock;
2992
2993 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2994
2995 #ifdef KBASE_PM_RUNTIME
2996 /* If the queue group is on slot and the Scheduler is in the SLEEPING
2997 * state, then we need to wake up the Scheduler to exit the sleep state
2998 * rather than waiting for the runtime suspend or power down of the GPU.
2999 * The group termination is usually triggered in the context of the
3000 * application thread and it has been seen that certain apps can destroy
3001 * groups at random points and not necessarily when the app is exiting.
3002 */
3003 if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
3004 scheduler_wakeup(kbdev, true);
3005
3006 /* Wait for MCU firmware to start running */
3007 if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
3008 dev_warn(
3009 kbdev->dev,
3010 "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
3011 kbase_backend_get_cycle_cnt(kbdev),
3012 group->handle, group->kctx->tgid,
3013 group->kctx->id, group->csg_nr);
3014 /* No point in waiting for CSG termination if MCU didn't
3015 * become active.
3016 */
3017 wait_for_termination = false;
3018 }
3019 }
3020 #endif
3021 if (!on_slot) {
3022 sched_evict_group(group, false, true);
3023 } else {
3024 bool as_faulty;
3025
3026 if (likely(wait_for_termination))
3027 term_group_sync(group);
3028 else
3029 term_csg_slot(group);
3030
3031 /* Treat the CSG as terminated */
3032 as_faulty = cleanup_csg_slot(group);
3033 /* remove from the scheduler list */
3034 sched_evict_group(group, as_faulty, false);
3035 }
3036
3037 WARN_ON(queue_group_scheduled_locked(group));
3038
3039 unlock:
3040 mutex_unlock(&scheduler->lock);
3041 }
3042
3043 /**
3044 * scheduler_group_schedule() - Schedule a GPU command queue group on firmware
3045 *
3046 * @group: Pointer to the queue group to be scheduled.
3047 *
3048 * This function would enable the scheduling of the GPU command queue group
3049 * on firmware.
3050 *
3051 * Return: 0 on success, or negative on failure.
3052 */
3053 static int scheduler_group_schedule(struct kbase_queue_group *group)
3054 {
3055 struct kbase_context *kctx = group->kctx;
3056 struct kbase_device *kbdev = kctx->kbdev;
3057 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3058
3059 lockdep_assert_held(&kctx->csf.lock);
3060 lockdep_assert_held(&scheduler->lock);
3061
3062 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
3063 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
3064 update_idle_suspended_group_state(group);
3065 else if (queue_group_idle_locked(group)) {
3066 WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
3067 WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
3068
3069 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
3070 update_idle_suspended_group_state(group);
3071 else {
3072 struct kbase_queue_group *protm_grp;
3073 unsigned long flags;
3074
3075 WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
3076 group));
3077
3078 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3079 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3080 group->run_state);
3081
3082 /* A normal mode CSG could be idle on-slot during
3083 * protected mode. In this case clear the
3084 * appropriate bit in csg_slots_idle_mask.
3085 */
3086 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3087 protm_grp = scheduler->active_protm_grp;
3088 if (protm_grp && protm_grp != group) {
3089 clear_bit((unsigned int)group->csg_nr,
3090 scheduler->csg_slots_idle_mask);
3091 /* Request the update to confirm the condition inferred. */
3092 group->reevaluate_idle_status = true;
3093 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
3094 scheduler->csg_slots_idle_mask[0]);
3095 }
3096 spin_unlock_irqrestore(&scheduler->interrupt_lock,
3097 flags);
3098
3099 /* If the GPU is in protected mode then any doorbells rung
3100 * would have no effect. Check if the GPU is in protected
3101 * mode and if this group has higher priority than the
3102 * active protected mode group. If so prompt the FW
3103 * to exit protected mode.
3104 */
3105 if (protm_grp &&
3106 group->scan_seq_num < protm_grp->scan_seq_num) {
3107 /* Prompt the FW to exit protected mode */
3108 scheduler_force_protm_exit(kbdev);
3109 }
3110 }
3111 } else if (!queue_group_scheduled_locked(group)) {
3112 int new_val;
3113
3114 insert_group_to_runnable(&kbdev->csf.scheduler, group,
3115 KBASE_CSF_GROUP_RUNNABLE);
3116 /* A new group is entering the scheduler */
3117 new_val = atomic_inc_return(
3118 &kbdev->csf.scheduler.non_idle_offslot_grps);
3119 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
3120 }
3121
3122 /* Since a group has become active now, check if GPU needs to be
3123 * powered up. Also rekick the Scheduler.
3124 */
3125 scheduler_wakeup(kbdev, true);
3126
3127 return 0;
3128 }
3129
3130 /**
3131 * set_max_csg_slots() - Set the number of available CSG slots
3132 *
3133 * @kbdev: Pointer of the GPU device.
3134 *
3135 * This function would set/limit the number of CSG slots that
3136 * can be used in the given tick/tock. It would be less than the total CSG
3137 * slots supported by firmware if the number of GPU address space slots
3138 * required to utilize all the CSG slots is more than the available
3139 * address space slots.
3140 */
3141 static inline void set_max_csg_slots(struct kbase_device *kbdev)
3142 {
3143 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3144 unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
3145 unsigned int max_address_space_slots =
3146 kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
3147
3148 WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
3149
3150 if (likely(scheduler->num_active_address_spaces <=
3151 max_address_space_slots))
3152 scheduler->num_csg_slots_for_tick = total_csg_slots;
3153 }
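/* A sketch of the intent (hypothetical figures: 8 hardware address spaces,
 * i.e. 7 usable once one is reserved for the MCU): if no more than 7
 * contexts have groups picked for this tick/tock, every CSG slot reported
 * by the firmware may be used; otherwise num_csg_slots_for_tick is left at
 * the smaller value accumulated by count_active_address_space() below.
 */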
3154
3155 /**
3156 * count_active_address_space() - Count the number of GPU address space slots
3157 *
3158 * @kbdev: Pointer of the GPU device.
3159 * @kctx: Pointer of the Kbase context.
3160 *
3161 * This function would update the counter that is tracking the number of GPU
3162 * address space slots that would be required to program the CS
3163 * group slots from the groups at the head of groups_to_schedule list.
3164 */
3165 static inline void count_active_address_space(struct kbase_device *kbdev,
3166 struct kbase_context *kctx)
3167 {
3168 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3169 unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
3170 unsigned int max_address_space_slots =
3171 kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
3172
3173 if (scheduler->ngrp_to_schedule <= total_csg_slots) {
3174 if (kctx->csf.sched.ngrp_to_schedule == 1)
3175 scheduler->num_active_address_spaces++;
3176
3177 if (scheduler->num_active_address_spaces <=
3178 max_address_space_slots)
3179 scheduler->num_csg_slots_for_tick++;
3180 }
3181 }
3182
3183 /* Two schemes are used in assigning the priority to CSG slots for a given
3184 * CSG from the 'groups_to_schedule' list.
3185 * This is needed as an idle on-slot group is deprioritized by moving it to
3186 * the tail of the 'groups_to_schedule' list. As a result it can either get
3187 * evicted from the CSG slot in the current tick/tock, or its position
3188 * can be after the lower priority non-idle groups in the 'groups_to_schedule'
3189 * list. The latter case can result in the on-slot subset containing both
3190 * non-idle and idle CSGs, and is handled through the 2nd scheme described
3191 * below.
3192 *
3193 * First scheme :- If all the slots are going to be occupied by the non-idle or
3194 * idle groups, then a simple assignment of the priority is done as per the
3195 * position of a group in the 'groups_to_schedule' list. So maximum priority
3196 * gets assigned to the slot of a group which is at the head of the list.
3197 * Here the 'groups_to_schedule' list would effectively be ordered as per the
3198 * static priority of groups.
3199 *
3200 * Second scheme :- If the slots are going to be occupied by a mix of idle and
3201 * non-idle groups then the priority assignment needs to ensure that the
3202 * priority of a slot belonging to a higher priority idle group will always be
3203 * greater than the priority of a slot belonging to a lower priority non-idle
3204 * group, reflecting the original position of a group in the scan order (i.e.
3205 * static priority) 'scan_seq_num', which is set during the prepare phase of a
3206 * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
3207 * is idle.
3208 * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned such that the
3209 * first 'slots_for_tick' groups in the original scan order are assigned a
3210 * priority in the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
3211 * whereas the rest of the groups are assigned a priority in the subrange
3212 * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way, even if an idle higher
3213 * priority group ends up after the non-idle lower priority groups in the
3214 * 'groups_to_schedule' list, it will get a higher slot priority, which enables
3215 * the FW to quickly start the execution of the higher priority group when it
3216 * gets de-idled.
3217 */
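/* Illustrative example of the second scheme (hypothetical figures, with
 * MAX_CSG_SLOT_PRIORITY being 15 as defined above): assume slots_for_tick
 * is 4 and the tick has a mix of idle and non-idle groups. Groups with
 * scan_seq_num 0..3 get slot priorities 15..12 respectively, regardless of
 * where they ended up in 'groups_to_schedule'. Every other group is assigned
 * 15 - (4 + used_slots), i.e. 11, 10, ... counting down as slots are
 * consumed (clamped to 0), so it always lands below the first subrange.
 */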
3218 static u8 get_slot_priority(struct kbase_queue_group *group)
3219 {
3220 struct kbase_csf_scheduler *scheduler =
3221 &group->kctx->kbdev->csf.scheduler;
3222 u8 slot_prio;
3223 u32 slots_for_tick = scheduler->num_csg_slots_for_tick;
3224 u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots;
3225 /* Check if all the slots are going to be occupied by the non-idle or
3226 * idle groups.
3227 */
3228 if (scheduler->non_idle_scanout_grps >= slots_for_tick ||
3229 !scheduler->non_idle_scanout_grps) {
3230 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);
3231 } else {
3232 /* There will be a mix of idle and non-idle groups. */
3233 if (group->scan_seq_num < slots_for_tick)
3234 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY -
3235 group->scan_seq_num);
3236 else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots))
3237 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots));
3238 else
3239 slot_prio = 0;
3240 }
3241 return slot_prio;
3242 }
3243
3244 /**
3245 * update_resident_groups_priority() - Update the priority of resident groups
3246 *
3247 * @kbdev: The GPU device.
3248 *
3249 * This function will update the priority of all resident queue groups
3250 * that are at the head of groups_to_schedule list, preceding the first
3251 * non-resident group.
3252 *
3253 * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
3254 * the priority update.
3255 */
3256 static void update_resident_groups_priority(struct kbase_device *kbdev)
3257 {
3258 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3259 u32 num_groups = scheduler->num_csg_slots_for_tick;
3260
3261 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3262 while (!list_empty(&scheduler->groups_to_schedule)) {
3263 struct kbase_queue_group *group =
3264 list_first_entry(&scheduler->groups_to_schedule,
3265 struct kbase_queue_group,
3266 link_to_schedule);
3267 bool resident =
3268 kbasep_csf_scheduler_group_is_on_slot_locked(group);
3269
3270 if ((group->prepared_seq_num >= num_groups) || !resident)
3271 break;
3272
3273 update_csg_slot_priority(group,
3274 get_slot_priority(group));
3275
3276 /* Drop the head group from the list */
3277 remove_scheduled_group(kbdev, group);
3278 scheduler->remaining_tick_slots--;
3279 }
3280 }
3281
3282 /**
3283 * program_group_on_vacant_csg_slot() - Program a non-resident group on the
3284 * given vacant CSG slot.
3285 * @kbdev: Pointer to the GPU device.
3286 * @slot: Vacant CSG slot number.
3287 *
3288 * This function will program a non-resident group at the head of
3289 * kbase_csf_scheduler.groups_to_schedule list on the given vacant
3290 * CSG slot, provided the initial position of the non-resident
3291 * group in the list is less than the number of CSG slots and there is
3292 * an available GPU address space slot.
3293 * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
3294 * programming the slot.
3295 */
3296 static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
3297 s8 slot)
3298 {
3299 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3300 struct kbase_queue_group *const group =
3301 list_empty(&scheduler->groups_to_schedule) ? NULL :
3302 list_first_entry(&scheduler->groups_to_schedule,
3303 struct kbase_queue_group,
3304 link_to_schedule);
3305 u32 num_groups = scheduler->num_csg_slots_for_tick;
3306
3307 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3308 if (group && (group->prepared_seq_num < num_groups)) {
3309 bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group);
3310
3311 if (!WARN_ON(ret)) {
3312 if (kctx_as_enabled(group->kctx) && !group->faulted) {
3313 program_csg_slot(group, slot,
3314 get_slot_priority(group));
3315
3316 if (likely(csg_slot_in_use(kbdev, slot))) {
3317 /* Drop the head group from the list */
3318 remove_scheduled_group(kbdev, group);
3319 scheduler->remaining_tick_slots--;
3320 }
3321 } else {
3322 update_offslot_non_idle_cnt(group);
3323 remove_scheduled_group(kbdev, group);
3324 }
3325 }
3326 }
3327 }
3328
3329 /**
3330 * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
3331 * group and update the priority of resident groups.
3332 *
3333 * @kbdev: Pointer to the GPU device.
3334 * @slot: Vacant CSG slot number.
3335 *
3336 * This function will first update the priority of all resident queue groups
3337 * that are at the head of groups_to_schedule list, preceding the first
3338 * non-resident group, it will then try to program the given CS
3339 * group slot with the non-resident group. Finally update the priority of all
3340 * resident queue groups following the non-resident group.
3341 *
3342 * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
3343 */
3344 static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
3345 {
3346 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3347 struct kbase_csf_csg_slot *const csg_slot =
3348 scheduler->csg_slots;
3349
3350 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3351 WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY);
3352
3353 /* First update priority for already resident groups (if any)
3354 * before the non-resident group
3355 */
3356 update_resident_groups_priority(kbdev);
3357
3358 /* Now consume the vacant slot for the non-resident group */
3359 program_group_on_vacant_csg_slot(kbdev, slot);
3360
3361 /* Now update priority for already resident groups (if any)
3362 * following the non-resident group
3363 */
3364 update_resident_groups_priority(kbdev);
3365 }
3366
3367 static bool slots_state_changed(struct kbase_device *kbdev,
3368 unsigned long *slots_mask,
3369 bool (*state_check_func)(struct kbase_device *, s8))
3370 {
3371 u32 num_groups = kbdev->csf.global_iface.group_num;
3372 DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0};
3373 bool changed = false;
3374 u32 i;
3375
3376 for_each_set_bit(i, slots_mask, num_groups) {
3377 if (state_check_func(kbdev, (s8)i)) {
3378 set_bit(i, changed_slots);
3379 changed = true;
3380 }
3381 }
3382
3383 if (changed)
3384 bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS);
3385
3386 return changed;
3387 }
3388
3389 /**
3390 * program_suspending_csg_slots() - Program the CSG slots vacated on suspension
3391 * of queue groups running on them.
3392 *
3393 * @kbdev: Pointer to the GPU device.
3394 *
3395 * This function will first wait for the ongoing suspension to complete on a
3396 * CSG slot and will then program the vacant slot with the
3397 * non-resident queue group inside the groups_to_schedule list.
3398 * The programming of the non-resident queue group on the vacant slot could
3399 * fail due to the unavailability of a free GPU address space slot, and so the
3400 * programming is re-attempted after the ongoing suspension has completed
3401 * for all the CSG slots.
3402 * The priority of resident groups before and after the non-resident group
3403 * in the groups_to_schedule list would also be updated.
3404 * This would be repeated for all the slots undergoing suspension.
3405 * GPU reset would be initiated if the wait for suspend times out.
3406 */
3407 static void program_suspending_csg_slots(struct kbase_device *kbdev)
3408 {
3409 u32 num_groups = kbdev->csf.global_iface.group_num;
3410 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3411 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
3412 DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
3413 bool suspend_wait_failed = false;
3414 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3415
3416 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3417
3418 /* In the current implementation, csgs_events_enable_mask would be used
3419 * only to indicate suspending CSGs.
3420 */
3421 bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
3422 MAX_SUPPORTED_CSGS);
3423
3424 while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3425 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3426
3427 bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3428
3429 remaining = wait_event_timeout(kbdev->csf.event_wait,
3430 slots_state_changed(kbdev, changed,
3431 csg_slot_stopped_raw),
3432 remaining);
3433
3434 if (likely(remaining)) {
3435 u32 i;
3436
3437 for_each_set_bit(i, changed, num_groups) {
3438 struct kbase_queue_group *group =
3439 scheduler->csg_slots[i].resident_group;
3440
3441 if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
3442 continue;
3443
3444 /* The on slot csg is now stopped */
3445 clear_bit(i, slot_mask);
3446
3447 KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
3448 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
3449
3450 if (likely(group)) {
3451 bool as_fault;
3452 /* Only do save/cleanup if the
3453 * group is not terminated during
3454 * the sleep.
3455 */
3456 save_csg_slot(group);
3457 as_fault = cleanup_csg_slot(group);
3458 /* If AS fault detected, evict it */
3459 if (as_fault) {
3460 sched_evict_group(group, true, true);
3461 set_bit(i, evicted_mask);
3462 }
3463 }
3464
3465 program_vacant_csg_slot(kbdev, (s8)i);
3466 }
3467 } else {
3468 u32 i;
3469
3470 /* Groups that have failed to suspend in time shall
3471 * raise a fatal error as they could no longer be
3472 * safely resumed.
3473 */
3474 for_each_set_bit(i, slot_mask, num_groups) {
3475 struct kbase_queue_group *const group =
3476 scheduler->csg_slots[i].resident_group;
3477 enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
3478
3479 struct base_gpu_queue_group_error const
3480 err_payload = { .error_type =
3481 BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3482 .payload = {
3483 .fatal_group = {
3484 .status =
3485 GPU_EXCEPTION_TYPE_SW_FAULT_2,
3486 } } };
3487
3488 if (unlikely(group == NULL))
3489 continue;
3490
3491 /* TODO GPUCORE-25328: The CSG can't be
3492 * terminated, the GPU will be reset as a
3493 * work-around.
3494 */
3495 dev_warn(
3496 kbdev->dev,
3497 "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
3498 kbase_backend_get_cycle_cnt(kbdev),
3499 group->handle, group->kctx->tgid,
3500 group->kctx->id, i,
3501 kbdev->csf.fw_timeout_ms);
3502 if (kbase_csf_firmware_ping_wait(kbdev,
3503 FW_PING_AFTER_ERROR_TIMEOUT_MS))
3504 error_type = DF_PING_REQUEST_TIMEOUT;
3505 schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3506
3507 kbase_csf_add_group_fatal_error(group, &err_payload);
3508 kbase_event_wakeup(group->kctx);
3509
3510 /* The group has failed suspension, stop
3511 * further examination.
3512 */
3513 clear_bit(i, slot_mask);
3514 set_bit(i, scheduler->csgs_events_enable_mask);
3515 }
3516
3517 suspend_wait_failed = true;
3518 }
3519 }
3520
3521 if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
3522 dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
3523 num_groups, evicted_mask);
3524
3525 if (likely(!suspend_wait_failed)) {
3526 u32 i;
3527
3528 while (scheduler->ngrp_to_schedule &&
3529 scheduler->remaining_tick_slots) {
3530 i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
3531 num_groups);
3532 if (WARN_ON(i == num_groups))
3533 break;
3534 program_vacant_csg_slot(kbdev, (s8)i);
3535 if (!csg_slot_in_use(kbdev, (int)i)) {
3536 dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
3537 break;
3538 }
3539 }
3540 } else {
3541 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3542 kbase_reset_gpu(kbdev);
3543 }
3544 }
3545
3546 static void suspend_queue_group(struct kbase_queue_group *group)
3547 {
3548 unsigned long flags;
3549 struct kbase_csf_scheduler *const scheduler =
3550 &group->kctx->kbdev->csf.scheduler;
3551
3552 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3553 /* This shall be used in program_suspending_csg_slots() where we
3554 * assume that whilst CSGs are being suspended, this bitmask is not
3555 * used by anything else i.e., it indicates only the CSGs going
3556 * through suspension.
3557 */
3558 clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
3559 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3560
3561 /* If AS fault detected, terminate the group */
3562 if (!kctx_as_enabled(group->kctx) || group->faulted)
3563 term_csg_slot(group);
3564 else
3565 suspend_csg_slot(group);
3566 }
3567
3568 static void wait_csg_slots_start(struct kbase_device *kbdev)
3569 {
3570 u32 num_groups = kbdev->csf.global_iface.group_num;
3571 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3572 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3573 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3574 u32 i;
3575
3576 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3577
3578 /* extract start slot flags for check */
3579 for (i = 0; i < num_groups; i++) {
3580 if (atomic_read(&scheduler->csg_slots[i].state) ==
3581 CSG_SLOT_READY2RUN)
3582 set_bit(i, slot_mask);
3583 }
3584
3585 while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3586 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3587
3588 bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3589
3590 remaining = wait_event_timeout(kbdev->csf.event_wait,
3591 slots_state_changed(kbdev, changed, csg_slot_running),
3592 remaining);
3593
3594 if (likely(remaining)) {
3595 for_each_set_bit(i, changed, num_groups) {
3596 struct kbase_queue_group *group =
3597 scheduler->csg_slots[i].resident_group;
3598
3599 /* The on slot csg is now running */
3600 clear_bit(i, slot_mask);
3601 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3602 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3603 group->run_state);
3604 }
3605 } else {
3606 const int csg_nr = ffs(slot_mask[0]) - 1;
3607 struct kbase_queue_group *group =
3608 scheduler->csg_slots[csg_nr].resident_group;
3609 enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT;
3610
3611 dev_err(kbdev->dev,
3612 "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
3613 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
3614 num_groups, slot_mask);
3615 if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3616 error_type = DF_PING_REQUEST_TIMEOUT;
3617 schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3618
3619 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3620 kbase_reset_gpu(kbdev);
3621 break;
3622 }
3623 }
3624 }
3625
3626 /**
3627 * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
3628 * flagged after the completion of a CSG status
3629 * update command
3630 *
3631 * @kbdev: Pointer to the GPU device.
3632 * @slot: The given slot for checking an occupying resident group's idle
3633 * state.
3634 *
3635 * This function is called at the start of scheduling tick to check the
3636 * idle status of a queue group resident on a CSG slot.
3637 * The caller must make sure the corresponding status update command has
3638 * been called and completed before checking this status.
3639 *
3640 * Return: true if the group resident on slot is idle, otherwise false.
3641 */
3642 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
3643 unsigned long slot)
3644 {
3645 struct kbase_csf_cmd_stream_group_info *ginfo =
3646 &kbdev->csf.global_iface.groups[slot];
3647 bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
3648 CSG_STATUS_STATE_IDLE_MASK;
3649
3650 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3651
3652 return idle;
3653 }
3654
3655 /**
3656 * slots_update_state_changed() - Check the handshake state of a subset of
3657 * command group slots.
3658 *
3659 * @kbdev: The GPU device.
3660 * @field_mask: The field mask for checking the state in the csg_req/ack.
3661 * @slots_mask: A bit_map specifying the slots to check.
3662 * @slots_done: A cleared bit_map for returning the slots that
3663 * have finished update.
3664 *
3665 * Checks the state of a subset of slots selected through the slots_mask
3666 * bit_map. Records which slots' handshakes completed and sends them back in the
3667 * slots_done bit_map.
3668 *
3669 * Return: true if the slots_done is set for at least one slot.
3670 * Otherwise false.
3671 */
3672 static
3673 bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
3674 const unsigned long *slots_mask, unsigned long *slots_done)
3675 {
3676 u32 num_groups = kbdev->csf.global_iface.group_num;
3677 bool changed = false;
3678 u32 i;
3679
3680 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3681
3682 for_each_set_bit(i, slots_mask, num_groups) {
3683 struct kbase_csf_cmd_stream_group_info const *const ginfo =
3684 &kbdev->csf.global_iface.groups[i];
3685 u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
3686
3687 state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3688
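/* The REQ/ACK pair uses a toggle handshake: the host raises a request
 * by flipping the relevant REQ bit and the firmware acknowledges it by
 * flipping the matching ACK bit back to equality. A zero in the masked
 * XOR value therefore means the handshake for this slot has completed.
 */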
3689 if (!(state & field_mask)) {
3690 set_bit(i, slots_done);
3691 changed = true;
3692 }
3693 }
3694
3695 return changed;
3696 }
3697
3698 /**
3699 * wait_csg_slots_handshake_ack - Wait for the req/ack handshakes to complete on
3700 * the specified groups.
3701 *
3702 * @kbdev: Pointer to the GPU device.
3703 * @field_mask: The field mask for checking the state in the csg_req/ack.
3704 * @slot_mask: Bitmap reflecting the slots, the function will modify
3705 * the acknowledged slots by clearing their corresponding
3706 * bits.
3707 * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
3708 *
3709 * This function waits for the acknowledgment of the requests that have
3710 * already been placed for the CSG slots by the caller. Currently used for
3711 * the CSG priority update and status update requests.
3712 *
3713 * Return: 0 if all specified slots are acknowledged; otherwise -ETIMEDOUT. On
3714 * a timeout, the bits of the unacknowledged slots remain
3715 * set in the slot_mask.
3716 */
3717 static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
3718 u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
3719 {
3720 const u32 num_groups = kbdev->csf.global_iface.group_num;
3721 long remaining = wait_in_jiffies;
3722
3723 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3724
3725 while (!bitmap_empty(slot_mask, num_groups) &&
3726 !kbase_reset_gpu_is_active(kbdev)) {
3727 DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
3728
3729 remaining = wait_event_timeout(kbdev->csf.event_wait,
3730 slots_update_state_changed(kbdev, field_mask,
3731 slot_mask, dones),
3732 remaining);
3733
3734 if (likely(remaining))
3735 bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
3736 else {
3737
3738 /* Timed-out on the wait */
3739 return -ETIMEDOUT;
3740 }
3741 }
3742
3743 return 0;
3744 }
3745
3746 static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
3747 {
3748 unsigned long *slot_mask =
3749 kbdev->csf.scheduler.csg_slots_prio_update;
3750 long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3751 int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
3752 slot_mask, wait_time);
3753
3754 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3755
3756 if (unlikely(ret != 0)) {
3757 const int csg_nr = ffs(slot_mask[0]) - 1;
3758 struct kbase_queue_group *group =
3759 kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
3760 enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT;
3761
3762 dev_warn(
3763 kbdev->dev,
3764 "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3765 kbase_backend_get_cycle_cnt(kbdev),
3766 kbdev->csf.fw_timeout_ms,
3767 slot_mask[0]);
3768 if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3769 error_type = DF_PING_REQUEST_TIMEOUT;
3770 schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3771
3772 /* Timeout could indicate firmware is unresponsive so trigger a GPU reset. */
3773 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
3774 kbase_reset_gpu(kbdev);
3775 }
3776 }
3777
3778 static void report_csg_termination(struct kbase_queue_group *const group)
3779 {
3780 struct base_gpu_queue_group_error
3781 err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3782 .payload = { .fatal_group = {
3783 .status = GPU_EXCEPTION_TYPE_SW_FAULT_2,
3784 } } };
3785
3786 kbase_csf_add_group_fatal_error(group, &err);
3787 }
3788
3789 void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
3790 struct kbase_context *kctx, struct list_head *evicted_groups)
3791 {
3792 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3793 struct kbase_queue_group *group;
3794 u32 num_groups = kbdev->csf.global_iface.group_num;
3795 u32 slot;
3796 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3797
3798 lockdep_assert_held(&kctx->csf.lock);
3799 mutex_lock(&scheduler->lock);
3800
3801 /* This code is only called during reset, so we don't wait for the CSG
3802 * slots to be stopped
3803 */
3804 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
3805
3806 KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
3807 for (slot = 0; slot < num_groups; slot++) {
3808 group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
3809 if (group && group->kctx == kctx) {
3810 bool as_fault;
3811
3812 dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset",
3813 group->handle, group->csg_nr);
3814
3815 term_csg_slot(group);
3816 as_fault = cleanup_csg_slot(group);
3817 /* remove the group from the scheduler list */
3818 sched_evict_group(group, as_fault, false);
3819 /* signal Userspace that CSG is being terminated */
3820 report_csg_termination(group);
3821 /* return the evicted group to the caller */
3822 list_add_tail(&group->link, evicted_groups);
3823 set_bit(slot, slot_mask);
3824 }
3825 }
3826
3827 dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
3828 kctx->tgid, kctx->id, num_groups, slot_mask);
3829
3830 /* Fatal errors may have been the cause of the GPU reset
3831 * taking place, in which case we want to make sure that
3832 * we wake up the fatal event queue to notify userspace
3833 * only once. Otherwise, we may have duplicate event
3834 * notifications between the time the first notification
3835 * occurs and the time the GPU is reset.
3836 */
3837 kbase_event_wakeup(kctx);
3838
3839 mutex_unlock(&scheduler->lock);
3840 KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups);
3841 }
3842
3843 /**
3844 * scheduler_slot_protm_ack - Acknowledging the protected region requests
3845 * from the resident group on a given slot.
3846 *
3847 * @kbdev: Pointer to the GPU device.
3848 * @group: Pointer to the resident group on the given slot.
3849 * @slot: The slot that the given group is actively operating on.
3850 *
3851 * The function assumes that the given slot is in a stable running state and
3852 * that the caller has already determined that any pending protected region
3853 * requests of the resident group should be acknowledged.
3854 *
3855 * Return: true if the group has pending protm request(s) and is acknowledged.
3856 * The caller should arrange to enter the protected mode for servicing
3857 * it. Otherwise return false, indicating the group has no pending protm
3858 * request.
3859 */
3860 static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
3861 struct kbase_queue_group *const group,
3862 const int slot)
3863 {
3864 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3865 bool protm_ack = false;
3866 struct kbase_csf_cmd_stream_group_info *ginfo =
3867 &kbdev->csf.global_iface.groups[slot];
3868 u32 max_csi;
3869 int i;
3870
3871 if (WARN_ON(scheduler->csg_slots[slot].resident_group != group))
3872 return protm_ack;
3873
3874 lockdep_assert_held(&scheduler->lock);
3875 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock);
3876
3877 max_csi = ginfo->stream_num;
3878 for (i = find_first_bit(group->protm_pending_bitmap, max_csi);
3879 i < max_csi;
3880 i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) {
3881 struct kbase_queue *queue = group->bound_queues[i];
3882
3883 clear_bit(i, group->protm_pending_bitmap);
3884 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
3885 group->protm_pending_bitmap[0]);
3886
3887 if (!WARN_ON(!queue) && queue->enabled) {
3888 struct kbase_csf_cmd_stream_info *stream =
3889 &ginfo->streams[i];
3890 u32 cs_protm_ack = kbase_csf_firmware_cs_output(
3891 stream, CS_ACK) &
3892 CS_ACK_PROTM_PEND_MASK;
3893 u32 cs_protm_req = kbase_csf_firmware_cs_input_read(
3894 stream, CS_REQ) &
3895 CS_REQ_PROTM_PEND_MASK;
3896
3897 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group,
3898 queue, cs_protm_ack ^ cs_protm_req);
3899
3900 if (cs_protm_ack == cs_protm_req) {
3901 dev_dbg(kbdev->dev,
3902 "PROTM-ack already done for queue-%d group-%d slot-%d",
3903 queue->csi_index, group->handle, slot);
3904 continue;
3905 }
3906
3907 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
3908 cs_protm_ack,
3909 CS_ACK_PROTM_PEND_MASK);
3910 protm_ack = true;
3911 dev_dbg(kbdev->dev,
3912 "PROTM-ack for queue-%d, group-%d slot-%d",
3913 queue->csi_index, group->handle, slot);
3914 }
3915 }
3916
3917 return protm_ack;
3918 }
3919
3920 /**
3921 * protm_enter_set_next_pending_seq - Update the scheduler's field of
3922 * tick_protm_pending_seq to that from the next available on-slot protm
3923 * pending CSG.
3924 *
3925 * @kbdev: Pointer to the GPU device.
3926 *
3927 * If applicable, the function updates the scheduler's tick_protm_pending_seq
3928 * field from the next available on-slot protm pending CSG. If not, the field
3929 * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
3930 */
3931 static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
3932 {
3933 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3934 u32 num_groups = kbdev->csf.global_iface.group_num;
3935 u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
3936 DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
3937 u32 i;
3938
3939 kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3940
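/* Idle-flagged slots are expected to be a subset of the in-use slots,
 * so XORing the idle mask with the in-use bitmap leaves exactly the
 * in-use slots that are not idle, i.e. the currently active CSGs.
 */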
3941 bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
3942 num_groups);
3943 /* Reset the tick's pending protm seq number to invalid initially */
3944 scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
3945 for_each_set_bit(i, active_csgs, num_groups) {
3946 struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
3947
3948 /* Set to the next pending protm group's scan_seq_number */
3949 if ((group != scheduler->active_protm_grp) &&
3950 (!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
3951 (group->scan_seq_num < scheduler->tick_protm_pending_seq))
3952 scheduler->tick_protm_pending_seq = group->scan_seq_num;
3953 }
3954 }
3955
3956 /**
3957 * scheduler_group_check_protm_enter - Request the given group to be evaluated
3958 * for triggering the protected mode.
3959 *
3960 * @kbdev: Pointer to the GPU device.
3961 * @input_grp: Pointer to the GPU queue group.
3962 *
3963 * The function assumes the given group is either an active running group or
3964 * the scheduler internally maintained field scheduler->top_grp.
3965 *
3966 * If the GPU is not already running in protected mode and the input group
3967 * has protected region requests from its bound queues, the requests are
3968 * acknowledged and the GPU is instructed to enter the protected mode.
3969 */
3970 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
3971 struct kbase_queue_group *const input_grp)
3972 {
3973 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3974 struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
3975 unsigned long flags;
3976 bool protm_in_use;
3977
3978 lockdep_assert_held(&scheduler->lock);
3979
3980 /* Return early if the physical pages have not been allocated yet */
3981 if (unlikely(!sbuf->pma))
3982 return;
3983
3984 /* This lock is taken to prevent the issuing of MMU command during the
3985 * transition to protected mode. This helps avoid the scenario where the
3986 * entry to protected mode happens with a memory region being locked and
3987 * the same region is then accessed by the GPU in protected mode.
3988 */
3989 mutex_lock(&kbdev->mmu_hw_mutex);
3990 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3991
3992 /* Check if the previous transition to enter & exit the protected
3993 * mode has completed or not.
3994 */
3995 protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
3996 kbdev->protected_mode;
3997 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
3998
3999 /* Firmware samples the PROTM_PEND ACK bit for CSs when
4000 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
4001 * is set for a CS after Host has sent the PROTM_ENTER
4002 * Global request, then there is no guarantee that firmware will
4003 * notice that prior to switching to protected mode. And firmware
4004 * may not again raise the PROTM_PEND interrupt for that CS
4005 * later on. To avoid that uncertainty PROTM_PEND ACK bit
4006 * is not set for a CS if the request to enter protected
4007 * mode has already been sent. It will be set later (after the exit
4008 * from protected mode has taken place) when the group to which
4009 * CS is bound becomes the top group.
4010 *
4011 * The actual decision to enter protected mode hinges on the input
4012 * group being the top priority group or, in case the previous
4013 * top-group was evicted from the scheduler during the tick, its
4014 * would-be replacement, and on it currently being in a stable state
4015 * (i.e. the slot state is running).
4016 */
4017 if (!protm_in_use && !WARN_ON(!input_grp)) {
4018 const int slot =
4019 kbase_csf_scheduler_group_get_slot_locked(input_grp);
4020
4021 /* check the input_grp is running and requesting protected mode
4022 */
4023 if (slot >= 0 &&
4024 atomic_read(&scheduler->csg_slots[slot].state) ==
4025 CSG_SLOT_RUNNING) {
4026 if (kctx_as_enabled(input_grp->kctx) &&
4027 scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
4028 int err;
4029
4030 /* Option of acknowledging to multiple
4031 * CSGs from the same kctx is dropped,
4032 * after consulting with the
4033 * architecture team. See the comment in
4034 * GPUCORE-21394.
4035 */
4036
4037 /* Switch to protected mode */
4038 scheduler->active_protm_grp = input_grp;
4039 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
4040 0u);
4041
4042 #if IS_ENABLED(CONFIG_MALI_CORESIGHT)
4043 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4044
4045 /* Coresight must be disabled before entering protected mode. */
4046 kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
4047
4048 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4049 #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
4050
4051 kbase_csf_enter_protected_mode(kbdev);
4052 /* Set the pending protm seq number to the next one */
4053 protm_enter_set_next_pending_seq(kbdev);
4054
4055 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4056
4057 err = kbase_csf_wait_protected_mode_enter(kbdev);
4058 mutex_unlock(&kbdev->mmu_hw_mutex);
4059
4060 if (err)
4061 schedule_actions_trigger_df(kbdev, input_grp->kctx,
4062 DF_PROTECTED_MODE_ENTRY_FAILURE);
4063
4064 scheduler->protm_enter_time = ktime_get_raw();
4065
4066 return;
4067 }
4068 }
4069 }
4070
4071 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4072 mutex_unlock(&kbdev->mmu_hw_mutex);
4073 }
4074
4075 /**
4076 * scheduler_check_pmode_progress - Check if protected mode execution is progressing
4077 *
4078 * @kbdev: Pointer to the GPU device.
4079 *
4080 * This function is called when the GPU is in protected mode.
4081 *
4082 * It will check if the time spent in protected mode is less
4083 * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT
4084 * request is sent to the FW.
4085 */
4086 static void scheduler_check_pmode_progress(struct kbase_device *kbdev)
4087 {
4088 u64 protm_spent_time_ms;
4089 u64 protm_progress_timeout =
4090 kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT);
4091 s64 diff_ms_signed =
4092 ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time);
4093
4094 if (diff_ms_signed < 0)
4095 return;
4096
4097 lockdep_assert_held(&kbdev->csf.scheduler.lock);
4098
4099 protm_spent_time_ms = (u64)diff_ms_signed;
4100 if (protm_spent_time_ms < protm_progress_timeout)
4101 return;
4102
4103 dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu",
4104 protm_spent_time_ms, protm_progress_timeout);
4105
4106 /* Prompt the FW to exit protected mode */
4107 scheduler_force_protm_exit(kbdev);
4108 }
4109
4110 static void scheduler_apply(struct kbase_device *kbdev)
4111 {
4112 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4113 const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
4114 const u32 available_csg_slots = scheduler->num_csg_slots_for_tick;
4115 u32 suspend_cnt = 0;
4116 u32 remain_cnt = 0;
4117 u32 resident_cnt = 0;
4118 struct kbase_queue_group *group;
4119 u32 i;
4120 u32 spare;
4121
4122 lockdep_assert_held(&scheduler->lock);
4123
4124 /* Suspend those resident groups not in the run list */
4125 for (i = 0; i < total_csg_slots; i++) {
4126 group = scheduler->csg_slots[i].resident_group;
4127 if (group) {
4128 resident_cnt++;
4129 if (group->prepared_seq_num >= available_csg_slots) {
4130 suspend_queue_group(group);
4131 suspend_cnt++;
4132 } else
4133 remain_cnt++;
4134 }
4135 }
4136
4137 /* Initialize the remaining available csg slots for the tick/tock */
4138 scheduler->remaining_tick_slots = available_csg_slots;
4139
4140 /* If there are spare slots, apply heads in the list */
4141 spare = (available_csg_slots > resident_cnt) ?
4142 (available_csg_slots - resident_cnt) : 0;
4143 while (!list_empty(&scheduler->groups_to_schedule)) {
4144 group = list_first_entry(&scheduler->groups_to_schedule,
4145 struct kbase_queue_group,
4146 link_to_schedule);
4147
4148 if (kbasep_csf_scheduler_group_is_on_slot_locked(group) &&
4149 group->prepared_seq_num < available_csg_slots) {
4150 /* One of the resident remainders */
4151 update_csg_slot_priority(group,
4152 get_slot_priority(group));
4153 } else if (spare != 0) {
4154 s8 slot = (s8)find_first_zero_bit(
4155 kbdev->csf.scheduler.csg_inuse_bitmap,
4156 total_csg_slots);
4157
4158 if (WARN_ON(slot >= (s8)total_csg_slots))
4159 break;
4160
4161 if (!kctx_as_enabled(group->kctx) || group->faulted) {
4162 /* Drop the head group and continue */
4163 update_offslot_non_idle_cnt(group);
4164 remove_scheduled_group(kbdev, group);
4165 continue;
4166 }
4167 program_csg_slot(group, slot,
4168 get_slot_priority(group));
4169 if (unlikely(!csg_slot_in_use(kbdev, slot)))
4170 break;
4171
4172 spare--;
4173 } else
4174 break;
4175
4176 /* Drop the head csg from the list */
4177 remove_scheduled_group(kbdev, group);
4178 if (!WARN_ON(!scheduler->remaining_tick_slots))
4179 scheduler->remaining_tick_slots--;
4180 }
4181
4182 /* Dealing with groups currently going through suspend */
4183 program_suspending_csg_slots(kbdev);
4184 }
4185
4186 static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
4187 struct kbase_context *kctx, int priority)
4188 {
4189 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4190 struct kbase_queue_group *group;
4191
4192 lockdep_assert_held(&scheduler->lock);
4193 lockdep_assert_held(&scheduler->interrupt_lock);
4194 if (WARN_ON(priority < 0) ||
4195 WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
4196 return;
4197
4198 if (!kctx_as_enabled(kctx))
4199 return;
4200
4201 list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority],
4202 link) {
4203 if (WARN_ON(!list_empty(&group->link_to_schedule)))
4204 /* This would be a bug */
4205 list_del_init(&group->link_to_schedule);
4206
4207 if (unlikely(group->faulted))
4208 continue;
4209
4210 /* Set the scanout sequence number, starting from 0 */
4211 group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
4212
4213 if (scheduler->tick_protm_pending_seq ==
4214 KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
4215 if (!bitmap_empty(group->protm_pending_bitmap,
4216 kbdev->csf.global_iface.groups[0].stream_num))
4217 scheduler->tick_protm_pending_seq =
4218 group->scan_seq_num;
4219 }
4220
4221 if (queue_group_idle_locked(group)) {
4222 if (can_schedule_idle_group(group))
4223 list_add_tail(&group->link_to_schedule,
4224 &scheduler->idle_groups_to_schedule);
4225 continue;
4226 }
4227
4228 if (!scheduler->ngrp_to_schedule) {
4229 /* keep the top csg's origin */
4230 scheduler->top_ctx = kctx;
4231 scheduler->top_grp = group;
4232 }
4233
4234 list_add_tail(&group->link_to_schedule,
4235 &scheduler->groups_to_schedule);
4236 group->prepared_seq_num = scheduler->ngrp_to_schedule++;
4237
4238 kctx->csf.sched.ngrp_to_schedule++;
4239 count_active_address_space(kbdev, kctx);
4240 }
4241 }
4242
4243 /**
4244 * scheduler_rotate_groups() - Rotate the runnable queue groups to provide
4245 * fairness of scheduling within a single
4246 * kbase_context.
4247 *
4248 * @kbdev: Pointer to the GPU device.
4249 *
4250 * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
4251 * the highest slot priority) is guaranteed to get the resources that it
4252 * needs we only rotate the kbase_context corresponding to it -
4253 * kbase_csf_scheduler's top_ctx.
4254 *
4255 * The priority level chosen for rotation is the one containing the previous
4256 * scheduling cycle's kbase_csf_scheduler's top_grp.
4257 *
4258 * In a 'fresh-slice-cycle' this always corresponds to the highest group
4259 * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority
4260 * level of the previous scheduling cycle's first runnable kbase_context.
4261 *
4262 * We choose this priority level because when higher priority work is
4263 * scheduled, we should always cause the scheduler to run and do a scan. The
4264 * scan always enumerates the highest priority work first (whether that be
4265 * based on process priority or group priority), and thus
4266 * kbase_csf_scheduler's top_grp will point to the first of those high priority
4267 * groups, which necessarily must be the highest priority group in
4268 * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick
4269 * up that group appropriately.
4270 *
4271 * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL),
4272 * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but
4273 * will set up kbase_csf_scheduler's top_ctx again for the next scheduling
4274 * cycle. Implicitly, a rotation had already occurred by removing
4275 * the kbase_csf_scheduler's top_grp
4276 *
4277 * If kbase_csf_scheduler's top_grp became idle and all other groups belonging
4278 * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's
4279 * top_ctx are also idle, then the effect of this will be to rotate idle
4280 * groups, which might not actually become resident in the next
4281 * scheduling slice. However this is acceptable since a queue group becoming
4282 * idle is implicitly a rotation (as above with evicted queue groups), as it
4283 * automatically allows a new queue group to take the maximum slot priority
4284 * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of
4285 * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
4286 * be for a group in the next lowest priority level or in absence of those the
4287 * next kbase_context's queue groups.
4288 */
4289 static void scheduler_rotate_groups(struct kbase_device *kbdev)
4290 {
4291 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4292 struct kbase_context *const top_ctx = scheduler->top_ctx;
4293 struct kbase_queue_group *const top_grp = scheduler->top_grp;
4294
4295 lockdep_assert_held(&scheduler->lock);
4296 if (top_ctx && top_grp) {
4297 struct list_head *list =
4298 &top_ctx->csf.sched.runnable_groups[top_grp->priority];
4299
4300 WARN_ON(top_grp->kctx != top_ctx);
4301 if (!WARN_ON(list_empty(list))) {
4302 struct kbase_queue_group *new_head_grp;
4303
4304 list_move_tail(&top_grp->link, list);
4305 new_head_grp = (!list_empty(list)) ?
4306 list_first_entry(list, struct kbase_queue_group, link) :
4307 NULL;
4308 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
4309 top_ctx->csf.sched.num_runnable_grps);
4310 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
4311 dev_dbg(kbdev->dev,
4312 "groups rotated for a context, num_runnable_groups: %u\n",
4313 scheduler->top_ctx->csf.sched.num_runnable_grps);
4314 }
4315 }
4316 }
4317
4318 static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
4319 {
4320 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4321 struct list_head *list = &scheduler->runnable_kctxs;
4322
4323 lockdep_assert_held(&scheduler->lock);
4324 if (scheduler->top_ctx) {
4325 if (!WARN_ON(list_empty(list))) {
4326 struct kbase_context *pos;
4327 bool found = false;
4328
4329 /* Locate the ctx on the list */
4330 list_for_each_entry(pos, list, csf.link) {
4331 if (scheduler->top_ctx == pos) {
4332 found = true;
4333 break;
4334 }
4335 }
4336
4337 if (!WARN_ON(!found)) {
4338 struct kbase_context *new_head_kctx;
4339
4340 list_move_tail(&pos->csf.link, list);
4341 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
4342 new_head_kctx = (!list_empty(list)) ?
4343 list_first_entry(list, struct kbase_context, csf.link) :
4344 NULL;
4345 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
4346 0u);
4347 dev_dbg(kbdev->dev, "contexts rotated\n");
4348 }
4349 }
4350 }
4351 }
4352
4353 /**
4354 * scheduler_update_idle_slots_status() - Get the status update for the CSG
4355 * slots for which the IDLE notification was received
4356 * previously.
4357 *
4358 * @kbdev: Pointer to the GPU device.
4359 * @csg_bitmap: Bitmap of the CSG slots for which
4360 * the status update request completed successfully.
4361 * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
4362 * the status update request timed out.
4363 *
4364 * This function sends a CSG status update request for all the CSG slots
4365 * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
4366 * the group's 'reevaluate_idle_status' field is set, the nominally non-idle
4367 * slots are also included in the status update for a confirmation of their
4368 * status. The function waits for the status update request to complete and
4369 * returns the update completed slots bitmap and any timed out idle-flagged
4370 * slots bitmap.
4371 *
4372 * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
4373 * this function.
4374 */
4375 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
4376 unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
4377 {
4378 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4379 const u32 num_groups = kbdev->csf.global_iface.group_num;
4380 struct kbase_csf_global_iface *const global_iface =
4381 &kbdev->csf.global_iface;
4382 unsigned long flags, i;
4383 u32 active_chk = 0;
4384
4385 lockdep_assert_held(&scheduler->lock);
4386
4387 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4388
4389 for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
4390 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4391 struct kbase_queue_group *group = csg_slot->resident_group;
4392 struct kbase_csf_cmd_stream_group_info *const ginfo =
4393 &global_iface->groups[i];
4394 u32 csg_req;
4395 bool idle_flag;
4396
4397 if (WARN_ON(!group)) {
4398 clear_bit(i, scheduler->csg_inuse_bitmap);
4399 clear_bit(i, scheduler->csg_slots_idle_mask);
4400 continue;
4401 }
4402
4403 idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
4404 if (idle_flag || group->reevaluate_idle_status) {
4405 if (idle_flag) {
4406 #ifdef CONFIG_MALI_BIFROST_DEBUG
4407 if (!bitmap_empty(group->protm_pending_bitmap,
4408 ginfo->stream_num)) {
4409 dev_warn(kbdev->dev,
4410 "Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
4411 group->handle, group->kctx->tgid,
4412 group->kctx->id, (int)i);
4413 }
4414 #endif
4415 clear_bit(i, scheduler->csg_slots_idle_mask);
4416 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
4417 scheduler->csg_slots_idle_mask[0]);
4418 } else {
4419 /* Updates include slots for which reevaluation is needed.
4420 * Here one tracks the extra included slots in active_chk.
4421 * For protm pending slots, their active status is assured,
4422 * so there is no need to request an update.
4423 */
4424 active_chk |= BIT(i);
4425 group->reevaluate_idle_status = false;
4426 }
4427
4428 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
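/* Raise a STATUS_UPDATE request by toggling the REQ field so that it
 * differs from the current ACK value; the firmware flips ACK back to
 * match once it has refreshed CSG_STATUS_STATE for this slot.
 */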
4429 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
4430 csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
4431 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
4432 CSG_REQ_STATUS_UPDATE_MASK);
4433
4434 /* Track the slot update requests in csg_bitmap.
4435 * Note, if the scheduler requested extended update, the resulting
4436 * csg_bitmap would be the idle_flags + active_chk. Otherwise it's
4437 * identical to the idle_flags.
4438 */
4439 set_bit(i, csg_bitmap);
4440 } else {
4441 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4442 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4443 group->run_state);
4444 }
4445 }
4446
4447
4448 /* The groups are aggregated into a single kernel doorbell request */
4449 if (!bitmap_empty(csg_bitmap, num_groups)) {
4450 long wt =
4451 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
4452 u32 db_slots = (u32)csg_bitmap[0];
4453
4454 kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
4455 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4456
4457 if (wait_csg_slots_handshake_ack(kbdev,
4458 CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
4459 const int csg_nr = ffs(csg_bitmap[0]) - 1;
4460 struct kbase_queue_group *group =
4461 scheduler->csg_slots[csg_nr].resident_group;
4462
4463 dev_warn(
4464 kbdev->dev,
4465 "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
4466 kbase_backend_get_cycle_cnt(kbdev),
4467 kbdev->csf.fw_timeout_ms,
4468 csg_bitmap[0]);
4469 schedule_actions_trigger_df(kbdev, group->kctx,
4470 DF_CSG_STATUS_UPDATE_TIMEOUT);
4471
4472 /* Store the bitmap of timed out slots */
4473 bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
4474 csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
4475
4476 /* Mask off any failed bit positions contributed by the active slots,
4477  * so that the failed bitmap reported back to the caller contains only
4478  * bits that came from the idle flags. This way, an idle flag that
4479  * failed to update is kept as 'idle' (an informed guess, since the
4480  * update did not reach a conclusive result), while a failed active
4481  * slot is still treated as 'non-idle'. This gives graceful handling
4482  * of the unexpected timeout condition.
4483  */
4484 failed_csg_bitmap[0] &= ~active_chk;
4485
4486 } else {
4487 KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
4488 csg_bitmap[0] = db_slots;
4489 }
4490 } else {
4491 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4492 }
4493 }
4494
4495 /**
4496 * scheduler_handle_idle_slots() - Update the idle status of queue groups
4497 * resident on CSG slots for which the
4498 * IDLE notification was received previously.
4499 *
4500 * @kbdev: Pointer to the GPU device.
4501 *
4502 * This function is called at the start of scheduling tick/tock to reconfirm
4503 * the idle status of queue groups resident on CSG slots for
4504 * which idle notification was received previously, i.e. all the CSG slots
4505 * present in the bitmap scheduler->csg_slots_idle_mask.
4506 * The confirmation is done by sending the CSG status update request to the
4507 * firmware. On completion, the firmware will mark the idleness at the
4508 * slot's interface CSG_STATUS_STATE register accordingly.
4509 *
4510 * The run state of the groups resident on still idle CSG slots is changed to
4511 * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
4512 * updated accordingly.
4513 * The bits corresponding to slots for which the status update request timed out
4514 * remain set in scheduler->csg_slots_idle_mask.
4515 */
4516 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
4517 {
4518 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4519 u32 num_groups = kbdev->csf.global_iface.group_num;
4520 unsigned long flags, i;
4521 DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4522 DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4523
4524 lockdep_assert_held(&scheduler->lock);
4525
4526 scheduler_update_idle_slots_status(kbdev, csg_bitmap,
4527 failed_csg_bitmap);
4528
4529 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4530 for_each_set_bit(i, csg_bitmap, num_groups) {
4531 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4532 struct kbase_queue_group *group = csg_slot->resident_group;
4533
4534 if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING))
4535 continue;
4536 if (WARN_ON(!group))
4537 continue;
4538 if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
4539 group->run_state != KBASE_CSF_GROUP_IDLE))
4540 continue;
4541 if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
4542 continue;
4543
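/* The firmware has acknowledged the status update request for this
 * slot, so its reported idle state can now be trusted and mirrored
 * into the group's run state and the scheduler's idle mask.
 */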
4544 if (group_on_slot_is_idle(kbdev, i)) {
4545 group->run_state = KBASE_CSF_GROUP_IDLE;
4546 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
4547 set_bit(i, scheduler->csg_slots_idle_mask);
4548 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
4549 group, scheduler->csg_slots_idle_mask[0]);
4550 } else {
4551 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4552 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4553 group->run_state);
4554 }
4555 }
4556
4557 bitmap_or(scheduler->csg_slots_idle_mask,
4558 scheduler->csg_slots_idle_mask,
4559 failed_csg_bitmap, num_groups);
4560 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL,
4561 scheduler->csg_slots_idle_mask[0]);
4562 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4563 }
4564
4565 static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
4566 {
4567 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4568 struct kbase_queue_group *group, *n;
4569
4570 list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
4571 link_to_schedule) {
4572 WARN_ON(!can_schedule_idle_group(group));
4573
4574 if (!scheduler->ngrp_to_schedule) {
4575 /* keep the top csg's origin */
4576 scheduler->top_ctx = group->kctx;
4577 scheduler->top_grp = group;
4578 }
4579
4580 group->prepared_seq_num = scheduler->ngrp_to_schedule++;
4581 list_move_tail(&group->link_to_schedule,
4582 &scheduler->groups_to_schedule);
4583
4584 group->kctx->csf.sched.ngrp_to_schedule++;
4585 count_active_address_space(kbdev, group->kctx);
4586 }
4587 }
4588
4589 static void scheduler_rotate(struct kbase_device *kbdev)
4590 {
4591 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4592
4593 lockdep_assert_held(&scheduler->lock);
4594
4595 /* Dealing with rotation */
4596 scheduler_rotate_groups(kbdev);
4597 scheduler_rotate_ctxs(kbdev);
4598 }
4599
4600 static struct kbase_queue_group *get_tock_top_group(
4601 struct kbase_csf_scheduler *const scheduler)
4602 {
4603 struct kbase_context *kctx;
4604 int i;
4605
4606 lockdep_assert_held(&scheduler->lock);
4607 for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4608 list_for_each_entry(kctx,
4609 &scheduler->runnable_kctxs, csf.link) {
4610 struct kbase_queue_group *group;
4611
4612 list_for_each_entry(group,
4613 &kctx->csf.sched.runnable_groups[i],
4614 link) {
4615 if (queue_group_idle_locked(group))
4616 continue;
4617
4618 return group;
4619 }
4620 }
4621 }
4622
4623 return NULL;
4624 }
4625
4626 /**
4627 * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
4628 * suspend or GPU IDLE.
4629 *
4630 * @kbdev: Pointer to the device
4631 * @system_suspend: Flag to indicate it's for system suspend.
4632 *
4633 * This function will suspend all active CSG groups upon either
4634 * system suspend, runtime suspend or GPU IDLE.
4635 *
4636 * Return: 0 on success, -1 otherwise.
4637 */
4638 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
4639 bool system_suspend)
4640 {
4641 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4642 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4643
4644 int ret = suspend_active_queue_groups(kbdev, slot_mask);
4645
4646 if (unlikely(ret)) {
4647 const int csg_nr = ffs(slot_mask[0]) - 1;
4648 struct kbase_queue_group *group =
4649 scheduler->csg_slots[csg_nr].resident_group;
4650 enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
4651
4652 /* The suspend of CSGs failed; trigger a GPU reset to get back
4653  * to a deterministic state.
4654  */
4655 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4656 kbase_backend_get_cycle_cnt(kbdev),
4657 kbdev->csf.fw_timeout_ms,
4658 kbdev->csf.global_iface.group_num, slot_mask);
4659 if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
4660 error_type = DF_PING_REQUEST_TIMEOUT;
4661 schedule_actions_trigger_df(kbdev, group->kctx, error_type);
4662
4663 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
4664 kbase_reset_gpu(kbdev);
4665
4666 return -1;
4667 }
4668
4669 /* Check if the groups became active whilst the suspend was ongoing,
4670 * but only for the case where the system suspend is not in progress
4671 */
4672 if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
4673 return -1;
4674
4675 return 0;
4676 }
4677
4678 /**
4679 * all_on_slot_groups_remained_idle - Live check for all groups' idleness
4680 *
4681 * @kbdev: Pointer to the device.
4682 *
4683 * Returns false if any of the queues inside any of the groups that have been
4684 * assigned a physical CSG slot have work to execute, or have executed work
4685 * since having received a GPU idle notification. This function is used to
4686 * handle a race condition between firmware reporting GPU idle and userspace
4687 * submitting more work by directly ringing a doorbell.
4688 *
4689 * Return: false if any queue inside any resident group has work to be processed
4690 * or has processed work since GPU idle event, true otherwise.
4691 */
4692 static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
4693 {
4694 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4695 /* All CSGs have the same number of CSs */
4696 size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
4697 size_t i;
4698
4699 lockdep_assert_held(&scheduler->lock);
4700 lockdep_assert_held(&scheduler->interrupt_lock);
4701
4702 for_each_set_bit(i, scheduler->csg_slots_idle_mask,
4703 kbdev->csf.global_iface.group_num) {
4704 struct kbase_queue_group *const group =
4705 scheduler->csg_slots[i].resident_group;
4706 size_t j;
4707
4708 for (j = 0; j < max_streams; ++j) {
4709 struct kbase_queue const *const queue =
4710 group->bound_queues[j];
4711 u64 const *output_addr;
4712 u64 cur_extract_ofs;
4713
4714 if (!queue || !queue->user_io_addr)
4715 continue;
4716
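/* The output page follows the input page in the queue's user I/O
 * mapping; compare the extract offset reported there against the
 * previously cached value to detect work executed since the GPU
 * idle notification (see the function documentation above).
 */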
4717 output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
4718 cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
4719 if (cur_extract_ofs != queue->extract_ofs) {
4720 /* More work has been executed since the idle
4721 * notification.
4722 */
4723 return false;
4724 }
4725 }
4726 }
4727
4728 return true;
4729 }
4730
4731 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
4732 {
4733 bool suspend;
4734 unsigned long flags;
4735 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4736
4737 lockdep_assert_held(&scheduler->lock);
4738
4739 if ((scheduler->state == SCHED_SUSPENDED) ||
4740 (scheduler->state == SCHED_SLEEPING))
4741 return false;
4742
4743 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4744 spin_lock(&scheduler->interrupt_lock);
4745
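/* Fast idle handling: trust the idle event just reported by the
 * firmware and skip the re-confirmation of on-slot group idleness
 * that the normal path below performs.
 */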
4746 if (scheduler->fast_gpu_idle_handling) {
4747 scheduler->fast_gpu_idle_handling = false;
4748
4749 if (scheduler->total_runnable_grps) {
4750 suspend = !atomic_read(&scheduler->non_idle_offslot_grps) &&
4751 kbase_pm_idle_groups_sched_suspendable(kbdev);
4752 } else
4753 suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4754 spin_unlock(&scheduler->interrupt_lock);
4755 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4756
4757 return suspend;
4758 }
4759
4760 if (scheduler->total_runnable_grps) {
4761
4762 /* Check both on-slots and off-slots groups idle status */
4763 suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
4764 !atomic_read(&scheduler->non_idle_offslot_grps) &&
4765 kbase_pm_idle_groups_sched_suspendable(kbdev);
4766 } else
4767 suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4768
4769 /* Confirm that all groups are actually idle before proceeding with
4770 * suspension as groups might potentially become active again without
4771 * informing the scheduler in case userspace rings a doorbell directly.
4772 */
4773 if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
4774 unlikely(!all_on_slot_groups_remained_idle(kbdev))))
4775 suspend = false;
4776
4777 spin_unlock(&scheduler->interrupt_lock);
4778 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4779
4780 return suspend;
4781 }
4782
4783 #ifdef KBASE_PM_RUNTIME
4784 /**
4785 * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
4786 * becoming idle.
4787 *
4788 * @kbdev: Pointer to the device.
4789 *
4790 * This function is called on GPU idle notification to trigger the transition of
4791 * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
4792 * turned off. Scheduler's state is changed to sleeping and all the active queue
4793 * groups remain on the CSG slots.
4794 */
4795 static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
4796 {
4797 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4798
4799 lockdep_assert_held(&scheduler->lock);
4800
4801 dev_dbg(kbdev->dev,
4802 "Scheduler to be put to sleep on GPU becoming idle");
4803 cancel_tick_timer(kbdev);
4804 scheduler_pm_idle_before_sleep(kbdev);
4805 scheduler->state = SCHED_SLEEPING;
4806 KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
4807 }
4808 #endif
4809
4810 /**
4811 * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
4812 * becoming idle.
4813 *
4814 * @kbdev: Pointer to the device.
4815 *
4816 * This function is called on GPU idle notification to trigger the power down of
4817 * GPU. Scheduler's state is changed to suspended and all the active queue
4818 * groups are suspended before halting the MCU firmware.
4819 *
4820 * Return: true if scheduler will be suspended or false if suspend is aborted.
4821 */
4822 static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
4823 {
4824 int ret = suspend_active_groups_on_powerdown(kbdev, false);
4825
4826 if (ret) {
4827 dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
4828 atomic_read(
4829 &kbdev->csf.scheduler.non_idle_offslot_grps));
4830 /* Bring forward the next tick */
4831 kbase_csf_scheduler_tick_advance(kbdev);
4832 return false;
4833 }
4834
4835 dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
4836 scheduler_suspend(kbdev);
4837 cancel_tick_timer(kbdev);
4838 return true;
4839 }
4840
4841 static void gpu_idle_worker(struct work_struct *work)
4842 {
4843 struct kbase_device *kbdev = container_of(
4844 work, struct kbase_device, csf.scheduler.gpu_idle_work);
4845 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4846 bool scheduler_is_idle_suspendable = false;
4847 bool all_groups_suspended = false;
4848
4849 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
4850
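/* Pack the (reset, idle, all_suspend) booleans into separate nibbles
 * of the ktrace info word emitted at the end of this worker.
 */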
4851 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
4852 (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
4853
4854 if (kbase_reset_gpu_try_prevent(kbdev)) {
4855 dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
4856 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
4857 __ENCODE_KTRACE_INFO(true, false, false));
4858 return;
4859 }
4860 kbase_debug_csf_fault_wait_completion(kbdev);
4861 mutex_lock(&scheduler->lock);
4862
4863 #if IS_ENABLED(CONFIG_DEBUG_FS)
4864 if (unlikely(scheduler->state == SCHED_BUSY)) {
4865 mutex_unlock(&scheduler->lock);
4866 kbase_reset_gpu_allow(kbdev);
4867 return;
4868 }
4869 #endif
4870
4871 scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
4872 if (scheduler_is_idle_suspendable) {
4873 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
4874 kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
4875 #ifdef KBASE_PM_RUNTIME
4876 if (kbase_pm_gpu_sleep_allowed(kbdev) &&
4877 kbase_csf_scheduler_get_nr_active_csgs(kbdev))
4878 scheduler_sleep_on_idle(kbdev);
4879 else
4880 #endif
4881 all_groups_suspended = scheduler_suspend_on_idle(kbdev);
4882
4883 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u);
4884 }
4885
4886 mutex_unlock(&scheduler->lock);
4887 kbase_reset_gpu_allow(kbdev);
4888 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
4889 __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
4890 all_groups_suspended));
4891 #undef __ENCODE_KTRACE_INFO
4892 }
4893
4894 static int scheduler_prepare(struct kbase_device *kbdev)
4895 {
4896 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4897 unsigned long flags;
4898 int i;
4899
4900 lockdep_assert_held(&scheduler->lock);
4901
4902 /* Empty the groups_to_schedule */
4903 while (!list_empty(&scheduler->groups_to_schedule)) {
4904 struct kbase_queue_group *grp =
4905 list_first_entry(&scheduler->groups_to_schedule,
4906 struct kbase_queue_group,
4907 link_to_schedule);
4908
4909 remove_scheduled_group(kbdev, grp);
4910 }
4911
4912 /* Pre-scan init scheduler fields */
4913 if (WARN_ON(scheduler->ngrp_to_schedule != 0))
4914 scheduler->ngrp_to_schedule = 0;
4915 scheduler->top_ctx = NULL;
4916 scheduler->top_grp = NULL;
4917 scheduler->csg_scan_count_for_tick = 0;
4918 WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule));
4919 scheduler->num_active_address_spaces = 0;
4920 scheduler->num_csg_slots_for_tick = 0;
4921 bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
4922
4923 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4924 scheduler->tick_protm_pending_seq =
4925 KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
4926 /* Scan out to run groups */
4927 for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4928 struct kbase_context *kctx;
4929
4930 list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
4931 scheduler_ctx_scan_groups(kbdev, kctx, i);
4932 }
4933 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4934
4935 /* Update this tick's non-idle groups */
4936 scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
4937
4938 /* Initial number of non-idle off-slot groups, before the scheduler's
4939  * scheduler_apply() operation. This gives a sensible starting point
4940  * for the tick; the value will be adjusted up and down during the
4941  * scheduler's active phase.
4942  */
4943 atomic_set(&scheduler->non_idle_offslot_grps,
4944 scheduler->non_idle_scanout_grps);
4945 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
4946 scheduler->non_idle_scanout_grps);
4947
4948 /* Adds those idle but runnable groups to the scanout list */
4949 scheduler_scan_idle_groups(kbdev);
4950
4951 WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
4952
4953 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4954 scheduler->num_active_address_spaces |
4955 (((u64)scheduler->ngrp_to_schedule) << 32));
4956 set_max_csg_slots(kbdev);
4957 dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n",
4958 scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces);
4959 return 0;
4960 }
4961
4962 /**
4963 * keep_lru_on_slots() - Check the condition for LRU is met.
4964 *
4965 * @kbdev: Pointer to the device.
4966 *
4967 * This function tries to maintain the Last-Recent-Use case on slots, when
4968 * the scheduler has no non-idle off-slot CSGs for a replacement
4969 * consideration. This effectively extends the previous scheduling results
4970 * for the new one. That is, the last recent used CSGs are retained on slots
4971 * for the new tick/tock action.
4972 *
4973 * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
4974 * otherwise false.
4975 */
4976 static bool keep_lru_on_slots(struct kbase_device *kbdev)
4977 {
4978 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4979 bool keep_lru = false;
4980 int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
4981 kbdev->csf.global_iface.group_num);
4982
4983 lockdep_assert_held(&scheduler->lock);
4984
4985 if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
4986 unsigned long flags;
4987
4988 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4989 /* All on-slots are idle, no non-idle off-slot CSGs available
4990 * for considering a meaningful change. Set keep_lru.
4991 */
4992 keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
4993
4994 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4995
4996 dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
4997 keep_lru, on_slots);
4998 }
4999
5000 return keep_lru;
5001 }
5002
5003 /**
5004  * prepare_fast_local_tock() - Prepare for exercising a fast local tock
5005  * inside the scheduling actions.
5006 *
5007 * @kbdev: Pointer to the GPU device.
5008 *
5009  * The function assumes that firing a fast local tock (i.e. an equivalent
5010  * tock action without dropping the scheduler lock) is desired when there
5011  * are idle on-slot CSGs, and it updates the run state of the affected
5012  * CSGs in preparation. It should only be called from inside
5013  * schedule_actions(), where the previously confirmed idle flags are
5014  * still considered valid, plus any CSGs that became idle during the
5015  * committing steps of the scheduling action.
5016  *
5017  * Return: number of on-slot CSGs that can be considered for replacement.
5019 */
5020 static int prepare_fast_local_tock(struct kbase_device *kbdev)
5021 {
5022 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5023 u32 num_groups = kbdev->csf.global_iface.group_num;
5024 unsigned long flags, i;
5025 DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
5026
5027 lockdep_assert_held(&scheduler->lock);
5028
5029 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5030 bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
5031 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5032
5033 /* Mark the flagged idle CSGs' run state as IDLE, so that the
5034  * intended fast local tock can replace them with off-slot
5035  * non-idle CSGs.
5036 */
5037 for_each_set_bit(i, csg_bitmap, num_groups) {
5038 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
5039 struct kbase_queue_group *group = csg_slot->resident_group;
5040
5041 if (!queue_group_idle_locked(group)) {
5042 group->run_state = KBASE_CSF_GROUP_IDLE;
5043 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
5044 }
5045 }
5046
5047 /* Return the number of idle slots for potential replacement */
5048 return bitmap_weight(csg_bitmap, num_groups);
5049 }
5050
5051 static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
5052 unsigned int timeout_ms)
5053 {
5054 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5055 long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
5056 u32 num_groups = kbdev->csf.global_iface.group_num;
5057 int err = 0;
5058 DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
5059
5060 lockdep_assert_held(&scheduler->lock);
5061
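/* Work on a local copy of the slot mask; the caller's mask is only
 * overwritten with the still-pending slots if a timeout occurs.
 */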
5062 bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
5063
5064 while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
5065 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
5066
5067 bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
5068
5069 remaining = wait_event_timeout(
5070 kbdev->csf.event_wait,
5071 slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
5072
5073 if (likely(remaining)) {
5074 u32 i;
5075
5076 for_each_set_bit(i, changed, num_groups) {
5077 struct kbase_queue_group *group;
5078
5079 if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
5080 continue;
5081
5082 /* The on slot csg is now stopped */
5083 clear_bit(i, slot_mask_local);
5084
5085 KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
5086 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
5087
5088 group = scheduler->csg_slots[i].resident_group;
5089 if (likely(group)) {
5090 /* Only do save/cleanup if the
5091 * group is not terminated during
5092 * the sleep.
5093 */
5094 save_csg_slot(group);
5095 if (cleanup_csg_slot(group))
5096 sched_evict_group(group, true, true);
5097 }
5098 }
5099 } else {
5100 dev_warn(
5101 kbdev->dev,
5102 "[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx",
5103 kbase_backend_get_cycle_cnt(kbdev), slot_mask[0],
5104 slot_mask_local[0]);
5105 /* Return the bitmask of the timed out slots to the caller */
5106 bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
5107
5108 err = -ETIMEDOUT;
5109 }
5110 }
5111
5112 return err;
5113 }
5114
5115 /**
5116 * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG
5117 *
5118 * @kbdev: Pointer to the device
5119 *
5120 * Used to allow for speedier starting/resumption of another CSG. The worst-case
5121 * scenario of the evicted CSG being scheduled next is expected to be rare.
5122 * Also, the eviction will not be applied if the GPU is running in protected mode.
5123  * Otherwise the eviction attempt would force the MCU to quit the execution of
5124 * the protected mode, and likely re-request to enter it again.
5125 */
5126 static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
5127 {
5128 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5129 size_t i;
5130 struct kbase_queue_group *lru_idle_group = NULL;
5131 const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
5132 const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >=
5133 (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS));
5134 u8 as_usage[BASE_MAX_NR_AS] = { 0 };
5135
5136 lockdep_assert_held(&scheduler->lock);
5137 if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5138 return;
5139
5140 BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE));
5141 if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots)
5142 return; /* Some CSG slots remain unused */
5143
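/* When every address space is in use, count how many resident groups
 * share each AS, so that an idle group is only picked for eviction if
 * it is the sole user of its address space.
 */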
5144 if (all_addr_spaces_used) {
5145 for (i = 0; i != total_csg_slots; ++i) {
5146 if (scheduler->csg_slots[i].resident_group != NULL) {
5147 if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
5148 0))
5149 continue;
5150
5151 as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
5152 }
5153 }
5154 }
5155
5156 for (i = 0; i != total_csg_slots; ++i) {
5157 struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
5158
5159 /* We expect that by this point all groups would normally be
5160 * assigned a physical CSG slot, but if circumstances have
5161 * changed then bail out of this optimisation.
5162 */
5163 if (group == NULL)
5164 return;
5165
5166 /* Real-time priority CSGs must be kept on-slot even when
5167 * idle.
5168 */
5169 if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
5170 (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
5171 ((lru_idle_group == NULL) ||
5172 (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
5173 if (WARN_ON(group->kctx->as_nr < 0))
5174 continue;
5175
5176 /* If all address spaces are used, we need to ensure the group does
5177  * not share its AS with other active CSGs, otherwise the CSG would be
5178  * freed without an AS and this optimisation would not work.
5179  */
5180 if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1))
5181 lru_idle_group = group;
5182 }
5183 }
5184
5185 if (lru_idle_group != NULL) {
5186 unsigned long slot_mask = 1 << lru_idle_group->csg_nr;
5187
5188 dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d",
5189 lru_idle_group->handle, lru_idle_group->kctx->tgid,
5190 lru_idle_group->kctx->id, lru_idle_group->csg_nr);
5191 suspend_queue_group(lru_idle_group);
5192 if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
5193 enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
5194
5195 dev_warn(
5196 kbdev->dev,
5197 "[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
5198 kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle,
5199 lru_idle_group->kctx->tgid, lru_idle_group->kctx->id,
5200 lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms);
5201 if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
5202 error_type = DF_PING_REQUEST_TIMEOUT;
5203 schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type);
5204 }
5205 }
5206 }
5207
5208 static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
5209 {
5210 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5211 unsigned long flags;
5212 struct kbase_queue_group *protm_grp;
5213 int ret;
5214 bool skip_scheduling_actions;
5215 bool skip_idle_slots_update;
5216 bool new_protm_top_grp = false;
5217 int local_tock_slots = 0;
5218
5219 kbase_reset_gpu_assert_prevented(kbdev);
5220 lockdep_assert_held(&scheduler->lock);
5221
5222 ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
5223 if (ret) {
5224 dev_err(kbdev->dev,
5225 "Wait for MCU power on failed on scheduling tick/tock");
5226 return;
5227 }
5228
5229 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5230 skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
5231 skip_scheduling_actions =
5232 !skip_idle_slots_update && kbdev->protected_mode;
5233 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5234
5235 /* Skip scheduling actions as GPU reset hasn't been performed yet to
5236 * rectify the anomaly that happened when pmode exit interrupt wasn't
5237 * received before the termination of group running in pmode.
5238 */
5239 if (unlikely(skip_scheduling_actions)) {
5240 dev_info(kbdev->dev,
5241 "Scheduling actions skipped due to anomaly in pmode");
5242 return;
5243 }
5244
5245 if (!skip_idle_slots_update) {
5246 /* Updating on-slot idle CSGs when not in protected mode. */
5247 scheduler_handle_idle_slots(kbdev);
5248
5249 /* Determine whether the condition is met for keeping the
5250 * Last-Recent-Use. If true, skipping the remaining action
5251 * steps and thus extending the previous tick's arrangement,
5252 * in particular, no alterations to on-slot CSGs.
5253 */
5254 if (keep_lru_on_slots(kbdev))
5255 return;
5256 }
5257
5258 if (is_tick)
5259 scheduler_rotate(kbdev);
5260
5261 redo_local_tock:
5262 scheduler_prepare(kbdev);
5263 /* Need to specifically enqueue the GPU idle work if there are no groups
5264 * to schedule despite the runnable groups. This scenario will happen
5265  * if System suspend is done when all groups are idle and no work
5266 * is submitted for the groups after the System resume.
5267 */
5268 if (unlikely(!scheduler->ngrp_to_schedule &&
5269 scheduler->total_runnable_grps)) {
5270 dev_dbg(kbdev->dev, "No groups to schedule in the tick");
5271 enqueue_gpu_idle_work(scheduler);
5272 return;
5273 }
5274 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5275 protm_grp = scheduler->active_protm_grp;
5276
5277 /* Avoid the update if the top-group remains unchanged and is in
5278  * protected mode. In that case, any slot update effectively competes
5279  * against the active protected mode group (typically the top-group):
5280  * if we update other slots, even while leaving the top-group's slot
5281  * untouched, the firmware would exit protected mode to interact with
5282  * the host driver. The top-group would then raise the request to
5283  * enter protected mode again, so we would end up switching over twice
5284  * without making progress on the queue jobs.
5285  */
5287 if (protm_grp && scheduler->top_grp == protm_grp) {
5288 dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
5289 protm_grp->handle);
5290 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5291
5292 update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
5293 remove_scheduled_group(kbdev, protm_grp);
5294 scheduler_check_pmode_progress(kbdev);
5295 } else if (scheduler->top_grp) {
5296 if (protm_grp)
5297 dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
5298 protm_grp->handle);
5299
5300 if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap,
5301 kbdev->csf.global_iface.groups[0].stream_num)) {
5302 dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d",
5303 scheduler->top_grp->handle,
5304 scheduler->top_grp->kctx->tgid,
5305 scheduler->top_grp->kctx->id);
5306
5307 /* When entering protected mode all CSG slots can be occupied
5308 * but only the protected mode CSG will be running. Any event
5309 * that would trigger the execution of an on-slot idle CSG will
5310 * need to be handled by the host during protected mode.
5311 */
5312 new_protm_top_grp = true;
5313 }
5314
5315 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5316
5317 scheduler_apply(kbdev);
5318
5319 /* The Scheduler is dropping the execution of the previous protm_grp.
5320  * Until the protm quit completes, the GPU remains effectively
5321  * locked in secure mode.
5322  */
5323 if (protm_grp)
5324 scheduler_force_protm_exit(kbdev);
5325
5326 wait_csg_slots_start(kbdev);
5327 wait_csg_slots_finish_prio_update(kbdev);
5328
5329 if (new_protm_top_grp) {
5330 scheduler_group_check_protm_enter(kbdev,
5331 scheduler->top_grp);
5332 } else if (!local_tock_slots &&
5333 atomic_read(&scheduler->non_idle_offslot_grps)) {
5334 /* If there are off-slot non-idle CSGs waiting and some new
5335  * idle slots emerged during the committed action steps,
5336  * trigger a one-off fast local tock.
5337  */
5339 local_tock_slots = prepare_fast_local_tock(kbdev);
5340
5341 if (local_tock_slots) {
5342 dev_dbg(kbdev->dev,
5343 "In-cycle %d idle slots available\n",
5344 local_tock_slots);
5345 goto redo_local_tock;
5346 }
5347 }
5348 } else {
5349 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5350 }
5351
5352 evict_lru_or_blocked_csg(kbdev);
5353 }
5354
5355 /**
5356 * can_skip_scheduling() - Check if the scheduling actions can be skipped.
5357 *
5358 * @kbdev: Pointer to the device
5359 *
5360 * This function is called on a scheduling tick or tock to determine if the
5361 * scheduling actions can be skipped.
5362 * If Scheduler is in sleeping state and exit from the sleep state is allowed
5363 * then activation of MCU will be triggered. The tick or tock work item could
5364 * have been in flight when the state of Scheduler was changed to sleeping.
5365 *
5366 * Return: true if the scheduling actions can be skipped.
5367 */
5368 static bool can_skip_scheduling(struct kbase_device *kbdev)
5369 {
5370 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5371
5372 lockdep_assert_held(&scheduler->lock);
5373
5374 if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev)))
5375 return true;
5376
5377 if (scheduler->state == SCHED_SUSPENDED)
5378 return true;
5379
5380 #ifdef KBASE_PM_RUNTIME
5381 if (scheduler->state == SCHED_SLEEPING) {
5382 unsigned long flags;
5383
5384 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5385 if (kbdev->pm.backend.exit_gpu_sleep_mode) {
5386 int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
5387
5388 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5389 if (!ret) {
5390 scheduler->state = SCHED_INACTIVE;
5391 KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5392 return false;
5393 }
5394
5395 dev_info(kbdev->dev,
5396 "Skip scheduling due to system suspend");
5397 return true;
5398 }
5399 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5400 return true;
5401 }
5402 #endif
5403
5404 return false;
5405 }
5406
5407 static void schedule_on_tock(struct work_struct *work)
5408 {
5409 struct kbase_device *kbdev =
5410 container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
5411 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5412 int err;
5413
5414 err = kbase_reset_gpu_try_prevent(kbdev);
5415 /* Regardless of whether reset failed or is currently happening, exit
5416 * early
5417 */
5418 if (err)
5419 return;
5420
5421 kbase_debug_csf_fault_wait_completion(kbdev);
5422 mutex_lock(&scheduler->lock);
5423 if (can_skip_scheduling(kbdev)) {
5425 atomic_set(&scheduler->pending_tock_work, false);
5426 goto exit_no_schedule_unlock;
5427 }
5428
5429 WARN_ON(!(scheduler->state == SCHED_INACTIVE));
5430 scheduler->state = SCHED_BUSY;
5431 KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
5432
5433 /* Undertaking schedule action steps */
5434 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
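/* Consume pending tock requests one at a time; a request arriving
 * while schedule_actions() runs will make the loop go round again.
 */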
5435 while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true)
5436 schedule_actions(kbdev, false);
5437
5438 /* Record time information on a non-skipped tock */
5439 scheduler->last_schedule = jiffies;
5440
5441 scheduler->state = SCHED_INACTIVE;
5442 KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5443 if (!scheduler->total_runnable_grps)
5444 enqueue_gpu_idle_work(scheduler);
5445 mutex_unlock(&scheduler->lock);
5446 kbase_reset_gpu_allow(kbdev);
5447
5448 dev_dbg(kbdev->dev,
5449 "Waking up for event after schedule-on-tock completes.");
5450 wake_up_all(&kbdev->csf.event_wait);
5451 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u);
5452 return;
5453
5454 exit_no_schedule_unlock:
5455 mutex_unlock(&scheduler->lock);
5456 kbase_reset_gpu_allow(kbdev);
5457 }
5458
5459 static void schedule_on_tick(struct work_struct *work)
5460 {
5461 struct kbase_device *kbdev =
5462 container_of(work, struct kbase_device, csf.scheduler.tick_work);
5463 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5464
5465 int err = kbase_reset_gpu_try_prevent(kbdev);
5466 /* Regardless of whether reset failed or is currently happening, exit
5467 * early
5468 */
5469 if (err)
5470 return;
5471
5472 kbase_debug_csf_fault_wait_completion(kbdev);
5473 mutex_lock(&scheduler->lock);
5474
5475 WARN_ON(scheduler->tick_timer_active);
5476 if (can_skip_scheduling(kbdev))
5477 goto exit_no_schedule_unlock;
5478
5479 scheduler->state = SCHED_BUSY;
5480 KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
5481
5482 /* Undertaking schedule action steps */
5483 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
5484 schedule_actions(kbdev, true);
5485
5486 /* Record time information */
5487 scheduler->last_schedule = jiffies;
5488
5489 /* Kicking next scheduling if needed */
5490 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
5491 (scheduler->total_runnable_grps > 0)) {
5492 start_tick_timer(kbdev);
5493 dev_dbg(kbdev->dev,
5494 "scheduling for next tick, num_runnable_groups:%u\n",
5495 scheduler->total_runnable_grps);
5496 } else if (!scheduler->total_runnable_grps) {
5497 enqueue_gpu_idle_work(scheduler);
5498 }
5499
5500 scheduler->state = SCHED_INACTIVE;
5501 mutex_unlock(&scheduler->lock);
5502 KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5503 kbase_reset_gpu_allow(kbdev);
5504
5505 dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
5506 wake_up_all(&kbdev->csf.event_wait);
5507 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL,
5508 scheduler->total_runnable_grps);
5509 return;
5510
5511 exit_no_schedule_unlock:
5512 mutex_unlock(&scheduler->lock);
5513 kbase_reset_gpu_allow(kbdev);
5514 }
5515
5516 static int suspend_active_queue_groups(struct kbase_device *kbdev,
5517 unsigned long *slot_mask)
5518 {
5519 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5520 u32 num_groups = kbdev->csf.global_iface.group_num;
5521 u32 slot_num;
5522 int ret;
5523
5524 lockdep_assert_held(&scheduler->lock);
5525
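/* Issue a suspend request for every resident group and record its
 * slot, then wait for all the suspends collectively below.
 */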
5526 for (slot_num = 0; slot_num < num_groups; slot_num++) {
5527 struct kbase_queue_group *group =
5528 scheduler->csg_slots[slot_num].resident_group;
5529
5530 if (group) {
5531 suspend_queue_group(group);
5532 set_bit(slot_num, slot_mask);
5533 }
5534 }
5535
5536 ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
5537 return ret;
5538 }
5539
5540 static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
5541 {
5542 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5543 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
5544 int ret;
5545 int ret2;
5546
5547 mutex_lock(&scheduler->lock);
5548
5549 ret = suspend_active_queue_groups(kbdev, slot_mask);
5550
5551 if (ret) {
5552 dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
5553 kbdev->csf.global_iface.group_num, slot_mask);
5554 }
5555
5556 /* Need to flush the GPU cache to ensure suspend buffer
5557 * contents are not lost on reset of GPU.
5558 * Do this even if suspend operation had timed out for some of
5559 * the CSG slots.
5560  * In case the scheduler is already in the suspended state, the
5561  * cache clean is still required, because the async reset request
5562  * from debugfs may race against the scheduler suspend operation
5563  * due to the extra context ref-count, which prevents the L2
5564  * power-down cache clean in the non-racing case.
5566 * LSC is being flushed together to cover buslogging usecase,
5567 * where GPU reset is done regularly to avoid the log buffer
5568 * overflow.
5569 */
5570 kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
5571 ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
5572 kbdev->reset_timeout_ms);
5573 if (ret2) {
5574 dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
5575 kbase_backend_get_cycle_cnt(kbdev));
5576 if (!ret)
5577 ret = ret2;
5578 }
5579
5580 mutex_unlock(&scheduler->lock);
5581
5582 return ret;
5583 }
5584
5585 /**
5586 * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode
5587 * groups when reset is done during
5588 * protected mode execution.
5589 *
5590 * @kbdev: Pointer to the device.
5591 *
5592 * This function is called at the time of GPU reset, before the suspension of
5593 * queue groups, to handle the case when the reset is getting performed whilst
5594 * GPU is in protected mode.
5595 * On entry to protected mode all the groups, except the top group that executes
5596 * in protected mode, are implicitly suspended by the FW. Thus this function
5597 * simply marks the normal mode groups as suspended (and cleans up the
5598 * corresponding CSG slots) to prevent their potential forceful eviction from
5599 * the Scheduler. So if GPU was in protected mode and there was no fault, then
5600 * only the protected mode group would be suspended in the regular way post exit
5601 * from this function. And if GPU was in normal mode, then all on-slot groups
5602 * will get suspended in the regular way.
5603 *
5604 * Return: true if the groups remaining on the CSG slots need to be suspended in
5605 * the regular way by sending CSG SUSPEND reqs to FW, otherwise false.
5606 */
5607 static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
5608 {
5609 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5610 u32 const num_groups = kbdev->csf.global_iface.group_num;
5611 struct kbase_queue_group *protm_grp;
5612 bool suspend_on_slot_groups = true;
5613 bool pmode_active;
5614 unsigned long flags;
5615 u32 csg_nr;
5616
5617 mutex_lock(&scheduler->lock);
5618
5619 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5620 protm_grp = scheduler->active_protm_grp;
5621 pmode_active = kbdev->protected_mode;
5622
5623 if (likely(!protm_grp && !pmode_active)) {
5624 /* Case 1: GPU is not in protected mode or it successfully
5625 * exited protected mode. All on-slot groups can be suspended in
5626 * the regular way before reset.
5627 */
5628 suspend_on_slot_groups = true;
5629 } else if (protm_grp && pmode_active) {
5630 /* Case 2: GPU went successfully into protected mode and hasn't
5631 * exited from it yet and the protected mode group is still
5632 * active. If there was no fault for the protected mode group
5633 * then it can be suspended in the regular way before reset.
5634 * The other normal mode on-slot groups were already implicitly
5635 * suspended on entry to protected mode so they can be marked as
5636 * suspended right away.
5637 */
5638 suspend_on_slot_groups = !protm_grp->faulted;
5639 } else if (!protm_grp && pmode_active) {
5640 /* Case 3: GPU went successfully into protected mode and hasn't
5641 * exited from it yet but the protected mode group got deleted.
5642 * This would have happened if the FW got stuck during protected
5643 * mode for some reason (like GPU page fault or some internal
5644 * error). In normal cases FW is expected to send the pmode exit
5645 * interrupt before it handles the CSG termination request.
5646 * The other normal mode on-slot groups would already have been
5647 * implicitly suspended on entry to protected mode so they can be
5648 * marked as suspended right away.
5649 */
5650 suspend_on_slot_groups = false;
5651 } else if (protm_grp && !pmode_active) {
5652 /* Case 4: GPU couldn't successfully enter protected mode, i.e.
5653 * PROTM_ENTER request had timed out.
5654 * All the on-slot groups need to be suspended in the regular
5655 * way before reset.
5656 */
5657 suspend_on_slot_groups = true;
5658 }
5659
5660 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5661
5662 if (likely(!pmode_active))
5663 goto unlock;
5664
5665 /* GPU hasn't exited protected mode, so all the on-slot groups barring
5666 * the protected mode group can be marked as suspended right away.
5667 */
5668 for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
5669 struct kbase_queue_group *const group =
5670 kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
5671 int new_val;
5672
5673 if (!group || (group == protm_grp))
5674 continue;
5675
5676 cleanup_csg_slot(group);
5677 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
5678 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state);
5679
5680 /* Simply treat the normal mode groups as non-idle. The tick
5681 * scheduled after the reset will re-initialize the counter
5682  * anyway.
5683 */
5684 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
5685 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
5686 }
5687
5688 unlock:
5689 mutex_unlock(&scheduler->lock);
5690 return suspend_on_slot_groups;
5691 }
5692
5693 static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
5694 {
5695 cancel_work_sync(&scheduler->tick_work);
5696 }
5697
5698 static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
5699 {
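/* Clear the pending flag first so that the tock worker, if already
 * running, drops out of its pending-work loop before the delayed
 * work is cancelled.
 */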
5700 atomic_set(&scheduler->pending_tock_work, false);
5701 cancel_delayed_work_sync(&scheduler->tock_work);
5702 }
5703
5704 static void scheduler_inner_reset(struct kbase_device *kbdev)
5705 {
5706 u32 const num_groups = kbdev->csf.global_iface.group_num;
5707 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5708 unsigned long flags;
5709
5710 WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
5711
5712 /* Cancel any potential queued delayed work(s) */
5713 cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
5714 cancel_tick_timer(kbdev);
5715 cancel_tick_work(scheduler);
5716 cancel_tock_work(scheduler);
5717 cancel_delayed_work_sync(&scheduler->ping_work);
5718
5719 mutex_lock(&scheduler->lock);
5720
5721 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5722 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
5723 if (scheduler->active_protm_grp)
5724 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
5725 0u);
5726 scheduler->active_protm_grp = NULL;
5727 memset(kbdev->csf.scheduler.csg_slots, 0,
5728 num_groups * sizeof(struct kbase_csf_csg_slot));
5729 bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups);
5730 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5731
5732 scheduler->top_ctx = NULL;
5733 scheduler->top_grp = NULL;
5734
5735 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
5736 scheduler->num_active_address_spaces |
5737 (((u64)scheduler->total_runnable_grps) << 32));
5738
5739 mutex_unlock(&scheduler->lock);
5740 }
5741
5742 void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
5743 {
5744 struct kbase_context *kctx;
5745
5746 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
5747
5748 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
5749
5750 kbase_debug_csf_fault_wait_completion(kbdev);
5751
5752 if (scheduler_handle_reset_in_protected_mode(kbdev) &&
5753 !suspend_active_queue_groups_on_reset(kbdev)) {
5754 /* As all groups have been successfully evicted from the CSG
5755  * slots, clear out the scheduler data fields and return
5756 */
5757 scheduler_inner_reset(kbdev);
5758 return;
5759 }
5760
5761 mutex_lock(&kbdev->kctx_list_lock);
5762
5763 /* The loop to iterate over the kbase contexts is present due to lock
5764 * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock.
5765 * CSF ioctls first take kctx->csf.lock which is context-specific and
5766 * then take kbdev->csf.scheduler.lock for global actions like assigning
5767 * a CSG slot.
5768  * If the lock ordering constraint was not there then we could have
5769 * directly looped over the active queue groups.
5770 */
5771 list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
5772 /* Firmware reload would reinitialize the CSG & CS interface IO
5773 * pages, so just need to internally mark the currently active
5774 * queue groups as terminated (similar to the unexpected OoM
5775 * event case).
5776 * No further work can now get executed for the active groups
5777 * (new groups would have to be created to execute work) and
5778 * in near future Clients would be duly informed of this
5779 * reset. The resources (like User IO pages, GPU queue memory)
5780 * allocated for the associated queues would be freed when the
5781 * Clients do the teardown when they become aware of the reset.
5782 */
5783 kbase_csf_active_queue_groups_reset(kbdev, kctx);
5784 }
5785
5786 mutex_unlock(&kbdev->kctx_list_lock);
5787
5788 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u);
5789
5790 /* After the queue groups are reset, clear out the scheduler data fields */
5791 scheduler_inner_reset(kbdev);
5792 }
5793
5794 static void firmware_aliveness_monitor(struct work_struct *work)
5795 {
5796 struct kbase_device *kbdev = container_of(work, struct kbase_device,
5797 csf.scheduler.ping_work.work);
5798 int err;
5799
5800 /* Ensure that reset will not be occurring while this function is being
5801 * executed as otherwise calling kbase_reset_gpu when reset is already
5802 * occurring is a programming error.
5803 *
5804 * We must use the 'try' variant as the Reset worker can try to flush
5805 * this workqueue, which would otherwise deadlock here if we tried to
5806 * wait for the reset (and thus ourselves) to complete.
5807 */
5808 err = kbase_reset_gpu_try_prevent(kbdev);
5809 if (err) {
5810 /* It doesn't matter whether the value was -EAGAIN or a fatal
5811 * error, just stop processing. In case of -EAGAIN, the Reset
5812 * worker will restart the scheduler later to resume ping
5813 */
5814 return;
5815 }
5816
5817 mutex_lock(&kbdev->csf.scheduler.lock);
5818
5819 #ifdef CONFIG_MALI_BIFROST_DEBUG
5820 if (fw_debug) {
5821 /* ping requests cause distraction in firmware debugging */
5822 goto exit;
5823 }
5824 #endif
5825
5826 if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
5827 kbdev->csf.scheduler.state == SCHED_SLEEPING)
5828 goto exit;
5829
5830 if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
5831 goto exit;
5832
5833 if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5834 goto exit;
5835
5836 if (kbase_pm_context_active_handle_suspend(kbdev,
5837 KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
5838 /* Suspend pending - no real need to ping */
5839 goto exit;
5840 }
5841
5842 kbase_csf_scheduler_wait_mcu_active(kbdev);
5843
5844 err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);
5845
5846 if (err) {
5847 /* It is acceptable to enqueue a reset whilst we've prevented
5848 * them, it will happen after we've allowed them again
5849 */
5850 if (kbase_prepare_to_reset_gpu(
5851 kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
5852 kbase_reset_gpu(kbdev);
5853 } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
5854 queue_delayed_work(
5855 system_long_wq, &kbdev->csf.scheduler.ping_work,
5856 msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
5857 }
5858
5859 kbase_pm_context_idle(kbdev);
5860 exit:
5861 mutex_unlock(&kbdev->csf.scheduler.lock);
5862 kbase_reset_gpu_allow(kbdev);
5863 }
5864
5865 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
5866 struct kbase_suspend_copy_buffer *sus_buf)
5867 {
5868 struct kbase_context *const kctx = group->kctx;
5869 struct kbase_device *const kbdev = kctx->kbdev;
5870 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5871 bool on_slot;
5872 int err = 0;
5873
5874 kbase_reset_gpu_assert_prevented(kbdev);
5875 lockdep_assert_held(&kctx->csf.lock);
5876 mutex_lock(&scheduler->lock);
5877
5878 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5879
5880 #ifdef KBASE_PM_RUNTIME
5881 if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
5882 if (wait_for_scheduler_to_exit_sleep(kbdev)) {
5883 dev_warn(
5884 kbdev->dev,
5885 "Wait for scheduler to exit sleep state timedout when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5886 group->handle, group->kctx->tgid,
5887 group->kctx->id, group->csg_nr);
5888
5889 scheduler_wakeup(kbdev, true);
5890
5891 /* Wait for MCU firmware to start running */
5892 if (kbase_csf_scheduler_wait_mcu_active(kbdev))
5893 dev_warn(
5894 kbdev->dev,
5895 "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5896 group->handle, group->kctx->tgid,
5897 group->kctx->id, group->csg_nr);
5898 }
5899
5900 /* Check the group state again as scheduler lock would have been
5901 * released when waiting for the exit from SLEEPING state.
5902 */
5903 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5904 }
5905 #endif
5906 if (on_slot) {
5907 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
5908
5909 set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
5910
5911 if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
5912 suspend_queue_group(group);
5913 err = wait_csg_slots_suspend(kbdev, slot_mask,
5914 kbdev->csf.fw_timeout_ms);
5915 if (err) {
5916 dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
5917 kbase_backend_get_cycle_cnt(kbdev),
5918 group->handle, group->csg_nr);
5919 goto exit;
5920 }
5921 }
5922
5923 if (queue_group_suspended_locked(group)) {
5924 unsigned int target_page_nr = 0, i = 0;
5925 u64 offset = sus_buf->offset;
5926 size_t to_copy = sus_buf->size;
5927 const u32 csg_suspend_buf_nr_pages =
5928 PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
5929
5930 if (scheduler->state != SCHED_SUSPENDED) {
5931 /* Similar to the case of HW counters, need to flush
5932 * the GPU L2 cache before reading from the suspend buffer
5933 * pages as they are mapped and cached on GPU side.
5934 * Flushing LSC is not done here, since only the flush of
5935 * CSG suspend buffer contents is needed from the L2 cache.
5936 */
5937 kbase_gpu_start_cache_clean(
5938 kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
5939 kbase_gpu_wait_cache_clean(kbdev);
5940 } else {
5941 /* Make sure power down transitions have completed,
5942 * i.e. L2 has been powered off as that would ensure
5943 * its contents are flushed to memory.
5944 * This is needed as Scheduler doesn't wait for the
5945 * power down to finish.
5946 */
5947 kbase_pm_wait_for_desired_state(kbdev);
5948 }
5949
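/* Copy the suspend buffer contents page by page into the caller's
 * pinned user pages, syncing each page for CPU access first.
 */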
5950 for (i = 0; i < csg_suspend_buf_nr_pages &&
5951 target_page_nr < sus_buf->nr_pages; i++) {
5952 struct page *pg =
5953 as_page(group->normal_suspend_buf.phy[i]);
5954 void *sus_page = kmap(pg);
5955
5956 if (sus_page) {
5957 kbase_sync_single_for_cpu(kbdev,
5958 kbase_dma_addr(pg),
5959 PAGE_SIZE, DMA_BIDIRECTIONAL);
5960
5961 err = kbase_mem_copy_to_pinned_user_pages(
5962 sus_buf->pages, sus_page,
5963 &to_copy, sus_buf->nr_pages,
5964 &target_page_nr, offset);
5965 kunmap(pg);
5966 if (err)
5967 break;
5968 } else {
5969 err = -ENOMEM;
5970 break;
5971 }
5972 }
5973 schedule_in_cycle(group, false);
5974 } else {
5975 /* If addr-space fault, the group may have been evicted */
5976 err = -EIO;
5977 }
5978
5979 exit:
5980 mutex_unlock(&scheduler->lock);
5981 return err;
5982 }
5983
5984 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf);
5985
5986 /**
5987 * group_sync_updated() - Evaluate sync wait condition of all blocked command
5988 * queues of the group.
5989 *
5990 * @group: Pointer to the command queue group that has blocked command queue(s)
5991 * bound to it.
5992 *
5993 * Return: true if sync wait condition is satisfied for at least one blocked
5994 * queue of the group.
5995 */
5996 static bool group_sync_updated(struct kbase_queue_group *group)
5997 {
5998 bool updated = false;
5999 int stream;
6000
6001 /* Groups can also be blocked on-slot during protected mode. */
6002 WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
6003 group->run_state != KBASE_CSF_GROUP_IDLE);
6004
6005 for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
6006 struct kbase_queue *const queue = group->bound_queues[stream];
6007
6008 /* To check the necessity of sync-wait evaluation,
6009 * we rely on the cached 'status_wait' instead of reading it
6010 * directly from shared memory as the CSG has already been
6011 * evicted from the CSG slot, thus this CSG doesn't have
6012 * valid information in the shared memory.
6013 */
6014 if (queue && queue->enabled &&
6015 CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait))
6016 if (evaluate_sync_update(queue)) {
6017 updated = true;
6018 queue->status_wait = 0;
6019 }
6020 }
6021
6022 return updated;
6023 }
6024
6025 /**
6026 * scheduler_get_protm_enter_async_group() - Check if the GPU queue group
6027 * can be now allowed to execute in protected mode.
6028 *
6029 * @kbdev: Pointer to the GPU device.
6030 * @group: Pointer to the GPU queue group.
6031 *
6032 * This function is called outside the scheduling tick/tock to determine
6033 * if the given GPU queue group can now execute in protected mode or not.
6034 * If the group pointer passed is NULL then the evaluation is done for the
6035 * highest priority group on the scheduler-maintained group lists, without
6036 * the tick-associated rotation actions. This is referred to as the
6037 * 'top-group' in a tock action sense.
6038 *
6039 * It returns the same group pointer that was passed as an argument if that
6040 * group matches the highest priority group and has pending protected region
6041 * requests, otherwise NULL is returned.
6042 *
6043 * If the group pointer passed is NULL then the internally evaluated highest
6044 * priority group is returned if it has pending protected region requests,
6045 * otherwise NULL is returned.
6046 *
6047 * The evaluated highest priority group may not necessarily be the same as
6048 * scheduler->top_grp. This can happen if there is a dynamic de-idle update
6049 * during the tick interval for some on-slot groups that were idle during the
6050 * scheduler's normal scheduling action, where scheduler->top_grp was set.
6051 * The recorded scheduler->top_grp is untouched by this evaluation, so it will
6052 * not affect the scheduler context/priority list rotation arrangement.
6053 *
6054 * Return: the pointer to queue group that can currently execute in protected
6055 * mode or NULL.
6056 */
6057 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
6058 struct kbase_device *const kbdev,
6059 struct kbase_queue_group *const group)
6060 {
6061 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6062 struct kbase_queue_group *match_grp, *input_grp;
6063
6064 lockdep_assert_held(&scheduler->lock);
6065
6066 if (scheduler->state != SCHED_INACTIVE)
6067 return NULL;
6068
6069 match_grp = get_tock_top_group(scheduler);
6070 input_grp = group ? group : match_grp;
6071
6072 if (input_grp && (input_grp == match_grp)) {
6073 struct kbase_csf_cmd_stream_group_info *ginfo =
6074 &kbdev->csf.global_iface.groups[0];
6075 unsigned long *pending =
6076 input_grp->protm_pending_bitmap;
6077 unsigned long flags;
6078
6079 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
6080
6081 if (kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
6082 bitmap_empty(pending, ginfo->stream_num))
6083 input_grp = NULL;
6084
6085 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
6086 } else {
6087 input_grp = NULL;
6088 }
6089
6090 return input_grp;
6091 }
6092
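/**
 * kbase_csf_scheduler_group_protm_enter - Handle the protected mode entry
 *                                         request for a GPU queue group.
 *
 * @group: Pointer to the GPU queue group that requested protected mode entry.
 *
 * An idle group is first made runnable again. If the group is then evaluated
 * as the highest priority group with a pending protected region request, the
 * Scheduler is asked to check and effect the protected mode entry for it.
 */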
6093 void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
6094 {
6095 struct kbase_device *const kbdev = group->kctx->kbdev;
6096 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6097
6098 int err = kbase_reset_gpu_try_prevent(kbdev);
6099 /* Regardless of whether reset failed or is currently happening, exit
6100 * early
6101 */
6102 if (err)
6103 return;
6104
6105 mutex_lock(&scheduler->lock);
6106
6107 if (group->run_state == KBASE_CSF_GROUP_IDLE) {
6108 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6109 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6110 group->run_state);
6111 }
6112 /* Check if the group is now eligible for execution in protected mode. */
6113 if (scheduler_get_protm_enter_async_group(kbdev, group))
6114 scheduler_group_check_protm_enter(kbdev, group);
6115
6116 mutex_unlock(&scheduler->lock);
6117 kbase_reset_gpu_allow(kbdev);
6118 }
6119
6120 /**
6121 * check_sync_update_for_on_slot_group() - Check the sync wait condition
6122 * for all the queues bound to
6123 * the given on-slot group.
6124 *
6125 * @group: Pointer to the on-slot group that requires evaluation.
6126 *
6127 * This function is called if the GPU is in protected mode and there are
6128 * on-slot idle groups with a higher priority than the active protected mode
6129 * group, or when a CQS object is signaled whilst the GPU is in sleep
6130 * state.
6131 * This function will evaluate the sync condition, if any, of all the queues
6132 * bound to the given group.
6133 *
6134 * Return: true if the sync condition of at least one queue has been satisfied.
6135 */
6136 static bool check_sync_update_for_on_slot_group(
6137 struct kbase_queue_group *group)
6138 {
6139 struct kbase_device *const kbdev = group->kctx->kbdev;
6140 struct kbase_csf_scheduler *const scheduler =
6141 &kbdev->csf.scheduler;
6142 bool sync_update_done = false;
6143 int i;
6144
6145 lockdep_assert_held(&scheduler->lock);
6146
6147 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
6148 struct kbase_queue *queue = group->bound_queues[i];
6149
6150 if (queue && queue->enabled && !sync_update_done) {
6151 struct kbase_csf_cmd_stream_group_info *const ginfo =
6152 &kbdev->csf.global_iface.groups[group->csg_nr];
6153 struct kbase_csf_cmd_stream_info *const stream =
6154 &ginfo->streams[queue->csi_index];
6155 u32 status = kbase_csf_firmware_cs_output(
6156 stream, CS_STATUS_WAIT);
6157 unsigned long flags;
6158
6159 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
6160 queue->group, queue, status);
6161
6162 if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
6163 continue;
6164
6165 /* Save the information of sync object of the command
6166 * queue so the callback function, 'group_sync_updated'
6167 * can evaluate the sync object when it gets updated
6168 * later.
6169 */
6170 queue->status_wait = status;
6171 queue->sync_ptr = kbase_csf_firmware_cs_output(
6172 stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
6173 queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
6174 stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
6175 queue->sync_value = kbase_csf_firmware_cs_output(
6176 stream, CS_STATUS_WAIT_SYNC_VALUE);
6177 queue->blocked_reason =
6178 CS_STATUS_BLOCKED_REASON_REASON_GET(
6179 kbase_csf_firmware_cs_output(
6180 stream,
6181 CS_STATUS_BLOCKED_REASON));
6182
6183 if (!evaluate_sync_update(queue))
6184 continue;
6185
6186 /* Update csg_slots_idle_mask and group's run_state */
6187 if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) {
6188 /* Only clear the group's idle flag if it has been dealt
6189 * with by the scheduler's tick/tock action, otherwise
6190 * leave it untouched.
6191 */
6192 spin_lock_irqsave(&scheduler->interrupt_lock,
6193 flags);
6194 clear_bit((unsigned int)group->csg_nr,
6195 scheduler->csg_slots_idle_mask);
6196 KBASE_KTRACE_ADD_CSF_GRP(
6197 kbdev, CSG_SLOT_IDLE_CLEAR, group,
6198 scheduler->csg_slots_idle_mask[0]);
6199 spin_unlock_irqrestore(
6200 &scheduler->interrupt_lock, flags);
6201 /* Request the scheduler to confirm the condition inferred
6202 * here inside the protected mode.
6203 */
6204 group->reevaluate_idle_status = true;
6205 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6206 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6207 group->run_state);
6208 }
6209
6210 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
6211 sync_update_done = true;
6212 }
6213 }
6214
6215 return sync_update_done;
6216 }
6217
6218 /**
6219 * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
6220 * for the idle groups on slot
6221 * during protected mode.
6222 *
6223 * @kbdev: Pointer to the GPU device
6224 *
6225 * This function checks the GPU queues of all the on-slot idle groups, during
6226 * protected mode, that have a higher priority than the active protected mode
6227 * group.
6228 *
6229 * Return: true if the sync condition of at least one queue in a group has been
6230 * satisfied.
6231 */
6232 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
6233 {
6234 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6235 struct kbase_queue_group *protm_grp;
6236 bool exit_protm = false;
6237 unsigned long flags;
6238 u32 num_groups;
6239 u32 i;
6240
6241 lockdep_assert_held(&scheduler->lock);
6242
6243 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
6244 protm_grp = scheduler->active_protm_grp;
6245 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
6246
6247 if (!protm_grp)
6248 return exit_protm;
6249
6250 num_groups = kbdev->csf.global_iface.group_num;
6251
6252 for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
6253 struct kbase_csf_csg_slot *csg_slot =
6254 &scheduler->csg_slots[i];
6255 struct kbase_queue_group *group = csg_slot->resident_group;
6256
6257 if (group->scan_seq_num < protm_grp->scan_seq_num) {
6258 /* If sync update has been performed for the group that
6259 * has a higher priority than the protm group, then we
6260 * need to exit protected mode.
6261 */
6262 if (check_sync_update_for_on_slot_group(group))
6263 exit_protm = true;
6264 }
6265 }
6266
6267 return exit_protm;
6268 }
6269
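/**
 * check_sync_update_in_sleep_mode() - Check the sync wait condition for all
 *                                     on-slot groups when the GPU is in
 *                                     sleep state.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * This function evaluates the sync wait condition, if any, of the queues bound
 * to every resident group. The Scheduler is woken up as soon as the sync
 * condition of a queue in one of the groups is found to be satisfied.
 */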
6270 static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
6271 {
6272 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6273 u32 const num_groups = kbdev->csf.global_iface.group_num;
6274 u32 csg_nr;
6275
6276 lockdep_assert_held(&scheduler->lock);
6277
6278 for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
6279 struct kbase_queue_group *const group =
6280 kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
6281
6282 if (!group)
6283 continue;
6284
6285 if (check_sync_update_for_on_slot_group(group)) {
6286 scheduler_wakeup(kbdev, true);
6287 return;
6288 }
6289 }
6290 }
6291
6292 /**
6293 * check_group_sync_update_worker() - Check the sync wait condition for all the
6294 * blocked queue groups
6295 *
6296 * @work: Pointer to the context-specific work item for evaluating the wait
6297 * condition for all the queue groups in idle_wait_groups list.
6298 *
6299 * This function checks the GPU queues of all the groups present in the
6300 * idle_wait_groups list of a context, and of all on-slot idle groups (if the
6301 * GPU is in protected mode).
6302 * If the sync wait condition for at least one queue bound to the group has
6303 * been satisfied then the group is moved to the per context list of
6304 * runnable groups so that Scheduler can consider scheduling the group
6305 * in next tick or exit protected mode.
6306 */
6307 static void check_group_sync_update_worker(struct work_struct *work)
6308 {
6309 struct kbase_context *const kctx = container_of(work,
6310 struct kbase_context, csf.sched.sync_update_work);
6311 struct kbase_device *const kbdev = kctx->kbdev;
6312 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6313 bool sync_updated = false;
6314
6315 mutex_lock(&scheduler->lock);
6316
6317 #if IS_ENABLED(CONFIG_DEBUG_FS)
6318 if (unlikely(scheduler->state == SCHED_BUSY)) {
6319 queue_work(kctx->csf.sched.sync_update_wq,
6320 &kctx->csf.sched.sync_update_work);
6321 mutex_unlock(&scheduler->lock);
6322 return;
6323 }
6324 #endif
6325
6326 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
6327 if (kctx->csf.sched.num_idle_wait_grps != 0) {
6328 struct kbase_queue_group *group, *temp;
6329
6330 list_for_each_entry_safe(group, temp,
6331 &kctx->csf.sched.idle_wait_groups, link) {
6332 if (group_sync_updated(group)) {
6333 sync_updated = true;
6334 /* Move this group back in to the runnable
6335 * groups list of the context.
6336 */
6337 update_idle_suspended_group_state(group);
6338 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
6339 }
6340 }
6341 } else {
6342 WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
6343 }
6344
6345 if (check_sync_update_for_idle_groups_protm(kbdev)) {
6346 scheduler_force_protm_exit(kbdev);
6347 sync_updated = true;
6348 }
6349
6350 /* If the scheduler is in the sleeping state, re-activate it to serve
6351 * on-slot CSGs blocked on a CQS object which has been signaled.
6352 */
6353 if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
6354 check_sync_update_in_sleep_mode(kbdev);
6355
6356 KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
6357
6358 mutex_unlock(&scheduler->lock);
6359 }
6360
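/**
 * check_group_sync_update_cb() - Callback invoked when a sync update event is
 *                                signaled for the context.
 *
 * @param: Pointer to the context that registered the callback.
 *
 * The work item for evaluating the sync wait condition of the context's queue
 * groups is enqueued on the context's ordered workqueue.
 *
 * Return: KBASE_CSF_EVENT_CALLBACK_KEEP, so the callback remains registered
 *         for subsequent events.
 */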
6361 static
6362 enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
6363 {
6364 struct kbase_context *const kctx = param;
6365
6366 KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
6367
6368 queue_work(kctx->csf.sched.sync_update_wq,
6369 &kctx->csf.sched.sync_update_work);
6370
6371 return KBASE_CSF_EVENT_CALLBACK_KEEP;
6372 }
6373
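/**
 * kbase_csf_scheduler_context_init() - Initialize the Scheduler state of a
 *                                      context.
 *
 * @kctx: Pointer to the kbase context being initialized.
 *
 * The per-priority runnable group lists and the idle wait group list of the
 * context are initialized, an ordered workqueue and work item are created for
 * evaluating sync updates, and a sync update event callback is registered.
 *
 * Return: 0 on success, or negative error code on failure.
 */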
6374 int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
6375 {
6376 int priority;
6377 int err;
6378
6379 kbase_ctx_sched_init_ctx(kctx);
6380
6381 for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
6382 ++priority) {
6383 INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
6384 }
6385
6386 kctx->csf.sched.num_runnable_grps = 0;
6387 INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
6388 kctx->csf.sched.num_idle_wait_grps = 0;
6389 kctx->csf.sched.ngrp_to_schedule = 0;
6390
6391 kctx->csf.sched.sync_update_wq =
6392 alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq",
6393 WQ_HIGHPRI);
6394 if (!kctx->csf.sched.sync_update_wq) {
6395 dev_err(kctx->kbdev->dev,
6396 "Failed to initialize scheduler context workqueue");
6397 err = -ENOMEM;
6398 goto alloc_wq_failed;
6399 }
6400
6401 INIT_WORK(&kctx->csf.sched.sync_update_work,
6402 check_group_sync_update_worker);
6403
6404 kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
6405
6406 err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
6407
6408 if (err) {
6409 dev_err(kctx->kbdev->dev,
6410 "Failed to register a sync update callback");
6411 goto event_wait_add_failed;
6412 }
6413
6414 return err;
6415
6416 event_wait_add_failed:
6417 destroy_workqueue(kctx->csf.sched.sync_update_wq);
6418 alloc_wq_failed:
6419 kbase_ctx_sched_remove_ctx(kctx);
6420 return err;
6421 }
6422
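/**
 * kbase_csf_scheduler_context_term() - Terminate the Scheduler state of a
 *                                      context.
 *
 * @kctx: Pointer to the kbase context being terminated.
 *
 * The sync update event callback is removed, any pending sync update work is
 * cancelled and its workqueue destroyed, before the context is removed from
 * the Scheduler.
 */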
6423 void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
6424 {
6425 kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
6426 cancel_work_sync(&kctx->csf.sched.sync_update_work);
6427 destroy_workqueue(kctx->csf.sched.sync_update_wq);
6428
6429 kbase_ctx_sched_remove_ctx(kctx);
6430 }
6431
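/**
 * kbase_csf_scheduler_init() - Initialize the Scheduler state that depends on
 *                              the CSG count reported by the firmware.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The CSG slot status array is allocated for the number of CSGs supported by
 * the firmware and the MCU shared regions data is initialized.
 *
 * Return: 0 on success, or negative error code on failure.
 */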
6432 int kbase_csf_scheduler_init(struct kbase_device *kbdev)
6433 {
6434 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6435 u32 num_groups = kbdev->csf.global_iface.group_num;
6436
6437 bitmap_zero(scheduler->csg_inuse_bitmap, num_groups);
6438 bitmap_zero(scheduler->csg_slots_idle_mask, num_groups);
6439
6440 scheduler->csg_slots = kcalloc(num_groups,
6441 sizeof(*scheduler->csg_slots), GFP_KERNEL);
6442 if (!scheduler->csg_slots) {
6443 dev_err(kbdev->dev,
6444 "Failed to allocate memory for csg slot status array\n");
6445 return -ENOMEM;
6446 }
6447
6448 return kbase_csf_mcu_shared_regs_data_init(kbdev);
6449 }
6450
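/**
 * kbase_csf_scheduler_early_init() - Perform the early initialization of the
 *                                    Scheduler.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The Scheduler workqueues, work items, locks, internal lists, tick hrtimer
 * and the remaining state fields are initialized here, with the Scheduler
 * starting off in the suspended state.
 *
 * Return: 0 on success, or negative error code on failure.
 */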
6451 int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
6452 {
6453 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6454
6455 scheduler->timer_enabled = true;
6456
6457 scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI);
6458 if (!scheduler->wq) {
6459 dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
6460 return -ENOMEM;
6461 }
6462 scheduler->idle_wq = alloc_ordered_workqueue(
6463 "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
6464 if (!scheduler->idle_wq) {
6465 dev_err(kbdev->dev,
6466 "Failed to allocate GPU idle scheduler workqueue\n");
6467 destroy_workqueue(kbdev->csf.scheduler.wq);
6468 return -ENOMEM;
6469 }
6470
6471 INIT_WORK(&scheduler->tick_work, schedule_on_tick);
6472 INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
6473 atomic_set(&scheduler->pending_tock_work, false);
6474
6475 INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
6476
6477 mutex_init(&scheduler->lock);
6478 spin_lock_init(&scheduler->interrupt_lock);
6479
6480 /* Internal lists */
6481 INIT_LIST_HEAD(&scheduler->runnable_kctxs);
6482 INIT_LIST_HEAD(&scheduler->groups_to_schedule);
6483 INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule);
6484
6485 BUILD_BUG_ON(MAX_SUPPORTED_CSGS >
6486 (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
6487 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
6488 scheduler->state = SCHED_SUSPENDED;
6489 KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
6490 scheduler->pm_active_count = 0;
6491 scheduler->ngrp_to_schedule = 0;
6492 scheduler->total_runnable_grps = 0;
6493 scheduler->top_ctx = NULL;
6494 scheduler->top_grp = NULL;
6495 scheduler->last_schedule = 0;
6496 scheduler->active_protm_grp = NULL;
6497 scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
6498 scheduler_doorbell_init(kbdev);
6499
6500 INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
6501 scheduler->fast_gpu_idle_handling = false;
6502 atomic_set(&scheduler->gpu_no_longer_idle, false);
6503 atomic_set(&scheduler->non_idle_offslot_grps, 0);
6504
6505 hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
6506 scheduler->tick_timer.function = tick_timer_callback;
6507 scheduler->tick_timer_active = false;
6508
6509 kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
6510
6511 return 0;
6512 }
6513
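/**
 * kbase_csf_scheduler_term() - Terminate the Scheduler.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The Scheduler is suspended if it is still active, pending work items and the
 * tick timer are cancelled, the CSG slot status array is freed and the MCU
 * shared regions data is terminated.
 */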
6514 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
6515 {
6516 if (kbdev->csf.scheduler.csg_slots) {
6517 WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
6518 /* The unload of Driver can take place only when all contexts have
6519 * been terminated. The groups that were not terminated by the User
6520 * are terminated on context termination. So no CSGs are expected
6521 * to be active at the time of Driver unload.
6522 */
6523 WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
6524 flush_work(&kbdev->csf.scheduler.gpu_idle_work);
6525 mutex_lock(&kbdev->csf.scheduler.lock);
6526
6527 if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
6528 unsigned long flags;
6529 /* The power policy could prevent the Scheduler from
6530 * getting suspended when GPU becomes idle.
6531 */
6532 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6533 WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
6534 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6535 scheduler_suspend(kbdev);
6536 }
6537
6538 mutex_unlock(&kbdev->csf.scheduler.lock);
6539 cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
6540 cancel_tick_timer(kbdev);
6541 cancel_tick_work(&kbdev->csf.scheduler);
6542 cancel_tock_work(&kbdev->csf.scheduler);
6543 kfree(kbdev->csf.scheduler.csg_slots);
6544 kbdev->csf.scheduler.csg_slots = NULL;
6545 }
6546 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
6547 kbase_csf_scheduler_get_nr_active_csgs(kbdev));
6548 /* Terminating the MCU shared regions, following the release of slots */
6549 kbase_csf_mcu_shared_regs_data_term(kbdev);
6550 }
6551
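/**
 * kbase_csf_scheduler_early_term() - Tear down the state set up by
 *                                    kbase_csf_scheduler_early_init().
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The Scheduler workqueues are destroyed, the tiler heap reclaim manager is
 * terminated and the Scheduler lock is destroyed.
 */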
6552 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
6553 {
6554 if (kbdev->csf.scheduler.idle_wq)
6555 destroy_workqueue(kbdev->csf.scheduler.idle_wq);
6556 if (kbdev->csf.scheduler.wq)
6557 destroy_workqueue(kbdev->csf.scheduler.wq);
6558
6559 kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
6560 mutex_destroy(&kbdev->csf.scheduler.lock);
6561 }
6562
6563 /**
6564 * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
6565 *
6566 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
6567 *
6568 * This function will restart the scheduler tick so that regular scheduling can
6569 * be resumed without any explicit trigger (like kicking of GPU queues). This
6570 * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the
6571 * CSF scheduler lock is already held.
6572 */
6573 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
6574 {
6575 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6576
6577 lockdep_assert_held(&kbdev->csf.scheduler.lock);
6578
6579 if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
6580 return;
6581
6582 WARN_ON((scheduler->state != SCHED_INACTIVE) &&
6583 (scheduler->state != SCHED_SUSPENDED) &&
6584 (scheduler->state != SCHED_SLEEPING));
6585
6586 if (scheduler->total_runnable_grps > 0) {
6587 enqueue_tick_work(kbdev);
6588 dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
6589 } else if (scheduler->state != SCHED_SUSPENDED) {
6590 enqueue_gpu_idle_work(scheduler);
6591 }
6592 }
6593
6594 void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
6595 {
6596 mutex_lock(&kbdev->csf.scheduler.lock);
6597 scheduler_enable_tick_timer_nolock(kbdev);
6598 mutex_unlock(&kbdev->csf.scheduler.lock);
6599 }
6600
6601 bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
6602 {
6603 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6604 bool enabled;
6605
6606 mutex_lock(&scheduler->lock);
6607 enabled = scheduler_timer_is_enabled_nolock(kbdev);
6608 mutex_unlock(&scheduler->lock);
6609
6610 return enabled;
6611 }
6612
6613 void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
6614 bool enable)
6615 {
6616 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6617 bool currently_enabled;
6618
6619 mutex_lock(&scheduler->lock);
6620
6621 currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
6622 if (currently_enabled && !enable) {
6623 scheduler->timer_enabled = false;
6624 cancel_tick_timer(kbdev);
6625 mutex_unlock(&scheduler->lock);
6626 /* The non-sync version to cancel the normal work item is not
6627 * available, so the lock needs to be dropped before cancellation.
6628 */
6629 cancel_tick_work(scheduler);
6630 cancel_tock_work(scheduler);
6631 return;
6632 }
6633
6634 if (!currently_enabled && enable) {
6635 scheduler->timer_enabled = true;
6636
6637 scheduler_enable_tick_timer_nolock(kbdev);
6638 }
6639
6640 mutex_unlock(&scheduler->lock);
6641 }
6642
6643 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
6644 {
6645 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6646
6647 mutex_lock(&scheduler->lock);
6648
6649 if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
6650 goto out;
6651
6652 if (scheduler->total_runnable_grps > 0) {
6653 enqueue_tick_work(kbdev);
6654 dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
6655 }
6656
6657 out:
6658 mutex_unlock(&scheduler->lock);
6659 }
6660
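/**
 * kbase_csf_scheduler_pm_suspend_no_lock() - Suspend the Scheduler as part of
 *                                            a PM suspend, with the Scheduler
 *                                            lock already held.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * If the Scheduler is in the sleeping state then the MCU is first activated to
 * allow the CSGs to be suspended. All active groups are then suspended and the
 * Scheduler transitions to the suspended state.
 *
 * Return: 0 on success, or negative error code on failure.
 */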
6661 int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
6662 {
6663 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6664 int result = 0;
6665
6666 lockdep_assert_held(&scheduler->lock);
6667
6668 #if IS_ENABLED(CONFIG_DEBUG_FS)
6669 if (unlikely(scheduler->state == SCHED_BUSY))
6670 return -EBUSY;
6671 #endif
6672
6673 #ifdef KBASE_PM_RUNTIME
6674 /* If scheduler is in sleeping state, then MCU needs to be activated
6675 * to suspend CSGs.
6676 */
6677 if (scheduler->state == SCHED_SLEEPING) {
6678 dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
6679 result = force_scheduler_to_exit_sleep(kbdev);
6680 if (result) {
6681 dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
6682 goto exit;
6683 }
6684 }
6685 #endif
6686 if (scheduler->state != SCHED_SUSPENDED) {
6687 result = suspend_active_groups_on_powerdown(kbdev, true);
6688 if (result) {
6689 dev_warn(kbdev->dev, "failed to suspend active groups");
6690 goto exit;
6691 } else {
6692 dev_info(kbdev->dev, "Scheduler PM suspend");
6693 scheduler_suspend(kbdev);
6694 cancel_tick_timer(kbdev);
6695 }
6696 }
6697
6698 exit:
6699 return result;
6700 }
6701
6702 int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
6703 {
6704 int result = 0;
6705 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6706
6707 /* Cancel any potential queued delayed work(s) */
6708 cancel_tick_work(scheduler);
6709 cancel_tock_work(scheduler);
6710
6711 result = kbase_reset_gpu_prevent_and_wait(kbdev);
6712 if (result) {
6713 dev_warn(kbdev->dev, "Stop PM suspending for failing to prevent gpu reset.\n");
6714 return result;
6715 }
6716
6717 mutex_lock(&scheduler->lock);
6718
6719 result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
6720 mutex_unlock(&scheduler->lock);
6721
6722 kbase_reset_gpu_allow(kbdev);
6723
6724 return result;
6725 }
6726 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
6727
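/**
 * kbase_csf_scheduler_pm_resume_no_lock() - Resume the Scheduler on a PM
 *                                           resume, with the Scheduler lock
 *                                           already held.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The Scheduler is woken up only if it was in the suspended state and there
 * are runnable groups to serve.
 */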
6728 void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
6729 {
6730 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6731
6732 lockdep_assert_held(&scheduler->lock);
6733 if ((scheduler->total_runnable_grps > 0) &&
6734 (scheduler->state == SCHED_SUSPENDED)) {
6735 dev_info(kbdev->dev, "Scheduler PM resume");
6736 scheduler_wakeup(kbdev, true);
6737 }
6738 }
6739
6740 void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
6741 {
6742 mutex_lock(&kbdev->csf.scheduler.lock);
6743
6744 kbase_csf_scheduler_pm_resume_no_lock(kbdev);
6745 mutex_unlock(&kbdev->csf.scheduler.lock);
6746 }
6747 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
6748
6749 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
6750 {
6751 /* Here the lock is taken to synchronize against the runtime suspend
6752 * callback function, which may need to wake up the MCU for suspending
6753 * the CSGs before powering down the GPU.
6754 */
6755 mutex_lock(&kbdev->csf.scheduler.lock);
6756 scheduler_pm_active_handle_suspend(kbdev,
6757 KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
6758 mutex_unlock(&kbdev->csf.scheduler.lock);
6759 }
6760 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
6761
6762 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
6763 {
6764 /* Here the lock is taken just to maintain symmetry with
6765 * kbase_csf_scheduler_pm_active().
6766 */
6767 mutex_lock(&kbdev->csf.scheduler.lock);
6768 scheduler_pm_idle(kbdev);
6769 mutex_unlock(&kbdev->csf.scheduler.lock);
6770 }
6771 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
6772
6773 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
6774 {
6775 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6776 unsigned long flags;
6777 int err;
6778
6779 kbase_pm_lock(kbdev);
6780 WARN_ON(!kbdev->pm.active_count);
6781 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6782 WARN_ON(!scheduler->pm_active_count);
6783 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6784 kbase_pm_unlock(kbdev);
6785
6786 kbase_pm_wait_for_poweroff_work_complete(kbdev);
6787
6788 err = kbase_pm_wait_for_desired_state(kbdev);
6789 if (!err) {
6790 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6791 WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
6792 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6793 }
6794
6795 return err;
6796 }
6797 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
6798
6799 #ifdef KBASE_PM_RUNTIME
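/**
 * kbase_csf_scheduler_handle_runtime_suspend() - Suspend the Scheduler from
 *                                                the runtime suspend callback.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Called with the Scheduler lock held and GPU reset prevented. The active
 * groups are suspended; if that fails the GPU sleep is aborted by requesting
 * an exit from sleep mode and a scheduling tick, otherwise the Scheduler
 * transitions to the suspended state and sleep mode is marked as inactive.
 *
 * Return: 0 on success, or negative error code if the group suspension failed.
 */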
6800 int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
6801 {
6802 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6803 unsigned long flags;
6804 int ret;
6805
6806 dev_dbg(kbdev->dev, "Handling runtime suspend");
6807
6808 kbase_reset_gpu_assert_prevented(kbdev);
6809 lockdep_assert_held(&scheduler->lock);
6810 WARN_ON(scheduler->pm_active_count);
6811
6812 if (scheduler->state == SCHED_SUSPENDED) {
6813 WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
6814 return 0;
6815 }
6816
6817 ret = suspend_active_groups_on_powerdown(kbdev, false);
6818
6819 if (ret) {
6820 dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
6821 atomic_read(&scheduler->non_idle_offslot_grps));
6822
6823 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6824 kbdev->pm.backend.exit_gpu_sleep_mode = true;
6825 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6826
6827 kbase_csf_scheduler_invoke_tick(kbdev);
6828 return ret;
6829 }
6830
6831 scheduler->state = SCHED_SUSPENDED;
6832 KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
6833 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6834 kbdev->pm.backend.gpu_sleep_mode_active = false;
6835 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6836
6837 wake_up_all(&kbdev->csf.event_wait);
6838 return 0;
6839 }
6840
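/**
 * kbase_csf_scheduler_reval_idleness_post_sleep() - Re-evaluate the idleness
 *                                                   of resident CSGs after the
 *                                                   MCU has entered sleep.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Called with the hwaccess lock held. If any resident CSG is found to be
 * non-idle, an exit from GPU sleep mode is requested and a scheduling tick is
 * invoked to re-activate the Scheduler.
 */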
6841 void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
6842 {
6843 u32 csg_nr;
6844
6845 lockdep_assert_held(&kbdev->hwaccess_lock);
6846
6847 WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
6848
6849 for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
6850 struct kbase_csf_cmd_stream_group_info *ginfo =
6851 &kbdev->csf.global_iface.groups[csg_nr];
6852 bool csg_idle;
6853
6854 if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
6855 continue;
6856
6857 csg_idle =
6858 kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
6859 CSG_STATUS_STATE_IDLE_MASK;
6860 if (!csg_idle) {
6861 dev_dbg(kbdev->dev,
6862 "Re-activate Scheduler after MCU sleep");
6863 kbdev->pm.backend.exit_gpu_sleep_mode = true;
6864 kbase_csf_scheduler_invoke_tick(kbdev);
6865 break;
6866 }
6867 }
6868 }
6869
6870 void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
6871 {
6872 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6873
6874 mutex_lock(&scheduler->lock);
6875 if (kbase_pm_gpu_sleep_allowed(kbdev) &&
6876 (scheduler->state == SCHED_INACTIVE))
6877 scheduler_sleep_on_idle(kbdev);
6878 mutex_unlock(&scheduler->lock);
6879 }
6880 #endif
6881
6882 void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
6883 {
6884 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6885
6886 mutex_lock(&scheduler->lock);
6887 scheduler_wakeup(kbdev, true);
6888 mutex_unlock(&scheduler->lock);
6889 }
6890