xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_scheduler.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <mali_kbase.h>
23 #include "mali_kbase_config_defaults.h"
24 #include <mali_kbase_ctx_sched.h>
25 #include <mali_kbase_reset_gpu.h>
26 #include <mali_kbase_as_fault_debugfs.h>
27 #include "mali_kbase_csf.h"
28 #include <tl/mali_kbase_tracepoints.h>
29 #include <backend/gpu/mali_kbase_pm_internal.h>
30 #include <linux/export.h>
31 #include <csf/mali_kbase_csf_registers.h>
32 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
33 #include <mali_kbase_hwaccess_time.h>
34 #include "mali_kbase_csf_tiler_heap_reclaim.h"
35 #include "mali_kbase_csf_mcu_shared_reg.h"
36 
37 /* Value to indicate that a queue group is not on the groups_to_schedule list */
38 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
39 
40 /* This decides the upper limit on the waiting time for the Scheduler
41  * to exit the sleep state. Usually the value of autosuspend_delay is
42  * expected to be around 100 milliseconds.
43  */
44 #define MAX_AUTO_SUSPEND_DELAY_MS (5000)
45 
46 /* Maximum number of endpoints which may run tiler jobs. */
47 #define CSG_TILER_MAX ((u8)1)
48 
49 /* Maximum dynamic CSG slot priority value */
50 #define MAX_CSG_SLOT_PRIORITY ((u8)15)
51 
52 /* CSF scheduler time slice value */
53 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
54 
55 /* A GPU address space slot is reserved for MCU. */
56 #define NUM_RESERVED_AS_SLOTS (1)
57 
58 /* Time to wait for completion of PING req before considering MCU as hung */
59 #define FW_PING_AFTER_ERROR_TIMEOUT_MS (10)
60 
61 /* Explicitly defining this blocked_reason code as SB_WAIT for clarity */
62 #define CS_STATUS_BLOCKED_ON_SB_WAIT CS_STATUS_BLOCKED_REASON_REASON_WAIT
63 
64 static int scheduler_group_schedule(struct kbase_queue_group *group);
65 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
66 static
67 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
68 		struct kbase_queue_group *const group,
69 		enum kbase_csf_group_state run_state);
70 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
71 		struct kbase_device *const kbdev,
72 		struct kbase_queue_group *const group);
73 static struct kbase_queue_group *get_tock_top_group(
74 	struct kbase_csf_scheduler *const scheduler);
75 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
76 static int suspend_active_queue_groups(struct kbase_device *kbdev,
77 				       unsigned long *slot_mask);
78 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
79 					      bool system_suspend);
80 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
81 static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
82 
83 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
84 
85 /**
86  * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
87  *              scheduling tick/tock to complete before the group deschedule.
88  *
89  * @group: Pointer to the group that is being descheduled.
90  *
91  * This function blocks the descheduling of the group until the dump on fault is
92  * completed and scheduling tick/tock has completed.
93  * To deschedule an on-slot group, a CSG termination request would be sent and that
94  * request might time out if the fault had occurred, and could also affect the state
95  * being dumped. Moreover, the scheduler lock would be held, so access to debugfs
96  * files would get blocked.
97  * The Scheduler lock and 'kctx->csf.lock' are released before this function starts
98  * to wait. When a request sent by the Scheduler to the FW times out, the Scheduler
99  * would also wait for the dumping to complete, releasing the Scheduler lock
100  * before that wait. Meanwhile Userspace can try to delete the group; this function
101  * ensures that the group doesn't exit the Scheduler until the scheduling
102  * tick/tock has completed. Though very unlikely, group deschedule can be triggered
103  * from multiple threads around the same time, and after the wait the Userspace thread
104  * can win the race, get the group descheduled and free the memory for the group
105  * pointer before the other threads wake up and notice that the group has already been
106  * descheduled. To avoid the freeing in such a case, a sort of refcount is used
107  * for the group, which is incremented & decremented across the wait.
108  */
109 static
110 void wait_for_dump_complete_on_group_deschedule(struct kbase_queue_group *group)
111 {
112 #if IS_ENABLED(CONFIG_DEBUG_FS)
113 	struct kbase_device *kbdev = group->kctx->kbdev;
114 	struct kbase_context *kctx = group->kctx;
115 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
116 
117 	lockdep_assert_held(&kctx->csf.lock);
118 	lockdep_assert_held(&scheduler->lock);
119 
120 	if (likely(!kbase_debug_csf_fault_dump_enabled(kbdev)))
121 		return;
122 
123 	while ((!kbase_debug_csf_fault_dump_complete(kbdev) ||
124 	       (scheduler->state == SCHED_BUSY)) &&
125 	       queue_group_scheduled_locked(group)) {
126 		group->deschedule_deferred_cnt++;
127 		mutex_unlock(&scheduler->lock);
128 		mutex_unlock(&kctx->csf.lock);
129 		kbase_debug_csf_fault_wait_completion(kbdev);
130 		mutex_lock(&kctx->csf.lock);
131 		mutex_lock(&scheduler->lock);
132 		group->deschedule_deferred_cnt--;
133 	}
134 #endif
135 }
136 
137 /**
138  * schedule_actions_trigger_df() - Notify the client about the fault and
139  *                                 wait for the dumping to complete.
140  *
141  * @kbdev: Pointer to the device
142  * @kctx:  Pointer to the context associated with the CSG slot for which
143  *         the timeout was seen.
144  * @error: Error code indicating the type of timeout that occurred.
145  *
146  * This function notifies the Userspace client waiting for the faults and waits
147  * for the Client to complete the dumping.
148  * The function is called only from the scheduling tick/tock, when a request sent by
149  * the Scheduler to the FW times out, or from the protm event work item of the group,
150  * when the protected mode entry request times out.
151  * In the latter case no wait is done, as the scheduler lock would be released
152  * immediately. In the former case the function waits, releasing the scheduler
153  * lock before the wait.
154  * won't change meanwhile, so no group can enter/exit the Scheduler, become
155  * runnable or go off slot.
156  */
157 static void schedule_actions_trigger_df(struct kbase_device *kbdev,
158 	struct kbase_context *kctx, enum dumpfault_error_type error)
159 {
160 #if IS_ENABLED(CONFIG_DEBUG_FS)
161 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
162 
163 	lockdep_assert_held(&scheduler->lock);
164 
165 	if (!kbase_debug_csf_fault_notify(kbdev, kctx, error))
166 		return;
167 
168 	if (unlikely(scheduler->state != SCHED_BUSY)) {
169 		WARN_ON(error != DF_PROTECTED_MODE_ENTRY_FAILURE);
170 		return;
171 	}
172 
173 	mutex_unlock(&scheduler->lock);
174 	kbase_debug_csf_fault_wait_completion(kbdev);
175 	mutex_lock(&scheduler->lock);
176 	WARN_ON(scheduler->state != SCHED_BUSY);
177 #endif
178 }
179 
180 #ifdef KBASE_PM_RUNTIME
181 /**
182  * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
183  *                                      sleeping state.
184  *
185  * @kbdev: Pointer to the device
186  *
187  * This function waits until the Scheduler has exited the sleep state and
188  * it is called when an on-slot group is terminated or when the suspend
189  * buffer of an on-slot group needs to be captured.
190  *
191  * Return: 0 when the wait is successful, otherwise an error code.
192  */
193 static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
194 {
195 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
196 	int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
197 	unsigned int sleep_exit_wait_time;
198 	long remaining;
199 	int ret = 0;
200 
201 	lockdep_assert_held(&scheduler->lock);
202 	WARN_ON(scheduler->state != SCHED_SLEEPING);
203 
204 	/* No point in waiting if the autosuspend_delay value is negative.
205 	 * For a negative autosuspend_delay the Driver would directly
206 	 * go for the suspend of the Scheduler, but the autosuspend_delay value
207 	 * could have been changed after the sleep was initiated.
208 	 */
209 	if (autosuspend_delay < 0)
210 		return -EINVAL;
211 
212 	if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
213 		autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
214 
215 	/* Usually Scheduler would remain in sleeping state until the
216 	 * auto-suspend timer expires and all active CSGs are suspended.
217 	 */
218 	sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
219 
220 	remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
221 
222 	while ((scheduler->state == SCHED_SLEEPING) && !ret) {
223 		mutex_unlock(&scheduler->lock);
224 		remaining = wait_event_timeout(
225 				kbdev->csf.event_wait,
226 				(scheduler->state != SCHED_SLEEPING),
227 				remaining);
228 		mutex_lock(&scheduler->lock);
229 		if (!remaining && (scheduler->state == SCHED_SLEEPING))
230 			ret = -ETIMEDOUT;
231 	}
232 
233 	return ret;
234 }
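
/*
 * Illustrative sketch (not part of the driver): the loop above uses the common
 * "drop the mutex, wait on a condition with a timeout, retake the mutex"
 * pattern so that other threads can change scheduler->state while this one
 * sleeps. With hypothetical names 'my_lock', 'my_wq' and 'my_state', the bare
 * pattern looks like:
 *
 *	long left = msecs_to_jiffies(timeout_ms);
 *
 *	while ((my_state == STATE_SLEEPING) && left) {
 *		mutex_unlock(&my_lock);
 *		left = wait_event_timeout(my_wq, my_state != STATE_SLEEPING, left);
 *		mutex_lock(&my_lock);
 *	}
 *	// left == 0 here means the wait timed out
 */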
235 
236 /**
237  * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
238  *
239  * @kbdev: Pointer to the device
240  *
241  * This function will force the Scheduler to exit the sleep state by doing the
242  * wake up of MCU and suspension of on-slot groups. It is called at the time of
243  * system suspend.
244  *
245  * Return: 0 on success.
246  */
247 static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
248 {
249 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
250 	unsigned long flags;
251 	int ret = 0;
252 
253 	lockdep_assert_held(&scheduler->lock);
254 	WARN_ON(scheduler->state != SCHED_SLEEPING);
255 	WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
256 
257 	kbase_pm_lock(kbdev);
258 	ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
259 	kbase_pm_unlock(kbdev);
260 	if (ret) {
261 		dev_warn(kbdev->dev,
262 			 "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
263 			 kbase_backend_get_cycle_cnt(kbdev));
264 		goto out;
265 	}
266 
267 	ret = suspend_active_groups_on_powerdown(kbdev, true);
268 	if (ret)
269 		goto out;
270 
271 	kbase_pm_lock(kbdev);
272 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
273 	kbdev->pm.backend.gpu_sleep_mode_active = false;
274 	kbdev->pm.backend.gpu_wakeup_override = false;
275 	kbase_pm_update_state(kbdev);
276 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
277 	ret = kbase_pm_wait_for_desired_state(kbdev);
278 	kbase_pm_unlock(kbdev);
279 	if (ret) {
280 		dev_warn(kbdev->dev,
281 			 "[%llu] Wait for pm state change failed on forced scheduler suspend",
282 			 kbase_backend_get_cycle_cnt(kbdev));
283 		goto out;
284 	}
285 
286 	scheduler->state = SCHED_SUSPENDED;
287 	KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
288 
289 	return 0;
290 
291 out:
292 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
293 	kbdev->pm.backend.exit_gpu_sleep_mode = true;
294 	kbdev->pm.backend.gpu_wakeup_override = false;
295 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
296 	kbase_csf_scheduler_invoke_tick(kbdev);
297 
298 	return ret;
299 }
300 #endif
301 
302 /**
303  * tick_timer_callback() - Callback function for the scheduling tick hrtimer
304  *
305  * @timer: Pointer to the scheduling tick hrtimer
306  *
307  * This function will enqueue the scheduling tick work item for immediate
308  * execution, if it has not been queued already.
309  *
310  * Return: enum value to indicate that timer should not be restarted.
311  */
312 static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
313 {
314 	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
315 						  csf.scheduler.tick_timer);
316 
317 	kbase_csf_scheduler_tick_advance(kbdev);
318 	return HRTIMER_NORESTART;
319 }
320 
321 /**
322  * start_tick_timer() - Start the scheduling tick hrtimer.
323  *
324  * @kbdev: Pointer to the device
325  *
326  * This function will start the scheduling tick hrtimer and is supposed to
327  * be called only from the tick work item function. The tick hrtimer should
328  * not be active already.
329  */
330 static void start_tick_timer(struct kbase_device *kbdev)
331 {
332 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
333 	unsigned long flags;
334 
335 	lockdep_assert_held(&scheduler->lock);
336 
337 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
338 	WARN_ON(scheduler->tick_timer_active);
339 	if (likely(!work_pending(&scheduler->tick_work))) {
340 		scheduler->tick_timer_active = true;
341 
342 		hrtimer_start(&scheduler->tick_timer,
343 		    HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
344 		    HRTIMER_MODE_REL);
345 	}
346 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
347 }
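
/*
 * Illustrative sketch (not part of the driver): the tick timer is a relative,
 * one-shot hrtimer; its callback (tick_timer_callback above) only advances the
 * scheduling tick and returns HRTIMER_NORESTART, so the timer is re-armed from
 * the tick work item rather than restarting itself. The same pattern with
 * hypothetical 'my_timer' and 'my_callback' names looks like:
 *
 *	static enum hrtimer_restart my_callback(struct hrtimer *t)
 *	{
 *		// hand off to a workqueue here; do not restart the timer
 *		return HRTIMER_NORESTART;
 *	}
 *
 *	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	my_timer.function = my_callback;
 *	hrtimer_start(&my_timer, ms_to_ktime(period_ms), HRTIMER_MODE_REL);
 */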
348 
349 /**
350  * cancel_tick_timer() - Cancel the scheduling tick hrtimer
351  *
352  * @kbdev: Pointer to the device
353  */
354 static void cancel_tick_timer(struct kbase_device *kbdev)
355 {
356 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
357 	unsigned long flags;
358 
359 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
360 	scheduler->tick_timer_active = false;
361 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
362 	hrtimer_cancel(&scheduler->tick_timer);
363 }
364 
365 /**
366  * enqueue_tick_work() - Enqueue the scheduling tick work item
367  *
368  * @kbdev: Pointer to the device
369  *
370  * This function will queue the scheduling tick work item for immediate
371  * execution. This shall only be called when both the tick hrtimer and tick
372  * work item are not active/pending.
373  */
374 static void enqueue_tick_work(struct kbase_device *kbdev)
375 {
376 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
377 
378 	lockdep_assert_held(&scheduler->lock);
379 
380 	kbase_csf_scheduler_invoke_tick(kbdev);
381 }
382 
383 static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
384 {
385 	WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
386 
387 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
388 	clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
389 }
390 
391 static int acquire_doorbell(struct kbase_device *kbdev)
392 {
393 	int doorbell_nr;
394 
395 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
396 
397 	doorbell_nr = find_first_zero_bit(
398 			kbdev->csf.scheduler.doorbell_inuse_bitmap,
399 			CSF_NUM_DOORBELL);
400 
401 	if (doorbell_nr >= CSF_NUM_DOORBELL)
402 		return KBASEP_USER_DB_NR_INVALID;
403 
404 	set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
405 
406 	return doorbell_nr;
407 }
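
/*
 * Illustrative sketch (not part of the driver): acquire_doorbell() and
 * release_doorbell() above form a simple first-fit bitmap allocator protected
 * by the scheduler lock. The same pattern on a hypothetical bitmap 'my_bitmap'
 * of MY_NUM_RES bits:
 *
 *	int res = find_first_zero_bit(my_bitmap, MY_NUM_RES);
 *
 *	if (res < MY_NUM_RES)
 *		set_bit(res, my_bitmap);	// claim the free slot
 *	else
 *		res = -1;			// nothing free
 *	...
 *	clear_bit(res, my_bitmap);		// release it when done
 */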
408 
409 static void unassign_user_doorbell_from_group(struct kbase_device *kbdev,
410 		struct kbase_queue_group *group)
411 {
412 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
413 
414 	if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
415 		release_doorbell(kbdev, group->doorbell_nr);
416 		group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
417 	}
418 }
419 
420 static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev,
421 		struct kbase_queue *queue)
422 {
423 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
424 
425 	mutex_lock(&kbdev->csf.reg_lock);
426 
427 	if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
428 		queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
429 		/* After this the dummy page would be mapped in */
430 		unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping,
431 			queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
432 	}
433 
434 	mutex_unlock(&kbdev->csf.reg_lock);
435 }
436 
437 static void assign_user_doorbell_to_group(struct kbase_device *kbdev,
438 		struct kbase_queue_group *group)
439 {
440 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
441 
442 	if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
443 		group->doorbell_nr = acquire_doorbell(kbdev);
444 }
445 
446 static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
447 		struct kbase_queue *const queue)
448 {
449 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
450 
451 	mutex_lock(&kbdev->csf.reg_lock);
452 
453 	/* If bind operation for the queue hasn't completed yet, then the
454 	 * CSI can't be programmed for the queue
455 	 * (even in stopped state) and so the doorbell also can't be assigned
456 	 * to it.
457 	 */
458 	if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) &&
459 	    (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) {
460 		WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID);
461 		queue->doorbell_nr = queue->group->doorbell_nr;
462 
463 		/* After this the real Hw doorbell page would be mapped in */
464 		unmap_mapping_range(
465 				kbdev->csf.db_filp->f_inode->i_mapping,
466 				queue->db_file_offset << PAGE_SHIFT,
467 				PAGE_SIZE, 1);
468 	}
469 
470 	mutex_unlock(&kbdev->csf.reg_lock);
471 }
472 
473 static void scheduler_doorbell_init(struct kbase_device *kbdev)
474 {
475 	int doorbell_nr;
476 
477 	bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap,
478 		CSF_NUM_DOORBELL);
479 
480 	mutex_lock(&kbdev->csf.scheduler.lock);
481 	/* Reserve doorbell 0 for use by kernel driver */
482 	doorbell_nr = acquire_doorbell(kbdev);
483 	mutex_unlock(&kbdev->csf.scheduler.lock);
484 
485 	WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
486 }
487 
488 /**
489  * update_on_slot_queues_offsets - Update active queues' INSERT & EXTRACT ofs
490  *
491  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
492  *
493  * This function updates the EXTRACT offset for all queues whose groups have
494  * been assigned a physical slot. These values could be used to detect a
495  * queue's true idleness status. This is intended to be an additional check
496  * on top of the GPU idle notification to account for race conditions.
497  * This function is supposed to be called only when GPU idle notification
498  * interrupt is received.
499  */
500 static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
501 {
502 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
503 	/* All CSGs have the same number of CSs */
504 	size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
505 	size_t i;
506 
507 	lockdep_assert_held(&scheduler->interrupt_lock);
508 
509 	/* csg_slots_idle_mask is not used here for the looping, as it could get
510 	 * updated concurrently when Scheduler re-evaluates the idle status of
511 	 * the CSGs for which idle notification was received previously.
512 	 */
513 	for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
514 		struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
515 		size_t j;
516 
517 		if (WARN_ON(!group))
518 			continue;
519 
520 		for (j = 0; j < max_streams; ++j) {
521 			struct kbase_queue *const queue = group->bound_queues[j];
522 
523 			if (queue) {
524 				if (queue->user_io_addr) {
525 					u64 const *const output_addr =
526 						(u64 const *)(queue->user_io_addr + PAGE_SIZE);
527 
528 					queue->extract_ofs =
529 						output_addr[CS_EXTRACT_LO / sizeof(u64)];
530 				} else {
531 					dev_warn(kbdev->dev,
532 						 "%s(): queue->user_io_addr is NULL, queue: %p",
533 						 __func__,
534 						 queue);
535 				}
536 			}
537 		}
538 	}
539 }
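
/*
 * Illustrative note (not part of the driver): each queue has a pair of user
 * I/O pages, with the input page at queue->user_io_addr and the output page
 * one PAGE_SIZE above it (see program_cs_extract_init() later in this file).
 * The EXTRACT pointer is a 64-bit value read from the output page, as done
 * above:
 *
 *	u64 const *out = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
 *	u64 extract = out[CS_EXTRACT_LO / sizeof(u64)];
 *
 * As the kernel-doc above says, these snapshots can later be compared to tell
 * a genuinely idle queue from one that made progress in between.
 */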
540 
541 static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
542 {
543 	atomic_set(&scheduler->gpu_no_longer_idle, false);
544 	queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
545 }
546 
547 void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
548 {
549 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
550 	int non_idle_offslot_grps;
551 	bool can_suspend_on_idle;
552 
553 	lockdep_assert_held(&kbdev->hwaccess_lock);
554 	lockdep_assert_held(&scheduler->interrupt_lock);
555 
556 	non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
557 	can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
558 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
559 			 ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
560 
561 	if (!non_idle_offslot_grps) {
562 		if (can_suspend_on_idle) {
563 			/* fast_gpu_idle_handling is protected by the
564 			 * interrupt_lock, which would prevent this from being
565 			 * updated whilst gpu_idle_worker() is executing.
566 			 */
567 			scheduler->fast_gpu_idle_handling =
568 				(kbdev->csf.gpu_idle_hysteresis_us == 0) ||
569 				!kbase_csf_scheduler_all_csgs_idle(kbdev);
570 
571 			/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
572 			 * finished. It is queued beforehand to reduce the time until it executes,
573 			 * but it will eventually be blocked by the scheduler->interrupt_lock.
574 			 */
575 			enqueue_gpu_idle_work(scheduler);
576 
577 			/* The extract offsets are unused in fast GPU idle handling */
578 			if (!scheduler->fast_gpu_idle_handling)
579 				update_on_slot_queues_offsets(kbdev);
580 		}
581 	} else {
582 		/* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
583 		kbase_csf_scheduler_tick_advance_nolock(kbdev);
584 	}
585 }
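
/*
 * Illustrative note (not part of the driver): the KTRACE payload above packs
 * two values into one u64, the non-idle off-slot group count in the low 32
 * bits and the can_suspend_on_idle flag in the upper 32 bits. Decoding a
 * traced value 'info' of that form would look like:
 *
 *	u32 grps = (u32)(info & 0xffffffffUL);	// off-slot group count
 *	bool can_suspend = (info >> 32) != 0;	// suspend-on-idle flag
 */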
586 
587 u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
588 {
589 	u32 nr_active_csgs;
590 
591 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
592 
593 	nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
594 				kbdev->csf.global_iface.group_num);
595 
596 	return nr_active_csgs;
597 }
598 
599 u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
600 {
601 	u32 nr_active_csgs;
602 	unsigned long flags;
603 
604 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
605 	nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
606 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
607 
608 	return nr_active_csgs;
609 }
610 
611 /**
612  * csg_slot_in_use - returns true if a queue group has been programmed on a
613  *                   given CSG slot.
614  *
615  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
616  * @slot:  Index/number of the CSG slot in question.
617  *
618  * Return: true if the CSG slot is in use, i.e. a group is resident on it.
619  *
620  * Note: Caller must hold the scheduler lock.
621  */
622 static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot)
623 {
624 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
625 
626 	return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL);
627 }
628 
629 static bool queue_group_suspended_locked(struct kbase_queue_group *group)
630 {
631 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
632 
633 	return (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
634 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE ||
635 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
636 }
637 
638 static bool queue_group_idle_locked(struct kbase_queue_group *group)
639 {
640 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
641 
642 	return (group->run_state == KBASE_CSF_GROUP_IDLE ||
643 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
644 }
645 
646 static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
647 {
648 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
649 
650 	return (group->run_state == KBASE_CSF_GROUP_IDLE);
651 }
652 
653 static bool can_schedule_idle_group(struct kbase_queue_group *group)
654 {
655 	return (on_slot_group_idle_locked(group) ||
656 		(group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME));
657 }
658 
659 static bool queue_group_scheduled(struct kbase_queue_group *group)
660 {
661 	return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
662 		group->run_state != KBASE_CSF_GROUP_TERMINATED &&
663 		group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
664 }
665 
666 static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
667 {
668 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
669 
670 	return queue_group_scheduled(group);
671 }
672 
673 /**
674  * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
675  *
676  * @kbdev: Pointer to the GPU device
677  *
678  * This function waits for the GPU to exit protected mode which is confirmed
679  * when active_protm_grp is set to NULL.
680  *
681  * Return: true on success, false otherwise.
682  */
683 static bool scheduler_protm_wait_quit(struct kbase_device *kbdev)
684 {
685 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
686 	long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
687 	long remaining;
688 	bool success = true;
689 
690 	lockdep_assert_held(&scheduler->lock);
691 
692 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_START, NULL, jiffies_to_msecs(wt));
693 
694 	remaining = wait_event_timeout(kbdev->csf.event_wait,
695 			!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
696 
697 	if (unlikely(!remaining)) {
698 		struct kbase_queue_group *group = kbdev->csf.scheduler.active_protm_grp;
699 		struct kbase_context *kctx = group ? group->kctx : NULL;
700 
701 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
702 			kbase_backend_get_cycle_cnt(kbdev),
703 			kbdev->csf.fw_timeout_ms);
704 		schedule_actions_trigger_df(kbdev, kctx, DF_PROTECTED_MODE_EXIT_TIMEOUT);
705 		success = false;
706 	}
707 
708 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_PROTM_WAIT_QUIT_END, NULL, jiffies_to_msecs(remaining));
709 
710 	return success;
711 }
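
/*
 * Illustrative note (not part of the driver): wait_event_timeout() returns the
 * number of jiffies left when the condition became true, or 0 on timeout, so
 * "!remaining" above is the timeout case. Assuming hypothetical names 'my_wq',
 * 'my_condition' and 'timeout_ms', the same shape in plain kernel code is:
 *
 *	long remaining = wait_event_timeout(my_wq, my_condition,
 *					    msecs_to_jiffies(timeout_ms));
 *
 *	if (!remaining)
 *		pr_warn("condition not met within %u ms\n", timeout_ms);
 */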
712 
713 /**
714  * scheduler_force_protm_exit() - Force GPU to exit protected mode.
715  *
716  * @kbdev: Pointer to the GPU device
717  *
718  * This function sends a ping request to the firmware and waits for the GPU
719  * to exit protected mode.
720  *
721  * If the GPU does not exit protected mode, it is considered as hung.
722  * A GPU reset would then be triggered.
723  */
724 static void scheduler_force_protm_exit(struct kbase_device *kbdev)
725 {
726 	unsigned long flags;
727 
728 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
729 
730 	kbase_csf_firmware_ping(kbdev);
731 
732 	if (scheduler_protm_wait_quit(kbdev))
733 		return;
734 
735 	dev_err(kbdev->dev, "Possible GPU hang in Protected mode");
736 
737 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
738 	if (kbdev->csf.scheduler.active_protm_grp) {
739 		dev_err(kbdev->dev,
740 			"Group-%d of context %d_%d ran in protected mode for too long on slot %d",
741 			kbdev->csf.scheduler.active_protm_grp->handle,
742 			kbdev->csf.scheduler.active_protm_grp->kctx->tgid,
743 			kbdev->csf.scheduler.active_protm_grp->kctx->id,
744 			kbdev->csf.scheduler.active_protm_grp->csg_nr);
745 	}
746 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
747 
748 	/* The GPU could be stuck in Protected mode. To prevent a hang,
749 	 * a GPU reset is performed.
750 	 */
751 	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
752 		kbase_reset_gpu(kbdev);
753 }
754 
755 /**
756  * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
757  * automatically for periodic tasks.
758  *
759  * @kbdev: Pointer to the device
760  *
761  * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
762  * CSF scheduler lock to already have been held.
763  *
764  * Return: true if the scheduler is configured to wake up periodically
765  */
766 static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
767 {
768 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
769 
770 	return kbdev->csf.scheduler.timer_enabled;
771 }
772 
773 /**
774  * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
775  *                                        Scheduler
776  *
777  * @kbdev: Pointer to the device
778  * @suspend_handler: Handler code for how to handle a suspend that might occur.
779  *
780  * This function is usually called when Scheduler needs to be activated.
781  * The PM reference count is acquired for the Scheduler and the power on
782  * of GPU is initiated.
783  *
784  * Return: 0 if successful or a negative error code on failure.
785  */
786 static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
787 				enum kbase_pm_suspend_handler suspend_handler)
788 {
789 	unsigned long flags;
790 	u32 prev_count;
791 	int ret = 0;
792 
793 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
794 
795 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
796 	prev_count = kbdev->csf.scheduler.pm_active_count;
797 	if (!WARN_ON(prev_count == U32_MAX))
798 		kbdev->csf.scheduler.pm_active_count++;
799 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
800 
801 	/* On 0 => 1, make a pm_ctx_active request */
802 	if (!prev_count) {
803 		ret = kbase_pm_context_active_handle_suspend(kbdev,
804 							suspend_handler);
805 		/* Invoke the PM state machines again as the change in MCU
806 		 * desired status, due to the update of scheduler.pm_active_count,
807 		 * may be missed by the thread that called pm_wait_for_desired_state()
808 		 */
809 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
810 		if (ret)
811 			kbdev->csf.scheduler.pm_active_count--;
812 		kbase_pm_update_state(kbdev);
813 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
814 	}
815 
816 	return ret;
817 }
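
/*
 * Illustrative sketch (not part of the driver): the PM activation above is a
 * refcount-transition pattern, where only the 0 -> 1 transition takes the
 * underlying reference and, symmetrically in scheduler_pm_idle() below, only
 * the 1 -> 0 transition drops it. Stripped of the locking and error handling,
 * and using hypothetical take_real_reference()/drop_real_reference() helpers,
 * the shape is:
 *
 *	u32 prev = count++;
 *
 *	if (prev == 0)
 *		take_real_reference();		// first user powers things up
 *	...
 *	prev = count--;
 *	if (prev == 1)
 *		drop_real_reference();		// last user powers things down
 */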
818 
819 #ifdef KBASE_PM_RUNTIME
820 /**
821  * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
822  *                                     Scheduler
823  *
824  * @kbdev: Pointer to the device
825  * @flags: Pointer to the flags variable containing the interrupt state
826  *         when hwaccess lock was acquired.
827  *
828  * This function is called when Scheduler needs to be activated from the
829  * sleeping state.
830  * The PM reference count is acquired for the Scheduler and the wake up of
831  * MCU is initiated. It resets the flag that indicates to the MCU state
832  * machine that MCU needs to be put in sleep state.
833  *
834  * Note: This function shall be called with hwaccess lock held and it may
835  * release that lock and reacquire it.
836  *
837  * Return: zero when the PM reference was taken and non-zero when the
838  * system is being suspended or is already suspended.
839  */
840 static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
841 					   unsigned long *flags)
842 {
843 	u32 prev_count;
844 	int ret = 0;
845 
846 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
847 	lockdep_assert_held(&kbdev->hwaccess_lock);
848 
849 	prev_count = kbdev->csf.scheduler.pm_active_count;
850 	if (!WARN_ON(prev_count == U32_MAX))
851 		kbdev->csf.scheduler.pm_active_count++;
852 
853 	/* On 0 => 1, make a pm_ctx_active request */
854 	if (!prev_count) {
855 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, *flags);
856 
857 		ret = kbase_pm_context_active_handle_suspend(kbdev,
858 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
859 
860 		spin_lock_irqsave(&kbdev->hwaccess_lock, *flags);
861 		if (ret)
862 			kbdev->csf.scheduler.pm_active_count--;
863 		else
864 			kbdev->pm.backend.gpu_sleep_mode_active = false;
865 		kbase_pm_update_state(kbdev);
866 	}
867 
868 	return ret;
869 }
870 #endif
871 
872 /**
873  * scheduler_pm_idle() - Release the PM reference count held by Scheduler
874  *
875  * @kbdev: Pointer to the device
876  *
877  * This function is usually called after Scheduler is suspended.
878  * The PM reference count held by the Scheduler is released to trigger the
879  * power down of GPU.
880  */
881 static void scheduler_pm_idle(struct kbase_device *kbdev)
882 {
883 	unsigned long flags;
884 	u32 prev_count;
885 
886 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
887 
888 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
889 	prev_count = kbdev->csf.scheduler.pm_active_count;
890 	if (!WARN_ON(prev_count == 0))
891 		kbdev->csf.scheduler.pm_active_count--;
892 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
893 
894 	if (prev_count == 1) {
895 		kbase_pm_context_idle(kbdev);
896 		/* Invoke the PM state machines again as the change in MCU
897 		 * desired status, due to the update of scheduler.pm_active_count,
898 		 * may be missed by the thread that called pm_wait_for_desired_state()
899 		 */
900 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
901 		kbase_pm_update_state(kbdev);
902 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
903 	}
904 }
905 
906 #ifdef KBASE_PM_RUNTIME
907 /**
908  * scheduler_pm_idle_before_sleep() - Release the PM reference count and
909  *                                    trigger the transition to sleep state.
910  *
911  * @kbdev: Pointer to the device
912  *
913  * This function is called on the GPU idle notification. It releases the
914  * Scheduler's PM reference count and sets the flag to indicate to the
915  * MCU state machine that MCU needs to be put in sleep state.
916  */
917 static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
918 {
919 	unsigned long flags;
920 	u32 prev_count;
921 
922 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
923 
924 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
925 	prev_count = kbdev->csf.scheduler.pm_active_count;
926 	if (!WARN_ON(prev_count == 0))
927 		kbdev->csf.scheduler.pm_active_count--;
928 	kbdev->pm.backend.gpu_sleep_mode_active = true;
929 	kbdev->pm.backend.exit_gpu_sleep_mode = false;
930 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
931 
932 	if (prev_count == 1) {
933 		kbase_pm_context_idle(kbdev);
934 		/* Invoke the PM state machines again as the change in MCU
935 		 * desired status, due to the update of scheduler.pm_active_count,
936 		 * may be missed by the thread that called pm_wait_for_desired_state()
937 		 */
938 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
939 		kbase_pm_update_state(kbdev);
940 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
941 	}
942 }
943 #endif
944 
945 static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
946 {
947 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
948 	int ret;
949 
950 	lockdep_assert_held(&scheduler->lock);
951 
952 	if ((scheduler->state != SCHED_SUSPENDED) &&
953 	    (scheduler->state != SCHED_SLEEPING))
954 		return;
955 
956 	if (scheduler->state == SCHED_SUSPENDED) {
957 		dev_dbg(kbdev->dev,
958 			"Re-activating the Scheduler after suspend");
959 		ret = scheduler_pm_active_handle_suspend(kbdev,
960 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
961 	} else {
962 #ifdef KBASE_PM_RUNTIME
963 		unsigned long flags;
964 
965 		dev_dbg(kbdev->dev,
966 			"Re-activating the Scheduler out of sleep");
967 
968 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
969 		ret = scheduler_pm_active_after_sleep(kbdev, &flags);
970 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
971 #endif
972 	}
973 
974 	if (ret) {
975 		/* GPUCORE-29850 would add the handling for the case where
976 		 * Scheduler could not be activated due to system suspend.
977 		 */
978 		dev_info(kbdev->dev,
979 			"Couldn't wakeup Scheduler due to system suspend");
980 		return;
981 	}
982 
983 	scheduler->state = SCHED_INACTIVE;
984 	KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
985 
986 	if (kick)
987 		scheduler_enable_tick_timer_nolock(kbdev);
988 }
989 
990 static void scheduler_suspend(struct kbase_device *kbdev)
991 {
992 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
993 
994 	lockdep_assert_held(&scheduler->lock);
995 
996 	if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
997 		dev_dbg(kbdev->dev, "Suspending the Scheduler");
998 		scheduler_pm_idle(kbdev);
999 		scheduler->state = SCHED_SUSPENDED;
1000 		KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
1001 	}
1002 }
1003 
1004 /**
1005  * update_idle_suspended_group_state() - Move the queue group to a non-idle
1006  *                                       suspended state.
1007  * @group: Pointer to the queue group.
1008  *
1009  * This function is called to change the state of queue group to non-idle
1010  * suspended state, if the group was suspended when all the queues bound to it
1011  * became empty or when some queues got blocked on a sync wait & others became
1012  * empty. The group is also moved to the runnable list from idle wait list in
1013  * the latter case.
1014  * So the function gets called when a queue is kicked or sync wait condition
1015  * gets satisfied.
1016  */
1017 static void update_idle_suspended_group_state(struct kbase_queue_group *group)
1018 {
1019 	struct kbase_csf_scheduler *scheduler =
1020 		&group->kctx->kbdev->csf.scheduler;
1021 	int new_val;
1022 
1023 	lockdep_assert_held(&scheduler->lock);
1024 
1025 	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) {
1026 		remove_group_from_idle_wait(group);
1027 		insert_group_to_runnable(scheduler, group,
1028 					 KBASE_CSF_GROUP_SUSPENDED);
1029 	} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
1030 		group->run_state = KBASE_CSF_GROUP_SUSPENDED;
1031 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
1032 					 group->run_state);
1033 
1034 		/* If scheduler is not suspended and the given group's
1035 		 * static priority (reflected by the scan_seq_num) is inside
1036 		 * the current tick slot-range, or there are some on_slot
1037 		 * idle groups, schedule an async tock.
1038 		 */
1039 		if (scheduler->state != SCHED_SUSPENDED) {
1040 			unsigned long flags;
1041 			int n_idle;
1042 			int n_used;
1043 			int n_slots =
1044 				group->kctx->kbdev->csf.global_iface.group_num;
1045 
1046 			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1047 			n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
1048 					       n_slots);
1049 			n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
1050 					       n_slots);
1051 			spin_unlock_irqrestore(&scheduler->interrupt_lock,
1052 					       flags);
1053 
1054 			if (n_idle ||
1055 			    n_used < scheduler->num_csg_slots_for_tick ||
1056 			    group->scan_seq_num <
1057 				    scheduler->num_csg_slots_for_tick)
1058 				schedule_in_cycle(group, true);
1059 		}
1060 	} else
1061 		return;
1062 
1063 	new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
1064 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
1065 				 new_val);
1066 }
1067 
1068 int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
1069 {
1070 	struct kbase_csf_scheduler *scheduler =
1071 			&group->kctx->kbdev->csf.scheduler;
1072 	int slot_num = group->csg_nr;
1073 
1074 	lockdep_assert_held(&scheduler->interrupt_lock);
1075 
1076 	if (slot_num >= 0) {
1077 		if (WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
1078 			    group))
1079 			return -1;
1080 	}
1081 
1082 	return slot_num;
1083 }
1084 
1085 int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
1086 {
1087 	struct kbase_csf_scheduler *scheduler =
1088 			&group->kctx->kbdev->csf.scheduler;
1089 	unsigned long flags;
1090 	int slot_num;
1091 
1092 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1093 	slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
1094 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
1095 
1096 	return slot_num;
1097 }
1098 
1099 /* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
1100  *
1101  * @group: GPU queue group to be checked
1102  *
1103  * This function needs to be called with scheduler's lock held
1104  *
1105  * Return: true if @group is on slot.
1106  */
1107 static bool kbasep_csf_scheduler_group_is_on_slot_locked(
1108 				struct kbase_queue_group *group)
1109 {
1110 	struct kbase_csf_scheduler *scheduler =
1111 			&group->kctx->kbdev->csf.scheduler;
1112 	int slot_num = group->csg_nr;
1113 
1114 	lockdep_assert_held(&scheduler->lock);
1115 
1116 	if (slot_num >= 0) {
1117 		if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
1118 			     group))
1119 			return true;
1120 	}
1121 
1122 	return false;
1123 }
1124 
1125 bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
1126 			struct kbase_queue_group *group)
1127 {
1128 	struct kbase_csf_scheduler *scheduler =
1129 			&group->kctx->kbdev->csf.scheduler;
1130 	int slot_num = group->csg_nr;
1131 
1132 	lockdep_assert_held(&scheduler->interrupt_lock);
1133 
1134 	if (WARN_ON(slot_num < 0))
1135 		return false;
1136 
1137 	return test_bit(slot_num, scheduler->csgs_events_enable_mask);
1138 }
1139 
1140 struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
1141 			struct kbase_device *kbdev, int slot)
1142 {
1143 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
1144 
1145 	return kbdev->csf.scheduler.csg_slots[slot].resident_group;
1146 }
1147 
1148 static int halt_stream_sync(struct kbase_queue *queue)
1149 {
1150 	struct kbase_queue_group *group = queue->group;
1151 	struct kbase_device *kbdev = queue->kctx->kbdev;
1152 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1153 	struct kbase_csf_cmd_stream_group_info *ginfo;
1154 	struct kbase_csf_cmd_stream_info *stream;
1155 	int csi_index = queue->csi_index;
1156 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1157 	unsigned long flags;
1158 
1159 	if (WARN_ON(!group) ||
1160 	    WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1161 		return -EINVAL;
1162 
1163 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1164 	ginfo = &global_iface->groups[group->csg_nr];
1165 	stream = &ginfo->streams[csi_index];
1166 
1167 	if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
1168 			CS_REQ_STATE_START) {
1169 
1170 		remaining = wait_event_timeout(kbdev->csf.event_wait,
1171 			(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1172 			 == CS_ACK_STATE_START), remaining);
1173 
1174 		if (!remaining) {
1175 			dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
1176 				 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1177 				 csi_index, group->handle, group->csg_nr);
1178 			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1179 				kbase_reset_gpu(kbdev);
1180 
1181 
1182 			return -ETIMEDOUT;
1183 		}
1184 
1185 		remaining =
1186 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1187 	}
1188 
1189 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1190 	/* Set state to STOP */
1191 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
1192 					 CS_REQ_STATE_MASK);
1193 
1194 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
1195 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
1196 
1197 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQ, group, queue, 0u);
1198 
1199 	/* Timed wait */
1200 	remaining = wait_event_timeout(kbdev->csf.event_wait,
1201 		(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1202 		 == CS_ACK_STATE_STOP), remaining);
1203 
1204 	if (!remaining) {
1205 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
1206 			 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1207 			 queue->csi_index, group->handle, group->csg_nr);
1208 
1209 		/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
1210 		 * will be reset as a work-around.
1211 		 */
1212 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1213 			kbase_reset_gpu(kbdev);
1214 
1215 
1216 	}
1217 	return (remaining) ? 0 : -ETIMEDOUT;
1218 }
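
/*
 * Illustrative note (not part of the driver): stopping a stream follows the
 * firmware request/acknowledge handshake used throughout this file. The host
 * writes the desired state into the CS_REQ input register, rings the kernel
 * doorbell, and then waits until the state field of the CS_ACK output register
 * reflects the same value:
 *
 *	// request
 *	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
 *					 CS_REQ_STATE_MASK);
 *	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, csg_nr, true);
 *
 *	// acknowledge (with a timeout, as above)
 *	wait_event_timeout(kbdev->csf.event_wait,
 *			   CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) ==
 *			   CS_ACK_STATE_STOP,
 *			   timeout_jiffies);
 */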
1219 
1220 static bool can_halt_stream(struct kbase_device *kbdev,
1221 		struct kbase_queue_group *group)
1222 {
1223 	struct kbase_csf_csg_slot *const csg_slot =
1224 			kbdev->csf.scheduler.csg_slots;
1225 	unsigned long flags;
1226 	bool can_halt;
1227 	int slot;
1228 
1229 	if (!queue_group_scheduled(group))
1230 		return true;
1231 
1232 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1233 	slot = kbase_csf_scheduler_group_get_slot_locked(group);
1234 	can_halt = (slot >= 0) &&
1235 		   (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1236 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1237 				flags);
1238 
1239 	return can_halt;
1240 }
1241 
1242 /**
1243  * sched_halt_stream() - Stop a GPU queue when its queue group is not running
1244  *                       on a CSG slot.
1245  * @queue: Pointer to the GPU queue to stop.
1246  *
1247  * This function handles stopping gpu queues for groups that are either not on
1248  * a CSG slot or are on the slot but undergoing transition to
1249  * resume or suspend states.
1250  * It waits until the queue group is scheduled on a slot and starts running,
1251  * which is needed as groups that were suspended may need to resume all queues
1252  * that were enabled and running at the time of suspension.
1253  *
1254  * Return: 0 on success, or negative on failure.
1255  */
1256 static int sched_halt_stream(struct kbase_queue *queue)
1257 {
1258 	struct kbase_queue_group *group = queue->group;
1259 	struct kbase_device *kbdev = queue->kctx->kbdev;
1260 	struct kbase_csf_scheduler *const scheduler =
1261 			&kbdev->csf.scheduler;
1262 	struct kbase_csf_csg_slot *const csg_slot =
1263 			kbdev->csf.scheduler.csg_slots;
1264 	bool retry_needed = false;
1265 	bool retried = false;
1266 	long remaining;
1267 	int slot;
1268 	int err = 0;
1269 	const u32 group_schedule_timeout = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
1270 
1271 	if (WARN_ON(!group))
1272 		return -EINVAL;
1273 
1274 	lockdep_assert_held(&queue->kctx->csf.lock);
1275 	lockdep_assert_held(&scheduler->lock);
1276 
1277 	slot = kbase_csf_scheduler_group_get_slot(group);
1278 
1279 	if (slot >= 0) {
1280 		WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1281 
1282 		if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1283 			dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
1284 				queue->csi_index, group->handle);
1285 			retry_needed = true;
1286 		}
1287 	}
1288 retry:
1289 	/* Update the group state so that it can get scheduled soon */
1290 	update_idle_suspended_group_state(group);
1291 
1292 	mutex_unlock(&scheduler->lock);
1293 
1294 	/* This function is called when the queue group is either not on a CSG
1295 	 * slot or is on the slot but undergoing transition.
1296 	 *
1297 	 * To stop the queue, the function needs to wait either for the queue
1298 	 * group to be assigned a CSG slot (and that slot has to reach the
1299 	 * running state) or for the eviction of the queue group from the
1300 	 * scheduler's list.
1301 	 *
1302 	 * In order to evaluate the latter condition, the function doesn't
1303 	 * really need to lock the scheduler, as any update to the run_state
1304 	 * of the queue group by sched_evict_group() would be visible due
1305 	 * to implicit barriers provided by the kernel waitqueue macros.
1306 	 *
1307 	 * The group pointer cannot disappear meanwhile, as the high level
1308 	 * CSF context is locked. Therefore, the scheduler would be
1309 	 * the only one to update the run_state of the group.
1310 	 */
1311 	remaining = wait_event_timeout(
1312 		kbdev->csf.event_wait, can_halt_stream(kbdev, group),
1313 		kbase_csf_timeout_in_jiffies(group_schedule_timeout));
1314 
1315 	mutex_lock(&scheduler->lock);
1316 
1317 	if (remaining && queue_group_scheduled_locked(group)) {
1318 		slot = kbase_csf_scheduler_group_get_slot(group);
1319 
1320 		/* If the group is still on slot and slot is in running state
1321 		 * then explicitly stop the CSI of the
1322 		 * queue. Otherwise there are different cases to consider
1323 		 *
1324 		 * - If the queue group was already undergoing transition to
1325 		 *   resume/start state when this function was entered then it
1326 		 *   would not have disabled the CSI of the
1327 		 *   queue being stopped and the previous wait would have ended
1328 		 *   once the slot was in a running state with CS
1329 		 *   interface still enabled.
1330 		 *   Now the group is going through another transition either
1331 		 *   to a suspend state or to a resume state (it could have
1332 		 *   been suspended before the scheduler lock was grabbed).
1333 		 *   In both scenarios need to wait again for the group to
1334 		 *   come on a slot and that slot to reach the running state,
1335 		 *   as that would guarantee that firmware will observe the
1336 		 *   CSI as disabled.
1337 		 *
1338 		 * - If the queue group was either off the slot or was
1339 		 *   undergoing transition to suspend state on entering this
1340 		 *   function, then the group would have been resumed with the
1341 		 *   queue's CSI in disabled state.
1342 		 *   So now if the group is undergoing another transition
1343 		 *   (after the resume) then just need to wait for the state
1344 		 *   bits in the ACK register of CSI to be
1345 		 *   set to STOP value. It is expected that firmware will
1346 		 *   process the stop/disable request of the CS
1347 		 *   interface after resuming the group before it processes
1348 		 *   another state change request of the group.
1349 		 */
1350 		if ((slot >= 0) &&
1351 		    (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) {
1352 			err = halt_stream_sync(queue);
1353 		} else if (retry_needed && !retried) {
1354 			retried = true;
1355 			goto retry;
1356 		} else if (slot >= 0) {
1357 			struct kbase_csf_global_iface *global_iface =
1358 					&kbdev->csf.global_iface;
1359 			struct kbase_csf_cmd_stream_group_info *ginfo =
1360 					&global_iface->groups[slot];
1361 			struct kbase_csf_cmd_stream_info *stream =
1362 					&ginfo->streams[queue->csi_index];
1363 			u32 cs_req =
1364 			    kbase_csf_firmware_cs_input_read(stream, CS_REQ);
1365 
1366 			if (!WARN_ON(CS_REQ_STATE_GET(cs_req) !=
1367 				     CS_REQ_STATE_STOP)) {
1368 				/* Timed wait */
1369 				remaining = wait_event_timeout(
1370 					kbdev->csf.event_wait,
1371 					(CS_ACK_STATE_GET(
1372 						 kbase_csf_firmware_cs_output(
1373 							 stream, CS_ACK)) ==
1374 					 CS_ACK_STATE_STOP),
1375 					kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
1376 
1377 				if (!remaining) {
1378 					dev_warn(kbdev->dev,
1379 						 "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
1380 						 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1381 						 queue->csi_index,
1382 						 group->handle, group->csg_nr);
1383 
1384 
1385 					err = -ETIMEDOUT;
1386 				}
1387 			}
1388 		}
1389 	} else if (!remaining) {
1390 		dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
1391 			 kbase_backend_get_cycle_cnt(kbdev),
1392 			 group->handle, queue->csi_index,
1393 			 group_schedule_timeout);
1394 
1395 
1396 		err = -ETIMEDOUT;
1397 	}
1398 
1399 	return err;
1400 }
1401 
1402 /**
1403  * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
1404  *                                      queue needs to be stopped.
1405  *
1406  * @queue: Pointer to the GPU command queue
1407  *
1408  * This function is called when the CSI to which GPU queue is bound needs to
1409  * be stopped. For that the corresponding queue group needs to be resident on
1410  * the CSG slot and MCU firmware should be running. So this function makes the
1411  * Scheduler exit the sleeping or suspended state.
1412  */
1413 static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
1414 {
1415 	struct kbase_device *kbdev = queue->kctx->kbdev;
1416 
1417 	scheduler_wakeup(kbdev, true);
1418 
1419 	/* Wait for MCU firmware to start running */
1420 	if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
1421 		dev_warn(
1422 			kbdev->dev,
1423 			"[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
1424 			kbase_backend_get_cycle_cnt(kbdev),
1425 			queue->csi_index, queue->group->handle,
1426 			queue->kctx->tgid, queue->kctx->id,
1427 			queue->group->csg_nr);
1428 	}
1429 }
1430 
1431 int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
1432 {
1433 	struct kbase_device *kbdev = queue->kctx->kbdev;
1434 	struct kbase_queue_group *group = queue->group;
1435 	bool const cs_enabled = queue->enabled;
1436 	int err = 0;
1437 
1438 	if (WARN_ON(!group))
1439 		return -EINVAL;
1440 
1441 	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
1442 	lockdep_assert_held(&queue->kctx->csf.lock);
1443 	mutex_lock(&kbdev->csf.scheduler.lock);
1444 
1445 	queue->enabled = false;
1446 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
1447 
1448 	if (cs_enabled && queue_group_scheduled_locked(group)) {
1449 		struct kbase_csf_csg_slot *const csg_slot =
1450 			kbdev->csf.scheduler.csg_slots;
1451 		int slot = kbase_csf_scheduler_group_get_slot(group);
1452 
1453 		/* Since the group needs to be resumed in order to stop the queue,
1454 		 * check if GPU needs to be powered up.
1455 		 */
1456 		scheduler_activate_on_queue_stop(queue);
1457 
1458 		if ((slot >= 0) &&
1459 		    (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
1460 			err = halt_stream_sync(queue);
1461 		else
1462 			err = sched_halt_stream(queue);
1463 
1464 		unassign_user_doorbell_from_queue(kbdev, queue);
1465 		kbase_csf_mcu_shared_drop_stopped_queue(kbdev, queue);
1466 	}
1467 
1468 	mutex_unlock(&kbdev->csf.scheduler.lock);
1469 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_STOP, group, queue, group->run_state);
1470 	return err;
1471 }
1472 
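/**
 * update_hw_active() - Mirror the queue's active state to its user output page.
 *
 * @queue:  Pointer to the GPU queue, may be NULL.
 * @active: Value to write to the CS_ACTIVE entry of the queue's output page.
 *
 * Only has an effect on CONFIG_MALI_BIFROST_NO_MALI builds (no real firmware);
 * otherwise it is a no-op.
 */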
1473 static void update_hw_active(struct kbase_queue *queue, bool active)
1474 {
1475 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
1476 	if (queue && queue->enabled) {
1477 		u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
1478 
1479 		output_addr[CS_ACTIVE / sizeof(u32)] = active;
1480 	}
1481 #else
1482 	CSTD_UNUSED(queue);
1483 	CSTD_UNUSED(active);
1484 #endif
1485 }
1486 
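/**
 * program_cs_extract_init() - Initialise CS_EXTRACT_INIT in the queue's input page.
 *
 * @queue: Pointer to the GPU queue.
 *
 * Copies the current CS_EXTRACT value from the queue's user output page into
 * CS_EXTRACT_INIT in the user input page, so that when a suspended group is
 * resumed the stream continues from the point it previously reached.
 */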
1487 static void program_cs_extract_init(struct kbase_queue *queue)
1488 {
1489 	u64 *input_addr = (u64 *)queue->user_io_addr;
1490 	u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
1491 
1492 	input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
1493 			output_addr[CS_EXTRACT_LO / sizeof(u64)];
1494 }
1495 
1496 static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
1497 				 struct kbase_queue *queue)
1498 {
1499 	struct kbase_device *kbdev = queue->kctx->kbdev;
1500 	u32 const glb_version = kbdev->csf.global_iface.version;
1501 
1502 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1503 
1504 	/* If cs_trace_command not supported, nothing to program */
1505 	if (glb_version < kbase_csf_interface_version(1, 1, 0))
1506 		return;
1507 
1508 	/* Program for cs_trace if enabled. In the current arrangement, it is
1509 	 * possible for the context to enable cs_trace after some queues have
1510 	 * been registered with cs_trace in the disabled state. This is tracked
1511 	 * by the queue's trace buffer base address, which was validated at the
1512 	 * queue's register_ex call.
1513 	 */
1514 	if (kbase_csf_scheduler_queue_has_trace(queue)) {
1515 		u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(
1516 			queue->trace_cfg, queue->kctx->as_nr);
1517 
1518 		kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg);
1519 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE,
1520 				queue->trace_buffer_size);
1521 
1522 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO,
1523 				queue->trace_buffer_base & U32_MAX);
1524 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI,
1525 				queue->trace_buffer_base >> 32);
1526 
1527 		kbase_csf_firmware_cs_input(
1528 				stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO,
1529 				queue->trace_offset_ptr & U32_MAX);
1530 		kbase_csf_firmware_cs_input(
1531 				stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI,
1532 				queue->trace_offset_ptr >> 32);
1533 	} else {
1534 		/* Place the configuration to the disabled condition */
1535 		kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0);
1536 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0);
1537 	}
1538 }
1539 
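/**
 * program_cs() - Program the CS firmware interface for a queue.
 *
 * @kbdev:             Pointer to the GPU device.
 * @queue:             Pointer to the GPU queue.
 * @ring_csg_doorbell: Whether the CSG doorbell should be rung after programming.
 *
 * Programs the CS registers of the queue's CSI on the resident CSG slot:
 * ring buffer base/size, user input/output pages, doorbell and priority
 * configuration, cs_trace setup and interrupt masks, and finally requests the
 * START or STOP state depending on whether the queue is enabled.
 */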
1540 static void program_cs(struct kbase_device *kbdev,
1541 		struct kbase_queue *queue, bool ring_csg_doorbell)
1542 {
1543 	struct kbase_queue_group *group = queue->group;
1544 	struct kbase_csf_cmd_stream_group_info *ginfo;
1545 	struct kbase_csf_cmd_stream_info *stream;
1546 	int csi_index = queue->csi_index;
1547 	unsigned long flags;
1548 	u64 user_input;
1549 	u64 user_output;
1550 
1551 	if (WARN_ON(!group))
1552 		return;
1553 
1554 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1555 
1556 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1557 		return;
1558 
1559 	ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
1560 
1561 	if (WARN_ON(csi_index < 0) ||
1562 	    WARN_ON(csi_index >= ginfo->stream_num))
1563 		return;
1564 
1565 	if (queue->enabled) {
1566 		assign_user_doorbell_to_queue(kbdev, queue);
1567 		if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
1568 			return;
1569 
1570 		WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1571 	}
1572 
1573 	if (queue->enabled && queue_group_suspended_locked(group))
1574 		program_cs_extract_init(queue);
1575 
1576 	stream = &ginfo->streams[csi_index];
1577 
1578 	kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
1579 				    queue->base_addr & 0xFFFFFFFF);
1580 	kbase_csf_firmware_cs_input(stream, CS_BASE_HI,
1581 				    queue->base_addr >> 32);
1582 	kbase_csf_firmware_cs_input(stream, CS_SIZE,
1583 				    queue->size);
1584 
1585 	user_input = queue->user_io_gpu_va;
1586 	WARN_ONCE(!user_input && queue->enabled, "Enabled queue should have a valid gpu_va");
1587 
1588 	kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, user_input & 0xFFFFFFFF);
1589 	kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, user_input >> 32);
1590 
1591 	user_output = user_input + PAGE_SIZE;
1592 	kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, user_output & 0xFFFFFFFF);
1593 	kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, user_output >> 32);
1594 
1595 	kbase_csf_firmware_cs_input(stream, CS_CONFIG,
1596 		(queue->doorbell_nr << 8) | (queue->priority & 0xF));
1597 
1598 	/* Program the queue's cs_trace configuration */
1599 	program_cs_trace_cfg(stream, queue);
1600 
1601 	/* Enable all interrupts for now */
1602 	kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
1603 
1604 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1605 
1606 	/* The fault bit could be misaligned between CS_REQ and CS_ACK if the
1607 	 * acknowledgment was deferred due to dump on fault and the group was
1608 	 * removed from the CSG slot before the fault could be acknowledged.
1609 	 */
1610 	if (queue->enabled) {
1611 		u32 const cs_ack =
1612 			kbase_csf_firmware_cs_output(stream, CS_ACK);
1613 
1614 		kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
1615 						 CS_REQ_FAULT_MASK);
1616 	}
1617 
1618 	/*
1619 	 * Enable the CSG idle notification once the CS's ringbuffer
1620 	 * becomes empty or the CS becomes sync_idle, i.e. waiting on a sync
1621 	 * update or a protected mode switch.
1622 	 */
1623 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1624 					 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1625 						 CS_REQ_IDLE_SHARED_SB_DEC_MASK,
1626 					 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK |
1627 						 CS_REQ_IDLE_SHARED_SB_DEC_MASK);
1628 
1629 	/* Set state to START/STOP */
1630 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1631 		queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
1632 		CS_REQ_STATE_MASK);
1633 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1634 					  ring_csg_doorbell);
1635 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
1636 
1637 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
1638 
1639 	update_hw_active(queue, true);
1640 }
1641 
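/**
 * onslot_csg_add_new_queue() - Start a new queue on a group already on a CSG slot.
 *
 * @queue: Pointer to the GPU queue to be added.
 *
 * Maps the queue's userio pages in the MCU shared region and, on success,
 * programs the CS for it.
 *
 * Return: 0 on success, or the error from the MCU shared region mapping.
 */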
1642 static int onslot_csg_add_new_queue(struct kbase_queue *queue)
1643 {
1644 	struct kbase_device *kbdev = queue->kctx->kbdev;
1645 	int err;
1646 
1647 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1648 
1649 	err = kbase_csf_mcu_shared_add_queue(kbdev, queue);
1650 	if (!err)
1651 		program_cs(kbdev, queue, true);
1652 
1653 	return err;
1654 }
1655 
1656 int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
1657 {
1658 	struct kbase_queue_group *group = queue->group;
1659 	struct kbase_device *kbdev = queue->kctx->kbdev;
1660 	bool const cs_enabled = queue->enabled;
1661 	int err = 0;
1662 	bool evicted = false;
1663 
1664 	kbase_reset_gpu_assert_prevented(kbdev);
1665 	lockdep_assert_held(&queue->kctx->csf.lock);
1666 
1667 	if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
1668 		return -EINVAL;
1669 
1670 	mutex_lock(&kbdev->csf.scheduler.lock);
1671 
1672 #if IS_ENABLED(CONFIG_DEBUG_FS)
1673 	if (unlikely(kbdev->csf.scheduler.state == SCHED_BUSY)) {
1674 		mutex_unlock(&kbdev->csf.scheduler.lock);
1675 		return -EBUSY;
1676 	}
1677 #endif
1678 
1679 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
1680 				   group->run_state);
1681 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group, queue,
1682 				   queue->status_wait);
1683 
1684 	if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
1685 		err = -EIO;
1686 		evicted = true;
1687 	} else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
1688 		   && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
1689 		dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked",
1690 			queue->csi_index, group->handle);
1691 	} else {
1692 		err = scheduler_group_schedule(group);
1693 
1694 		if (!err) {
1695 			queue->enabled = true;
1696 			if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
1697 				if (cs_enabled) {
1698 					/* Normally, when a queue is already
1699 					 * running, a queue update is just a
1700 					 * doorbell kick on the user side.
1701 					 * However, if such a kick closely
1702 					 * follows a start or resume, the queue
1703 					 * may still be in transition, so the
1704 					 * kick enters the kernel because the
1705 					 * hw_active flag is yet to be set. In
1706 					 * that case the scheduler needs to
1707 					 * kick the corresponding user
1708 					 * doorbell.
1709 					 */
1710 					kbase_csf_ring_cs_user_doorbell(kbdev, queue);
1711 				} else {
1712 					err = onslot_csg_add_new_queue(queue);
1713 					/* For an on slot CSG, the only error in adding a new
1714 					 * queue to run is that the scheduler could not map
1715 					 * the required userio pages, likely due to some resource
1716 					 * issues. In such a case, and if the group is yet
1717 					 * to enter its fatal error state, we return a -EBUSY
1718 					 * to the submitter for another kick. The queue itself
1719 					 * has yet to be programmed, hence it must remain in its
1720 					 * previous (disabled) state. If the error persists,
1721 					 * the group will eventually report a fatal error via
1722 					 * the group's error reporting mechanism, when the MCU
1723 					 * shared region map retry limit of the group is
1724 					 * exceeded. For such a case, the expected error value
1725 					 * is -EIO.
1726 					 */
1727 					if (unlikely(err)) {
1728 						queue->enabled = cs_enabled;
1729 						mutex_unlock(&kbdev->csf.scheduler.lock);
1730 						return (err != -EIO) ? -EBUSY : err;
1731 					}
1732 				}
1733 			}
1734 			queue_delayed_work(system_long_wq, &kbdev->csf.scheduler.ping_work,
1735 					   msecs_to_jiffies(kbase_get_timeout_ms(
1736 						   kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
1737 		}
1738 	}
1739 
1740 	mutex_unlock(&kbdev->csf.scheduler.lock);
1741 
1742 	if (evicted)
1743 		kbase_csf_term_descheduled_queue_group(group);
1744 
1745 	return err;
1746 }
1747 
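/**
 * update_csg_slot_status() - Refresh the software state of a CSG slot.
 *
 * @kbdev: Pointer to the GPU device.
 * @slot:  CSG slot number.
 *
 * Reads the CSG_ACK state for the slot and advances the driver's slot state
 * accordingly: READY2RUN becomes RUNNING once a START/RESUME is acknowledged,
 * and DOWN2STOP becomes STOPPED once a SUSPEND/TERMINATE is acknowledged.
 *
 * Return: the (possibly updated) slot state.
 */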
1748 static enum kbase_csf_csg_slot_state update_csg_slot_status(
1749 				struct kbase_device *kbdev, s8 slot)
1750 {
1751 	struct kbase_csf_csg_slot *csg_slot =
1752 		&kbdev->csf.scheduler.csg_slots[slot];
1753 	struct kbase_csf_cmd_stream_group_info *ginfo =
1754 		&kbdev->csf.global_iface.groups[slot];
1755 	u32 state;
1756 	enum kbase_csf_csg_slot_state slot_state;
1757 
1758 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1759 
1760 	state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1761 			CSG_ACK));
1762 	slot_state = atomic_read(&csg_slot->state);
1763 
1764 	switch (slot_state) {
1765 	case CSG_SLOT_READY2RUN:
1766 		if ((state == CSG_ACK_STATE_START) ||
1767 		    (state == CSG_ACK_STATE_RESUME)) {
1768 			slot_state = CSG_SLOT_RUNNING;
1769 			atomic_set(&csg_slot->state, slot_state);
1770 			csg_slot->trigger_jiffies = jiffies;
1771 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_RUNNING, csg_slot->resident_group,
1772 						 state);
1773 			dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
1774 				csg_slot->resident_group->handle, slot);
1775 		}
1776 		break;
1777 	case CSG_SLOT_DOWN2STOP:
1778 		if ((state == CSG_ACK_STATE_SUSPEND) ||
1779 		    (state == CSG_ACK_STATE_TERMINATE)) {
1780 			slot_state = CSG_SLOT_STOPPED;
1781 			atomic_set(&csg_slot->state, slot_state);
1782 			csg_slot->trigger_jiffies = jiffies;
1783 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state);
1784 			dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
1785 				csg_slot->resident_group->handle, slot);
1786 		}
1787 		break;
1788 	case CSG_SLOT_DOWN2STOP_TIMEDOUT:
1789 	case CSG_SLOT_READY2RUN_TIMEDOUT:
1790 	case CSG_SLOT_READY:
1791 	case CSG_SLOT_RUNNING:
1792 	case CSG_SLOT_STOPPED:
1793 		break;
1794 	default:
1795 		dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state);
1796 		break;
1797 	}
1798 
1799 	return slot_state;
1800 }
1801 
1802 static bool csg_slot_running(struct kbase_device *kbdev, s8 slot)
1803 {
1804 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1805 
1806 	return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING);
1807 }
1808 
1809 static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot)
1810 {
1811 	enum kbase_csf_csg_slot_state slot_state;
1812 
1813 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1814 
1815 	slot_state = update_csg_slot_status(kbdev, slot);
1816 
1817 	return (slot_state == CSG_SLOT_STOPPED ||
1818 		slot_state == CSG_SLOT_READY);
1819 }
1820 
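/**
 * csg_slot_stopped_raw() - Check if a CSG slot has stopped, from CSG_ACK only.
 *
 * @kbdev: Pointer to the GPU device.
 * @slot:  CSG slot number.
 *
 * Unlike csg_slot_stopped_locked(), this reads the raw CSG_ACK state without
 * updating the driver's software slot state.
 *
 * Return: true if the firmware has acknowledged a suspend or terminate.
 */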
1821 static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot)
1822 {
1823 	struct kbase_csf_cmd_stream_group_info *ginfo =
1824 		&kbdev->csf.global_iface.groups[slot];
1825 	u32 state;
1826 
1827 	state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1828 			CSG_ACK));
1829 
1830 	if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) {
1831 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state);
1832 		dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot);
1833 		return true;
1834 	}
1835 
1836 	return false;
1837 }
1838 
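/**
 * halt_csg_slot() - Request suspend or terminate for a group's CSG slot.
 *
 * @group:   Pointer to the queue group resident on a CSG slot.
 * @suspend: true to request SUSPEND, false to request TERMINATE.
 *
 * If the slot is still transitioning to the running state (READY2RUN), this
 * first waits for the transition to complete, then issues the halt request,
 * rings the CSG doorbell and marks the slot as DOWN2STOP.
 */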
1839 static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
1840 {
1841 	struct kbase_device *kbdev = group->kctx->kbdev;
1842 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1843 	struct kbase_csf_csg_slot *csg_slot =
1844 		kbdev->csf.scheduler.csg_slots;
1845 	s8 slot;
1846 
1847 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1848 
1849 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1850 		return;
1851 
1852 	slot = group->csg_nr;
1853 
1854 	/* When in transition, wait for it to complete */
1855 	if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1856 		long remaining =
1857 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1858 
1859 		dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
1860 		remaining = wait_event_timeout(kbdev->csf.event_wait,
1861 				csg_slot_running(kbdev, slot), remaining);
1862 		if (!remaining)
1863 			dev_warn(kbdev->dev,
1864 				 "[%llu] slot %d timeout (%d ms) on up-running\n",
1865 				 kbase_backend_get_cycle_cnt(kbdev),
1866 				 slot, kbdev->csf.fw_timeout_ms);
1867 	}
1868 
1869 	if (csg_slot_running(kbdev, slot)) {
1870 		unsigned long flags;
1871 		struct kbase_csf_cmd_stream_group_info *ginfo =
1872 						&global_iface->groups[slot];
1873 
1874 		u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
1875 					 CSG_REQ_STATE_TERMINATE;
1876 
1877 		dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d",
1878 			suspend, group->handle, group->kctx->tgid, group->kctx->id, slot);
1879 
1880 		spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1881 		/* Set state to SUSPEND/TERMINATE */
1882 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
1883 						  CSG_REQ_STATE_MASK);
1884 		kbase_csf_ring_csg_doorbell(kbdev, slot);
1885 		spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1886 					flags);
1887 		atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
1888 		csg_slot[slot].trigger_jiffies = jiffies;
1889 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
1890 
1891 		KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
1892 			kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
1893 	}
1894 }
1895 
1896 static void term_csg_slot(struct kbase_queue_group *group)
1897 {
1898 	halt_csg_slot(group, false);
1899 }
1900 
1901 static void suspend_csg_slot(struct kbase_queue_group *group)
1902 {
1903 	halt_csg_slot(group, true);
1904 }
1905 
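/**
 * csf_wait_ge_condition_supported() - Check if the GE sync wait condition is supported.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Return: true if the firmware global interface version supports the SYNC_WAIT
 *         greater-or-equal condition (versions 1.4+, 2.6+, 3.6+ and any later
 *         major version).
 */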
1906 static bool csf_wait_ge_condition_supported(struct kbase_device *kbdev)
1907 {
1908 	const uint32_t glb_major = GLB_VERSION_MAJOR_GET(kbdev->csf.global_iface.version);
1909 	const uint32_t glb_minor = GLB_VERSION_MINOR_GET(kbdev->csf.global_iface.version);
1910 
1911 	switch (glb_major) {
1912 	case 0:
1913 		break;
1914 	case 1:
1915 		if (glb_minor >= 4)
1916 			return true;
1917 		break;
1918 	case 2:
1919 		if (glb_minor >= 6)
1920 			return true;
1921 		break;
1922 	case 3:
1923 		if (glb_minor >= 6)
1924 			return true;
1925 		break;
1926 	default:
1927 		return true;
1928 	}
1929 	return false;
1930 }
1931 /**
1932  * evaluate_sync_update() - Evaluate the sync wait condition the GPU command
1933  *                          queue has been blocked on.
1934  *
1935  * @queue: Pointer to the GPU command queue
1936  *
1937  * Return: true if sync wait condition is satisfied.
1938  */
1939 static bool evaluate_sync_update(struct kbase_queue *queue)
1940 {
1941 	struct kbase_vmap_struct *mapping;
1942 	bool updated = false;
1943 	u32 *sync_ptr;
1944 	u32 sync_wait_size;
1945 	u32 sync_wait_align_mask;
1946 	u32 sync_wait_cond;
1947 	u32 sync_current_val;
1948 	struct kbase_device *kbdev;
1949 	bool sync_wait_align_valid = false;
1950 	bool sync_wait_cond_valid = false;
1951 
1952 	if (WARN_ON(!queue))
1953 		return false;
1954 
1955 	kbdev = queue->kctx->kbdev;
1956 
1957 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1958 
1959 	sync_wait_size = CS_STATUS_WAIT_SYNC_WAIT_SIZE_GET(queue->status_wait);
1960 	sync_wait_align_mask =
1961 		(sync_wait_size == 0 ? BASEP_EVENT32_ALIGN_BYTES : BASEP_EVENT64_ALIGN_BYTES) - 1;
1962 	sync_wait_align_valid = ((uintptr_t)queue->sync_ptr & sync_wait_align_mask) == 0;
1963 	if (!sync_wait_align_valid) {
1964 		dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX is misaligned",
1965 			queue->sync_ptr);
1966 		goto out;
1967 	}
1968 
1969 	sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
1970 					&mapping);
1971 
1972 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_START, queue->group, queue,
1973 				   queue->sync_ptr);
1974 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_BLOCKED_REASON, queue->group, queue,
1975 				   queue->blocked_reason);
1976 
1977 	if (!sync_ptr) {
1978 		dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
1979 			queue->sync_ptr);
1980 		goto out;
1981 	}
1982 
1983 	sync_wait_cond =
1984 		CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
1985 	sync_wait_cond_valid = (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) ||
1986 			       (sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) ||
1987 			       ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
1988 				csf_wait_ge_condition_supported(kbdev));
1989 
1990 	WARN_ON(!sync_wait_cond_valid);
1991 
1992 	sync_current_val = READ_ONCE(*sync_ptr);
1993 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_CUR_VAL, queue->group, queue,
1994 				   sync_current_val);
1995 
1996 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_TEST_VAL, queue->group, queue,
1997 				   queue->sync_value);
1998 
1999 	if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
2000 	     (sync_current_val > queue->sync_value)) ||
2001 	    ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GE) &&
2002 	     (sync_current_val >= queue->sync_value) && csf_wait_ge_condition_supported(kbdev)) ||
2003 	    ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
2004 	     (sync_current_val <= queue->sync_value))) {
2005 		/* The sync wait condition is satisfied so the group to which
2006 		 * queue is bound can be re-scheduled.
2007 		 */
2008 		updated = true;
2009 	} else {
2010 		dev_dbg(queue->kctx->kbdev->dev,
2011 			"sync memory not updated yet(%u)", sync_current_val);
2012 	}
2013 
2014 	kbase_phy_alloc_mapping_put(queue->kctx, mapping);
2015 out:
2016 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVAL_END, queue->group, queue, updated);
2017 	return updated;
2018 }
2019 
2020 /**
2021  * save_slot_cs() -  Save the state for blocked GPU command queue.
2022  *
2023  * @ginfo: Pointer to the CSG interface used by the group
2024  *         the queue is bound to.
2025  * @queue: Pointer to the GPU command queue.
2026  *
2027  * This function will check if GPU command queue is blocked on a sync wait and
2028  * This function will check if the GPU command queue is blocked on a sync wait
2029  * and evaluate the wait condition. If the wait condition isn't satisfied it
2030  * will save the state needed to reevaluate the condition in the future.
2031  * The group to which the queue is bound shall be in the idle state.
2032  * Return: true if the queue is blocked on a sync wait operation.
2033  */
2034 static
2035 bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
2036 		struct kbase_queue *queue)
2037 {
2038 	struct kbase_csf_cmd_stream_info *const stream =
2039 		&ginfo->streams[queue->csi_index];
2040 	u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
2041 	bool is_waiting = false;
2042 
2043 #if IS_ENABLED(CONFIG_DEBUG_FS)
2044 	u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
2045 
2046 	cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
2047 	queue->saved_cmd_ptr = cmd_ptr;
2048 #endif
2049 
2050 	KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS, queue->group,
2051 				   queue, status);
2052 
2053 	if (CS_STATUS_WAIT_SYNC_WAIT_GET(status) || CS_STATUS_WAIT_SB_MASK_GET(status)) {
2054 		queue->status_wait = status;
2055 		queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
2056 			CS_STATUS_WAIT_SYNC_POINTER_LO);
2057 		queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
2058 			CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
2059 		queue->sync_value = kbase_csf_firmware_cs_output(stream,
2060 			CS_STATUS_WAIT_SYNC_VALUE);
2061 
2062 		queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2063 			kbase_csf_firmware_cs_output(stream,
2064 						     CS_STATUS_SCOREBOARDS));
2065 		queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
2066 			kbase_csf_firmware_cs_output(stream,
2067 						     CS_STATUS_BLOCKED_REASON));
2068 
2069 		if ((queue->blocked_reason == CS_STATUS_BLOCKED_ON_SB_WAIT) ||
2070 		    !evaluate_sync_update(queue)) {
2071 			is_waiting = true;
2072 		} else {
2073 			/* Sync object already got updated & met the condition
2074 			 * thus it doesn't need to be reevaluated and so can
2075 			 * clear the 'status_wait' here.
2076 			 */
2077 			queue->status_wait = 0;
2078 		}
2079 	} else {
2080 		/* Invalidate wait status info that would have been recorded if
2081 		 * this queue was blocked when the group (in idle state) was
2082 		 * suspended previously. After that the group could have been
2083 		 * unblocked due to the kicking of another queue bound to it &
2084 		 * so the wait status info would have stuck with this queue.
2085 		 */
2086 		queue->status_wait = 0;
2087 	}
2088 
2089 	return is_waiting;
2090 }
2091 
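/**
 * schedule_in_cycle() - Kick an asynchronous scheduling tock for a group.
 *
 * @group: Pointer to the queue group needing scheduler attention.
 * @force: Invoke the tock even if the scheduler tick timer is disabled.
 */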
2092 static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
2093 {
2094 	struct kbase_context *kctx = group->kctx;
2095 	struct kbase_device *kbdev = kctx->kbdev;
2096 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2097 
2098 	lockdep_assert_held(&scheduler->lock);
2099 
2100 	/* Only try to schedule work for this event if no requests are
2101 	 * pending, otherwise the function will end up canceling previous
2102 	 * work requests, and the scheduler is configured to wake up
2103 	 * periodically (or the scheduling of work needs to be enforced in
2104 	 * situations such as entering protected mode).
2105 	 */
2106 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
2107 		dev_dbg(kbdev->dev, "Kicking async for group %d\n",
2108 			group->handle);
2109 		kbase_csf_scheduler_invoke_tock(kbdev);
2110 	}
2111 }
2112 
2113 static void ktrace_log_group_state(struct kbase_queue_group *const group)
2114 {
2115 	switch (group->run_state) {
2116 	case KBASE_CSF_GROUP_INACTIVE:
2117 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
2118 					group->run_state);
2119 		break;
2120 	case KBASE_CSF_GROUP_RUNNABLE:
2121 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group,
2122 					group->run_state);
2123 		break;
2124 	case KBASE_CSF_GROUP_IDLE:
2125 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_IDLE, group,
2126 					group->run_state);
2127 		break;
2128 	case KBASE_CSF_GROUP_SUSPENDED:
2129 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED, group,
2130 					group->run_state);
2131 		break;
2132 	case KBASE_CSF_GROUP_SUSPENDED_ON_IDLE:
2133 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2134 					group->run_state);
2135 		break;
2136 	case KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC:
2137 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
2138 					group, group->run_state);
2139 		break;
2140 	case KBASE_CSF_GROUP_FAULT_EVICTED:
2141 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_FAULT_EVICTED, group,
2142 					group->run_state);
2143 		break;
2144 	case KBASE_CSF_GROUP_TERMINATED:
2145 		KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group,
2146 					group->run_state);
2147 		break;
2148 	}
2149 }
2150 
2151 static
2152 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
2153 		struct kbase_queue_group *const group,
2154 		enum kbase_csf_group_state run_state)
2155 {
2156 	struct kbase_context *const kctx = group->kctx;
2157 	struct kbase_device *const kbdev = kctx->kbdev;
2158 
2159 	lockdep_assert_held(&scheduler->lock);
2160 
2161 	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2162 
2163 	if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
2164 		return;
2165 
2166 	group->run_state = run_state;
2167 
2168 	ktrace_log_group_state(group);
2169 
2170 	if (run_state == KBASE_CSF_GROUP_RUNNABLE)
2171 		group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2172 
2173 	list_add_tail(&group->link,
2174 			&kctx->csf.sched.runnable_groups[group->priority]);
2175 	kctx->csf.sched.num_runnable_grps++;
2176 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_INSERT, group,
2177 				 kctx->csf.sched.num_runnable_grps);
2178 
2179 	/* Add the kctx if not yet in runnable kctxs */
2180 	if (kctx->csf.sched.num_runnable_grps == 1) {
2181 		/* First runnable csg, adds to the runnable_kctxs */
2182 		/* First runnable CSG, add the kctx to runnable_kctxs */
2183 		list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
2184 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_INSERT, kctx, 0u);
2185 	}
2186 
2187 	scheduler->total_runnable_grps++;
2188 
2189 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
2190 	    (scheduler->total_runnable_grps == 1 ||
2191 	     scheduler->state == SCHED_SUSPENDED ||
2192 	     scheduler->state == SCHED_SLEEPING)) {
2193 		dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
2194 		/* Fire a scheduling to start the time-slice */
2195 		enqueue_tick_work(kbdev);
2196 	} else
2197 		schedule_in_cycle(group, false);
2198 
2199 	/* Since a new group has become runnable, check if GPU needs to be
2200 	 * powered up.
2201 	 */
2202 	scheduler_wakeup(kbdev, false);
2203 }
2204 
2205 static
2206 void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
2207 		struct kbase_queue_group *group,
2208 		enum kbase_csf_group_state run_state)
2209 {
2210 	struct kbase_context *kctx = group->kctx;
2211 	struct kbase_queue_group *new_head_grp;
2212 	struct list_head *list =
2213 		&kctx->csf.sched.runnable_groups[group->priority];
2214 	unsigned long flags;
2215 
2216 	lockdep_assert_held(&scheduler->lock);
2217 
2218 	WARN_ON(!queue_group_scheduled_locked(group));
2219 
2220 	group->run_state = run_state;
2221 
2222 	ktrace_log_group_state(group);
2223 
2224 	list_del_init(&group->link);
2225 
2226 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
2227 	/* The below condition will be true when the group running in protected
2228 	 * mode is being terminated but the protected mode exit interrupt wasn't
2229 	 * received. This can happen if the FW got stuck during protected mode
2230 	 * for some reason (like GPU page fault or some internal error).
2231 	 * In normal cases FW is expected to send the protected mode exit
2232 	 * interrupt before it handles the CSG termination request.
2233 	 */
2234 	if (unlikely(scheduler->active_protm_grp == group)) {
2235 		/* CSG slot cleanup should have happened for the pmode group */
2236 		WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
2237 		WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2238 		/* Initiate a GPU reset, in case it wasn't initiated yet,
2239 		 * in order to rectify the anomaly.
2240 		 */
2241 		if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
2242 			kbase_reset_gpu(kctx->kbdev);
2243 
2244 		KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_PROTM_EXIT,
2245 					 scheduler->active_protm_grp, 0u);
2246 		scheduler->active_protm_grp = NULL;
2247 	}
2248 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
2249 
2250 	if (scheduler->top_grp == group) {
2251 		/*
2252 		 * Note: this disables explicit rotation in the next scheduling
2253 		 * cycle. However, removing the top_grp is the same as an
2254 		 * implicit rotation (e.g. if we instead rotated the top_ctx
2255 		 * and then remove top_grp)
2256 		 *
2257 		 * This implicit rotation is assumed by the scheduler rotate
2258 		 * functions.
2259 		 */
2260 		scheduler->top_grp = NULL;
2261 
2262 		/*
2263 		 * Trigger a scheduling tock for a CSG containing protected
2264 		 * content, if there has been any, in order to minimise
2265 		 * latency.
2266 		 */
2267 		group = scheduler_get_protm_enter_async_group(kctx->kbdev,
2268 							      NULL);
2269 		if (group)
2270 			schedule_in_cycle(group, true);
2271 	}
2272 
2273 	kctx->csf.sched.num_runnable_grps--;
2274 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_REMOVE, group,
2275 				 kctx->csf.sched.num_runnable_grps);
2276 	new_head_grp = (!list_empty(list)) ?
2277 				list_first_entry(list, struct kbase_queue_group, link) :
2278 				NULL;
2279 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
2280 
2281 	if (kctx->csf.sched.num_runnable_grps == 0) {
2282 		struct kbase_context *new_head_kctx;
2283 		struct list_head *kctx_list = &scheduler->runnable_kctxs;
2284 		/* drop the kctx */
2285 		list_del_init(&kctx->csf.link);
2286 		if (scheduler->top_ctx == kctx)
2287 			scheduler->top_ctx = NULL;
2288 		KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_REMOVE, kctx, 0u);
2289 		new_head_kctx = (!list_empty(kctx_list)) ?
2290 					list_first_entry(kctx_list, struct kbase_context, csf.link) :
2291 					NULL;
2292 		KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx, 0u);
2293 	}
2294 
2295 	WARN_ON(scheduler->total_runnable_grps == 0);
2296 	scheduler->total_runnable_grps--;
2297 	if (!scheduler->total_runnable_grps) {
2298 		dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
2299 		cancel_tick_timer(kctx->kbdev);
2300 		WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
2301 		if (scheduler->state != SCHED_SUSPENDED)
2302 			enqueue_gpu_idle_work(scheduler);
2303 	}
2304 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
2305 			scheduler->num_active_address_spaces |
2306 			(((u64)scheduler->total_runnable_grps) << 32));
2307 }
2308 
2309 static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
2310 {
2311 	struct kbase_context *kctx = group->kctx;
2312 
2313 	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
2314 
2315 	WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE);
2316 
2317 	list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
2318 	kctx->csf.sched.num_idle_wait_grps++;
2319 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_INSERT, group,
2320 				 kctx->csf.sched.num_idle_wait_grps);
2321 	group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
2322 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, group,
2323 				 group->run_state);
2324 	dev_dbg(kctx->kbdev->dev,
2325 		"Group-%d suspended on sync_wait, total wait_groups: %u\n",
2326 		group->handle, kctx->csf.sched.num_idle_wait_grps);
2327 }
2328 
2329 static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
2330 {
2331 	struct kbase_context *kctx = group->kctx;
2332 	struct list_head *list = &kctx->csf.sched.idle_wait_groups;
2333 	struct kbase_queue_group *new_head_grp;
2334 
2335 	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
2336 
2337 	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
2338 
2339 	list_del_init(&group->link);
2340 	WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
2341 	kctx->csf.sched.num_idle_wait_grps--;
2342 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_REMOVE, group,
2343 				 kctx->csf.sched.num_idle_wait_grps);
2344 	new_head_grp = (!list_empty(list)) ?
2345 				list_first_entry(list, struct kbase_queue_group, link) :
2346 				NULL;
2347 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_IDLE_WAIT_HEAD, new_head_grp, 0u);
2348 	group->run_state = KBASE_CSF_GROUP_INACTIVE;
2349 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, CSF_GROUP_INACTIVE, group, group->run_state);
2350 }
2351 
2352 static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
2353 		struct kbase_queue_group *group)
2354 {
2355 	lockdep_assert_held(&scheduler->lock);
2356 
2357 	if (WARN_ON(!group))
2358 		return;
2359 
2360 	remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE);
2361 	insert_group_to_idle_wait(group);
2362 }
2363 
2364 static void update_offslot_non_idle_cnt(struct kbase_queue_group *group)
2365 {
2366 	struct kbase_device *kbdev = group->kctx->kbdev;
2367 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2368 
2369 	lockdep_assert_held(&scheduler->lock);
2370 
2371 	if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2372 		int new_val =
2373 			atomic_dec_return(&scheduler->non_idle_offslot_grps);
2374 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
2375 	}
2376 }
2377 
2378 static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
2379 {
2380 	struct kbase_device *kbdev = group->kctx->kbdev;
2381 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2382 
2383 	lockdep_assert_held(&scheduler->lock);
2384 
2385 	WARN_ON(group->csg_nr < 0);
2386 
2387 	if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2388 		int new_val =
2389 			atomic_dec_return(&scheduler->non_idle_offslot_grps);
2390 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group, new_val);
2391 	}
2392 }
2393 
2394 static void update_offslot_non_idle_cnt_on_grp_suspend(
2395 				struct kbase_queue_group *group)
2396 {
2397 	struct kbase_device *kbdev = group->kctx->kbdev;
2398 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2399 
2400 	lockdep_assert_held(&scheduler->lock);
2401 
2402 	if (scheduler->state == SCHED_BUSY) {
2403 		/* active phase, or async entering of protected mode */
2404 		if (group->prepared_seq_num >=
2405 		    scheduler->non_idle_scanout_grps) {
2406 			/* At scanout, it was tagged as on-slot idle */
2407 			if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2408 				int new_val = atomic_inc_return(
2409 					&scheduler->non_idle_offslot_grps);
2410 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC,
2411 							 group, new_val);
2412 			}
2413 		} else {
2414 			if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
2415 				int new_val = atomic_dec_return(
2416 					&scheduler->non_idle_offslot_grps);
2417 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC,
2418 							 group, new_val);
2419 			}
2420 		}
2421 	} else {
2422 		/* async phases */
2423 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2424 			int new_val = atomic_inc_return(
2425 				&scheduler->non_idle_offslot_grps);
2426 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group,
2427 						 new_val);
2428 		}
2429 	}
2430 }
2431 
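/**
 * confirm_cmd_buf_empty() - Check if a queue's command buffer is drained.
 *
 * @queue: Pointer to the GPU queue.
 *
 * The queue is treated as idle when CS_INSERT equals CS_EXTRACT and, for
 * firmware interfaces >= 1.0, no scoreboard entries are outstanding.
 *
 * Return: true if the command buffer is empty and the CS is idle.
 */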
2432 static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
2433 {
2434 	bool cs_empty;
2435 	bool cs_idle;
2436 	u32 sb_status = 0;
2437 
2438 	struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
2439 	struct kbase_csf_global_iface const *const iface =
2440 		&kbdev->csf.global_iface;
2441 
2442 	u32 glb_version = iface->version;
2443 
2444 	u64 const *input_addr = (u64 const *)queue->user_io_addr;
2445 	u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
2446 
2447 	if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
2448 		/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
2449 		struct kbase_csf_cmd_stream_group_info const *const ginfo =
2450 			&kbdev->csf.global_iface.groups[queue->group->csg_nr];
2451 		struct kbase_csf_cmd_stream_info const *const stream =
2452 			&ginfo->streams[queue->csi_index];
2453 
2454 		sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2455 			kbase_csf_firmware_cs_output(stream,
2456 						     CS_STATUS_SCOREBOARDS));
2457 	}
2458 
2459 	cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
2460 		    output_addr[CS_EXTRACT_LO / sizeof(u64)]);
2461 	cs_idle = cs_empty && (!sb_status);
2462 
2463 	return cs_idle;
2464 }
2465 
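/**
 * save_csg_slot() - Save the state of a group whose CSG slot was halted.
 *
 * @group: Pointer to the queue group whose suspend/terminate was acknowledged.
 *
 * Saves the sync wait state of the bound queues and moves the group to the
 * appropriate run state: SUSPENDED, SUSPENDED_ON_IDLE, or (via the context's
 * idle wait list) SUSPENDED_ON_WAIT_SYNC.
 */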
2466 static void save_csg_slot(struct kbase_queue_group *group)
2467 {
2468 	struct kbase_device *kbdev = group->kctx->kbdev;
2469 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2470 	struct kbase_csf_cmd_stream_group_info *ginfo;
2471 	u32 state;
2472 
2473 	lockdep_assert_held(&scheduler->lock);
2474 
2475 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2476 		return;
2477 
2478 	ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
2479 
2480 	state =
2481 	    CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK));
2482 
2483 	if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
2484 		     (state != CSG_ACK_STATE_TERMINATE))) {
2485 		u32 max_streams = ginfo->stream_num;
2486 		u32 i;
2487 		bool sync_wait = false;
2488 		bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
2489 			    CSG_STATUS_STATE_IDLE_MASK;
2490 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
2491 		for (i = 0; i < max_streams; i++)
2492 			update_hw_active(group->bound_queues[i], false);
2493 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
2494 		for (i = 0; idle && i < max_streams; i++) {
2495 			struct kbase_queue *const queue =
2496 					group->bound_queues[i];
2497 
2498 			if (!queue || !queue->enabled)
2499 				continue;
2500 
2501 			if (save_slot_cs(ginfo, queue)) {
2502 				/* sync_wait is only true if the queue is blocked on
2503 				 * a CQS and not a scoreboard.
2504 				 */
2505 				if (queue->blocked_reason !=
2506 				    CS_STATUS_BLOCKED_ON_SB_WAIT)
2507 					sync_wait = true;
2508 			} else {
2509 				/* Need to confirm if ringbuffer of the GPU
2510 				 * queue is empty or not. A race can arise
2511 				 * between the flush of GPU queue and suspend
2512 				 * of CSG. If a queue is flushed after FW has
2513 				 * set the IDLE bit in CSG_STATUS_STATE, then
2514 				 * Scheduler will incorrectly consider CSG
2515 				 * as idle. And there may not be any further
2516 				 * flush call for the GPU queue, which would
2517 				 * have de-idled the CSG.
2518 				 */
2519 				idle = confirm_cmd_buf_empty(queue);
2520 			}
2521 		}
2522 
2523 		if (idle) {
2524 			/* Take the suspended group out of the runnable_groups
2525 			 * list of the context and move it to the
2526 			 * idle_wait_groups list.
2527 			 */
2528 			if (sync_wait)
2529 				deschedule_idle_wait_group(scheduler, group);
2530 			else {
2531 				group->run_state =
2532 					KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
2533 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED_ON_IDLE, group,
2534 							 group->run_state);
2535 				dev_dbg(kbdev->dev, "Group-%d suspended: idle",
2536 					group->handle);
2537 			}
2538 		} else {
2539 			group->run_state = KBASE_CSF_GROUP_SUSPENDED;
2540 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group,
2541 						 group->run_state);
2542 		}
2543 
2544 		update_offslot_non_idle_cnt_on_grp_suspend(group);
2545 		kbase_csf_tiler_heap_reclaim_sched_notify_grp_suspend(group);
2546 	}
2547 }
2548 
2549 /* Clean up the CSG slot after it has been vacated, ready for the next CSG run.
2550  * Return whether there is a kctx address fault associated with the group
2551  * for which the clean-up is done.
2552  */
2553 static bool cleanup_csg_slot(struct kbase_queue_group *group)
2554 {
2555 	struct kbase_context *kctx = group->kctx;
2556 	struct kbase_device *kbdev = kctx->kbdev;
2557 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2558 	struct kbase_csf_cmd_stream_group_info *ginfo;
2559 	s8 slot;
2560 	struct kbase_csf_csg_slot *csg_slot;
2561 	unsigned long flags;
2562 	u32 i;
2563 	bool as_fault = false;
2564 
2565 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2566 
2567 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2568 		return as_fault;
2569 
2570 	slot = group->csg_nr;
2571 	csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2572 	ginfo = &global_iface->groups[slot];
2573 
2574 	/* Now loop through all the bound CSs, and clean them via a stop */
2575 	for (i = 0; i < ginfo->stream_num; i++) {
2576 		struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i];
2577 
2578 		if (group->bound_queues[i]) {
2579 			if (group->bound_queues[i]->enabled) {
2580 				kbase_csf_firmware_cs_input_mask(stream,
2581 					CS_REQ, CS_REQ_STATE_STOP,
2582 					CS_REQ_STATE_MASK);
2583 			}
2584 
2585 			unassign_user_doorbell_from_queue(kbdev,
2586 				group->bound_queues[i]);
2587 		}
2588 	}
2589 
2590 	unassign_user_doorbell_from_group(kbdev, group);
2591 
2592 	/* The csg does not need cleanup other than dropping its AS */
2593 	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2594 	as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
2595 	kbase_ctx_sched_release_ctx(kctx);
2596 	if (unlikely(group->faulted))
2597 		as_fault = true;
2598 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2599 
2600 	/* now mark the slot as vacant */
2601 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2602 	kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
2603 	clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
2604 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2605 				 kbdev->csf.scheduler.csg_slots_idle_mask[0]);
2606 
2607 	group->csg_nr = KBASEP_CSG_NR_INVALID;
2608 	set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask);
2609 	clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2610 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2611 
2612 	csg_slot->trigger_jiffies = jiffies;
2613 	atomic_set(&csg_slot->state, CSG_SLOT_READY);
2614 
2615 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot);
2616 	dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n",
2617 		group->handle, slot);
2618 
2619 	KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
2620 		kbdev->gpu_props.props.raw_props.gpu_id, slot);
2621 
2622 	/* Notify that the group is off-slot and its csg_reg might be available
2623 	 * for reuse with other groups in a 'lazy unbinding' style.
2624 	 */
2625 	kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2626 
2627 	return as_fault;
2628 }
2629 
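/**
 * update_csg_slot_priority() - Update the dynamic priority of an on-slot group.
 *
 * @group: Pointer to the queue group resident on a CSG slot.
 * @prio:  New dynamic priority for the slot.
 *
 * If the priority actually changed, rewrites the priority field of CSG_EP_REQ,
 * toggles the EP_CFG request bit and rings the CSG doorbell.
 */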
2630 static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
2631 {
2632 	struct kbase_device *kbdev = group->kctx->kbdev;
2633 	struct kbase_csf_csg_slot *csg_slot;
2634 	struct kbase_csf_cmd_stream_group_info *ginfo;
2635 	s8 slot;
2636 	u8 prev_prio;
2637 	u32 ep_cfg;
2638 	u32 csg_req;
2639 	unsigned long flags;
2640 
2641 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2642 
2643 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2644 		return;
2645 
2646 	slot = group->csg_nr;
2647 	csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2648 	ginfo = &kbdev->csf.global_iface.groups[slot];
2649 
2650 	/* CSGs remaining on-slot can be either idle or runnable.
2651 	 * This also applies in protected mode.
2652 	 */
2653 	WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
2654 		(group->run_state == KBASE_CSF_GROUP_IDLE)));
2655 
2656 	/* Update consumes a group from scanout */
2657 	update_offslot_non_idle_cnt_for_onslot_grp(group);
2658 
2659 	if (csg_slot->priority == prio)
2660 		return;
2661 
2662 	/* Read the csg_ep_cfg back for updating the priority field */
2663 	ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
2664 	prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
2665 	ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2666 	kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2667 
2668 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2669 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2670 	csg_req ^= CSG_REQ_EP_CFG_MASK;
2671 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2672 					  CSG_REQ_EP_CFG_MASK);
2673 	kbase_csf_ring_csg_doorbell(kbdev, slot);
2674 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2675 
2676 	csg_slot->priority = prio;
2677 
2678 	dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n",
2679 		group->handle, group->kctx->tgid, group->kctx->id, slot,
2680 		prev_prio, prio);
2681 
2682 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_PRIO_UPDATE, group, prev_prio);
2683 
2684 	set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
2685 }
2686 
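/**
 * program_csg_slot() - Program a vacant CSG slot for a queue group.
 *
 * @group: Pointer to the queue group to be programmed.
 * @slot:  Vacant CSG slot to use.
 * @prio:  Dynamic priority to assign to the slot.
 *
 * Binds the group's MCU shared csg_reg, acquires a GPU address space for the
 * context, programs the bound queues plus the CSG endpoint, suspend buffer and
 * interrupt configuration, then requests the START or RESUME state.
 */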
2687 static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
2688 		u8 prio)
2689 {
2690 	struct kbase_context *kctx = group->kctx;
2691 	struct kbase_device *kbdev = kctx->kbdev;
2692 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2693 	const u64 shader_core_mask =
2694 		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
2695 	const u64 tiler_core_mask =
2696 		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER);
2697 	const u64 compute_mask = shader_core_mask & group->compute_mask;
2698 	const u64 fragment_mask = shader_core_mask & group->fragment_mask;
2699 	const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
2700 	const u8 num_cores = kbdev->gpu_props.num_cores;
2701 	const u8 compute_max = min(num_cores, group->compute_max);
2702 	const u8 fragment_max = min(num_cores, group->fragment_max);
2703 	const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
2704 	struct kbase_csf_cmd_stream_group_info *ginfo;
2705 	u32 ep_cfg = 0;
2706 	u32 csg_req;
2707 	u32 state;
2708 	int i;
2709 	unsigned long flags;
2710 	u64 normal_suspend_buf;
2711 	u64 protm_suspend_buf;
2712 	struct kbase_csf_csg_slot *csg_slot =
2713 		&kbdev->csf.scheduler.csg_slots[slot];
2714 
2715 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2716 
2717 	if (WARN_ON(slot < 0) &&
2718 	    WARN_ON(slot >= global_iface->group_num))
2719 		return;
2720 
2721 	WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
2722 
2723 	if (unlikely(kbase_csf_mcu_shared_group_bind_csg_reg(kbdev, group))) {
2724 		dev_warn(kbdev->dev,
2725 			 "Couldn't bind MCU shared csg_reg for group %d of context %d_%d, slot=%u",
2726 			 group->handle, group->kctx->tgid, kctx->id, slot);
2727 		kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2728 		return;
2729 	}
2730 
2731 	/* The suspend buf has already been mapped through binding to csg_reg */
2732 	normal_suspend_buf = group->normal_suspend_buf.gpu_va;
2733 	protm_suspend_buf = group->protected_suspend_buf.gpu_va;
2734 	WARN_ONCE(!normal_suspend_buf, "Normal suspend buffer not mapped");
2735 
2736 	ginfo = &global_iface->groups[slot];
2737 
2738 	/* Pick an available address space for this context */
2739 	mutex_lock(&kbdev->mmu_hw_mutex);
2740 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
2741 	kbase_ctx_sched_retain_ctx(kctx);
2742 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
2743 	mutex_unlock(&kbdev->mmu_hw_mutex);
2744 
2745 	if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
2746 		dev_dbg(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
2747 			 group->handle, kctx->tgid, kctx->id, slot);
2748 		kbase_csf_mcu_shared_set_group_csg_reg_unused(kbdev, group);
2749 		return;
2750 	}
2751 
2752 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2753 	set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2754 	kbdev->csf.scheduler.csg_slots[slot].resident_group = group;
2755 	group->csg_nr = slot;
2756 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2757 
2758 	assign_user_doorbell_to_group(kbdev, group);
2759 
2760 	/* Now loop through all the bound & kicked CSs, and program them */
2761 	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2762 		struct kbase_queue *queue = group->bound_queues[i];
2763 
2764 		if (queue)
2765 			program_cs(kbdev, queue, false);
2766 	}
2767 
2768 
2769 	/* Endpoint programming for CSG */
2770 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO,
2771 				     compute_mask & U32_MAX);
2772 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI,
2773 				     compute_mask >> 32);
2774 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO,
2775 				     fragment_mask & U32_MAX);
2776 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
2777 				     fragment_mask >> 32);
2778 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
2779 				     tiler_mask & U32_MAX);
2780 
2781 	/* Register group UID with firmware */
2782 	kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
2783 				     group->group_uid);
2784 
2785 	ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
2786 	ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
2787 	ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
2788 	ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2789 	kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2790 
2791 	/* Program the address space number assigned to the context */
2792 	kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
2793 
2794 	kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO,
2795 			normal_suspend_buf & U32_MAX);
2796 	kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
2797 			normal_suspend_buf >> 32);
2798 
2799 	/* Note: we program the P-mode suspend buffer pointer here, but actual
2800 	 * entry into P-mode execution additionally requires the P-mode physical
2801 	 * pages to be allocated and mapped with the bound csg_reg, which carries
2802 	 * a specific flag indicating this P-mode runnable condition before a
2803 	 * group is granted its P-mode section entry. Without a P-mode entry,
2804 	 * the buffer pointed to is not going to be accessed at all.
2805 	 */
2806 	kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, protm_suspend_buf & U32_MAX);
2807 	kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, protm_suspend_buf >> 32);
2808 
2809 	if (group->dvs_buf) {
2810 		kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_LO,
2811 					     group->dvs_buf & U32_MAX);
2812 		kbase_csf_firmware_csg_input(ginfo, CSG_DVS_BUF_HI,
2813 					     group->dvs_buf >> 32);
2814 	}
2815 
2816 	/* Enable all interrupts for now */
2817 	kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
2818 
2819 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2820 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2821 	csg_req ^= CSG_REQ_EP_CFG_MASK;
2822 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2823 					  CSG_REQ_EP_CFG_MASK);
2824 
2825 	/* Set state to START/RESUME */
2826 	if (queue_group_suspended_locked(group)) {
2827 		state = CSG_REQ_STATE_RESUME;
2828 	} else {
2829 		WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
2830 		state = CSG_REQ_STATE_START;
2831 	}
2832 
2833 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
2834 			state, CSG_REQ_STATE_MASK);
2835 	kbase_csf_ring_csg_doorbell(kbdev, slot);
2836 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2837 
2838 	/* Update status before ringing the doorbell, marking ready => run */
2839 	atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
2840 	csg_slot->trigger_jiffies = jiffies;
2841 	csg_slot->priority = prio;
2842 
2843 	/* Trace the programming of the CSG on the slot */
2844 	KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
2845 		kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
2846 		group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
2847 
2848 	dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
2849 		group->handle, kctx->tgid, kctx->id, slot, prio);
2850 
2851 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START_REQ, group,
2852 				 (((u64)ep_cfg) << 32) | ((((u32)kctx->as_nr) & 0xF) << 16) |
2853 					 (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
2854 
2855 	/* Update the heap reclaim manager */
2856 	kbase_csf_tiler_heap_reclaim_sched_notify_grp_active(group);
2857 
2858 	/* Programming a slot consumes a group from scanout */
2859 	update_offslot_non_idle_cnt_for_onslot_grp(group);
2860 
2861 	/* Notify the group's bound csg_reg is now in active use */
2862 	kbase_csf_mcu_shared_set_group_csg_reg_active(kbdev, group);
2863 }
2864 
2865 static void remove_scheduled_group(struct kbase_device *kbdev,
2866 		struct kbase_queue_group *group)
2867 {
2868 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2869 
2870 	lockdep_assert_held(&scheduler->lock);
2871 
2872 	WARN_ON(group->prepared_seq_num ==
2873 		KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID);
2874 	WARN_ON(list_empty(&group->link_to_schedule));
2875 
2876 	list_del_init(&group->link_to_schedule);
2877 	scheduler->ngrp_to_schedule--;
2878 	group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2879 	group->kctx->csf.sched.ngrp_to_schedule--;
2880 }
2881 
2882 static void sched_evict_group(struct kbase_queue_group *group, bool fault,
2883 			      bool update_non_idle_offslot_grps_cnt_from_run_state)
2884 {
2885 	struct kbase_context *kctx = group->kctx;
2886 	struct kbase_device *kbdev = kctx->kbdev;
2887 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2888 
2889 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2890 
2891 	if (queue_group_scheduled_locked(group)) {
2892 		u32 i;
2893 
2894 		if (update_non_idle_offslot_grps_cnt_from_run_state &&
2895 		    (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
2896 		     group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
2897 			int new_val = atomic_dec_return(
2898 				&scheduler->non_idle_offslot_grps);
2899 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, group,
2900 						 new_val);
2901 		}
2902 
2903 		for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2904 			if (group->bound_queues[i])
2905 				group->bound_queues[i]->enabled = false;
2906 		}
2907 
2908 		if (group->prepared_seq_num !=
2909 				KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) {
2910 			if (!update_non_idle_offslot_grps_cnt_from_run_state)
2911 				update_offslot_non_idle_cnt(group);
2912 			remove_scheduled_group(kbdev, group);
2913 		}
2914 
2915 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2916 			remove_group_from_idle_wait(group);
2917 		else {
2918 			remove_group_from_runnable(scheduler, group,
2919 						KBASE_CSF_GROUP_INACTIVE);
2920 		}
2921 
2922 		WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2923 
2924 		if (fault) {
2925 			group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
2926 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_FAULT_EVICTED, group,
2927 						 scheduler->total_runnable_grps);
2928 		}
2929 
2930 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT, group,
2931 					 (((u64)scheduler->total_runnable_grps) << 32) |
2932 						 ((u32)group->run_state));
2933 		dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
2934 			group->handle, scheduler->total_runnable_grps);
2935 		/* Notify a group has been evicted */
2936 		wake_up_all(&kbdev->csf.event_wait);
2937 	}
2938 
2939 	kbase_csf_tiler_heap_reclaim_sched_notify_grp_evict(group);
2940 
2941 	/* Clear all the bound shared regions and unmap any in-place MMU maps */
2942 	kbase_csf_mcu_shared_clear_evicted_group_csg_reg(kbdev, group);
2943 }
2944 
2945 static int term_group_sync(struct kbase_queue_group *group)
2946 {
2947 	struct kbase_device *kbdev = group->kctx->kbdev;
2948 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
2949 	int err = 0;
2950 
2951 	term_csg_slot(group);
2952 
2953 	remaining = wait_event_timeout(kbdev->csf.event_wait,
2954 		group->cs_unrecoverable || csg_slot_stopped_locked(kbdev, group->csg_nr),
2955 		remaining);
2956 
2957 	if (unlikely(!remaining)) {
2958 		enum dumpfault_error_type error_type = DF_CSG_TERMINATE_TIMEOUT;
2959 
2960 		dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
2961 			 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
2962 			 group->handle, group->kctx->tgid,
2963 			 group->kctx->id, group->csg_nr);
2964 		if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
2965 			error_type = DF_PING_REQUEST_TIMEOUT;
2966 		kbase_debug_csf_fault_notify(kbdev, group->kctx, error_type);
2967 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
2968 			kbase_reset_gpu(kbdev);
2969 
2970 
2971 		err = -ETIMEDOUT;
2972 	}
2973 
2974 	return err;
2975 }
2976 
2977 void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
2978 {
2979 	struct kbase_device *kbdev = group->kctx->kbdev;
2980 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2981 	bool wait_for_termination = true;
2982 	bool on_slot;
2983 
2984 	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
2985 	lockdep_assert_held(&group->kctx->csf.lock);
2986 	mutex_lock(&scheduler->lock);
2987 
2988 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
2989 	wait_for_dump_complete_on_group_deschedule(group);
2990 	if (!queue_group_scheduled_locked(group))
2991 		goto unlock;
2992 
2993 	on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2994 
2995 #ifdef KBASE_PM_RUNTIME
2996 	/* If the queue group is on slot and Scheduler is in SLEEPING state,
2997 	 * then we need to wake up the Scheduler to exit the sleep state rather
2998 	 * than waiting for the runtime suspend or power down of GPU.
2999 	 * The group termination is usually triggered in the context of an Application
3000 	 * thread and it has been seen that certain Apps can destroy groups at
3001 	 * random points, not necessarily when the App is exiting.
3002 	 */
3003 	if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
3004 		scheduler_wakeup(kbdev, true);
3005 
3006 		/* Wait for MCU firmware to start running */
3007 		if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
3008 			dev_warn(
3009 				kbdev->dev,
3010 				"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
3011 				kbase_backend_get_cycle_cnt(kbdev),
3012 				group->handle, group->kctx->tgid,
3013 				group->kctx->id, group->csg_nr);
3014 			/* No point in waiting for CSG termination if MCU didn't
3015 			 * become active.
3016 			 */
3017 			wait_for_termination = false;
3018 		}
3019 	}
3020 #endif
3021 	if (!on_slot) {
3022 		sched_evict_group(group, false, true);
3023 	} else {
3024 		bool as_faulty;
3025 
3026 		if (likely(wait_for_termination))
3027 			term_group_sync(group);
3028 		else
3029 			term_csg_slot(group);
3030 
3031 		/* Treat the CSG as terminated */
3032 		as_faulty = cleanup_csg_slot(group);
3033 		/* remove from the scheduler list */
3034 		sched_evict_group(group, as_faulty, false);
3035 	}
3036 
3037 	WARN_ON(queue_group_scheduled_locked(group));
3038 
3039 unlock:
3040 	mutex_unlock(&scheduler->lock);
3041 }
3042 
3043 /**
3044  * scheduler_group_schedule() - Schedule a GPU command queue group on firmware
3045  *
3046  * @group: Pointer to the queue group to be scheduled.
3047  *
3048  * This function would enable the scheduling of a GPU command queue group on
3049  * firmware.
3050  *
3051  * Return: 0 on success, or negative on failure.
3052  */
3053 static int scheduler_group_schedule(struct kbase_queue_group *group)
3054 {
3055 	struct kbase_context *kctx = group->kctx;
3056 	struct kbase_device *kbdev = kctx->kbdev;
3057 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3058 
3059 	lockdep_assert_held(&kctx->csf.lock);
3060 	lockdep_assert_held(&scheduler->lock);
3061 
3062 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
3063 	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
3064 		update_idle_suspended_group_state(group);
3065 	else if (queue_group_idle_locked(group)) {
3066 		WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
3067 		WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
3068 
3069 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
3070 			update_idle_suspended_group_state(group);
3071 		else {
3072 			struct kbase_queue_group *protm_grp;
3073 			unsigned long flags;
3074 
3075 			WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
3076 				group));
3077 
3078 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3079 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3080 						 group->run_state);
3081 
3082 			/* A normal mode CSG could be idle onslot during
3083 			 * protected mode. In this case clear the
3084 			 * appropriate bit in csg_slots_idle_mask.
3085 			 */
3086 			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3087 			protm_grp = scheduler->active_protm_grp;
3088 			if (protm_grp && protm_grp != group) {
3089 				clear_bit((unsigned int)group->csg_nr,
3090 					  scheduler->csg_slots_idle_mask);
3091 				/* Request the update to confirm the condition inferred. */
3092 				group->reevaluate_idle_status = true;
3093 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
3094 					scheduler->csg_slots_idle_mask[0]);
3095 			}
3096 			spin_unlock_irqrestore(&scheduler->interrupt_lock,
3097 					       flags);
3098 
3099 			/* If GPU is in protected mode then any doorbells rang
3100 			 * would have no effect. Check if GPU is in protected
3101 			 * mode and if this group has higher priority than the
3102 			 * active protected mode group. If so prompt the FW
3103 			 * to exit protected mode.
3104 			 */
3105 			if (protm_grp &&
3106 			    group->scan_seq_num < protm_grp->scan_seq_num) {
3107 				/* Prompt the FW to exit protected mode */
3108 				scheduler_force_protm_exit(kbdev);
3109 			}
3110 		}
3111 	} else if (!queue_group_scheduled_locked(group)) {
3112 		int new_val;
3113 
3114 		insert_group_to_runnable(&kbdev->csf.scheduler, group,
3115 			KBASE_CSF_GROUP_RUNNABLE);
3116 		/* A new group entering the scheduler */
3117 		new_val = atomic_inc_return(
3118 			&kbdev->csf.scheduler.non_idle_offslot_grps);
3119 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
3120 	}
3121 
3122 	/* Since a group has become active now, check if GPU needs to be
3123 	 * powered up. Also rekick the Scheduler.
3124 	 */
3125 	scheduler_wakeup(kbdev, true);
3126 
3127 	return 0;
3128 }
3129 
3130 /**
3131  * set_max_csg_slots() - Set the number of available CSG slots
3132  *
3133  * @kbdev: Pointer of the GPU device.
3134  *
3135  * This function would set/limit the number of CSG slots that
3136  * can be used in the given tick/tock. It would be less than the total CSG
3137  * slots supported by firmware if the number of GPU address space slots
3138  * required to utilize all the CSG slots is more than the available
3139  * address space slots.
3140  */
3141 static inline void set_max_csg_slots(struct kbase_device *kbdev)
3142 {
3143 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3144 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
3145 	unsigned int max_address_space_slots =
3146 			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
3147 
3148 	WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
3149 
3150 	if (likely(scheduler->num_active_address_spaces <=
3151 		   max_address_space_slots))
3152 		scheduler->num_csg_slots_for_tick = total_csg_slots;
3153 }
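
/* A worked illustration (hypothetical numbers, not taken from any particular
 * GPU) of the address-space limit applied above: with 8 hardware address
 * spaces and NUM_RESERVED_AS_SLOTS = 1 kept for the MCU,
 * max_address_space_slots = 7. If the groups picked for the tick/tock come
 * from 7 or fewer contexts, num_csg_slots_for_tick is raised to the total
 * CSG slot count reported by firmware; otherwise it stays at the value
 * accumulated in count_active_address_space() below.
 */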
3154 
3155 /**
3156  * count_active_address_space() - Count the number of GPU address space slots
3157  *
3158  * @kbdev: Pointer of the GPU device.
3159  * @kctx: Pointer of the Kbase context.
3160  *
3161  * This function would update the counter that is tracking the number of GPU
3162  * address space slots that would be required to program the CS
3163  * group slots from the groups at the head of groups_to_schedule list.
3164  */
3165 static inline void count_active_address_space(struct kbase_device *kbdev,
3166 		struct kbase_context *kctx)
3167 {
3168 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3169 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
3170 	unsigned int max_address_space_slots =
3171 			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
3172 
3173 	if (scheduler->ngrp_to_schedule <= total_csg_slots) {
3174 		if (kctx->csf.sched.ngrp_to_schedule == 1)
3175 			scheduler->num_active_address_spaces++;
3176 
3177 		if (scheduler->num_active_address_spaces <=
3178 		    max_address_space_slots)
3179 			scheduler->num_csg_slots_for_tick++;
3180 	}
3181 }
3182 
3183 /* Two schemes are used in assigning the priority to CSG slots for a given
3184  * CSG from the 'groups_to_schedule' list.
3185  * This is needed as an idle on-slot group is deprioritized by moving it to
3186  * the tail of the 'groups_to_schedule' list. As a result it can either get
3187  * evicted from the CSG slot in the current tick/tock, or its position
3188  * can be after the lower priority non-idle groups in the 'groups_to_schedule'
3189  * list. The latter case can result in the on-slot subset containing both
3190  * non-idle and idle CSGs, and is handled through the 2nd scheme described
3191  * below.
3192  *
3193  * First scheme :- If all the slots are going to be occupied by the non-idle or
3194  * idle groups, then a simple assignment of the priority is done as per the
3195  * position of a group in the 'groups_to_schedule' list. So maximum priority
3196  * gets assigned to the slot of a group which is at the head of the list.
3197  * Here the 'groups_to_schedule' list would effectively be ordered as per the
3198  * static priority of groups.
3199  *
3200  * Second scheme :- If the slots are going to be occupied by a mix of idle and
3201  * non-idle groups then the priority assignment needs to ensure that the
3202  * priority of a slot belonging to a higher priority idle group will always be
3203  * greater than the priority of a slot belonging to a lower priority non-idle
3204  * group, reflecting the original position of a group in the scan order (i.e.
3205  * static priority) 'scan_seq_num', which is set during the prepare phase of a
3206  * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
3207  * is idle.
3208  * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned so that the first
3209  * 'slots_for_tick' groups in the original scan order are assigned a priority in
3210  * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
3211  * whereas the rest of the groups are assigned a priority in the subrange
3212  * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher
3213  * priority group ends up after the non-idle lower priority groups in the
3214  * 'groups_to_schedule' list, it will get a higher slot priority. And this will
3215  * enable the FW to quickly start the execution of higher priority group when it
3216  * gets de-idled.
3217  */
3218 static u8 get_slot_priority(struct kbase_queue_group *group)
3219 {
3220 	struct kbase_csf_scheduler *scheduler =
3221 		&group->kctx->kbdev->csf.scheduler;
3222 	u8 slot_prio;
3223 	u32 slots_for_tick = scheduler->num_csg_slots_for_tick;
3224 	u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots;
3225 	/* Check if all the slots are going to be occupied by the non-idle or
3226 	 * idle groups.
3227 	 */
3228 	if (scheduler->non_idle_scanout_grps >= slots_for_tick ||
3229 	    !scheduler->non_idle_scanout_grps) {
3230 		slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);
3231 	} else {
3232 		/* There will be a mix of idle and non-idle groups. */
3233 		if (group->scan_seq_num < slots_for_tick)
3234 			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY -
3235 					 group->scan_seq_num);
3236 		else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots))
3237 			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots));
3238 		else
3239 			slot_prio = 0;
3240 	}
3241 	return slot_prio;
3242 }
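
/* Worked example (hypothetical values) of the slot priority assignment done
 * by get_slot_priority() under the second scheme, assuming
 * MAX_CSG_SLOT_PRIORITY = 15 and slots_for_tick = 4 with a mix of idle and
 * non-idle groups:
 *   - a group with scan_seq_num = 2 (< slots_for_tick) gets priority
 *     15 - 2 = 13, i.e. a value in the upper subrange [15, 11);
 *   - a group with scan_seq_num = 6, programmed when used_slots = 1, gets
 *     priority 15 - (4 + 1) = 10, i.e. a value in the lower subrange [11, 0].
 * So an idle but higher priority group keeps a higher slot priority than the
 * non-idle lower priority groups that precede it in 'groups_to_schedule'.
 */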
3243 
3244 /**
3245  * update_resident_groups_priority() - Update the priority of resident groups
3246  *
3247  * @kbdev:    The GPU device.
3248  *
3249  * This function will update the priority of all resident queue groups
3250  * that are at the head of groups_to_schedule list, preceding the first
3251  * non-resident group.
3252  *
3253  * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
3254  * the priority update.
3255  */
3256 static void update_resident_groups_priority(struct kbase_device *kbdev)
3257 {
3258 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3259 	u32 num_groups = scheduler->num_csg_slots_for_tick;
3260 
3261 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3262 	while (!list_empty(&scheduler->groups_to_schedule)) {
3263 		struct kbase_queue_group *group =
3264 			list_first_entry(&scheduler->groups_to_schedule,
3265 					struct kbase_queue_group,
3266 					 link_to_schedule);
3267 		bool resident =
3268 			kbasep_csf_scheduler_group_is_on_slot_locked(group);
3269 
3270 		if ((group->prepared_seq_num >= num_groups) || !resident)
3271 			break;
3272 
3273 		update_csg_slot_priority(group,
3274 					 get_slot_priority(group));
3275 
3276 		/* Drop the head group from the list */
3277 		remove_scheduled_group(kbdev, group);
3278 		scheduler->remaining_tick_slots--;
3279 	}
3280 }
3281 
3282 /**
3283  * program_group_on_vacant_csg_slot() - Program a non-resident group on the
3284  *                                      given vacant CSG slot.
3285  * @kbdev:    Pointer to the GPU device.
3286  * @slot:     Vacant CSG slot number.
3287  *
3288  * This function will program a non-resident group at the head of
3289  * kbase_csf_scheduler.groups_to_schedule list on the given vacant
3290  * CSG slot, provided the initial position of the non-resident
3291  * group in the list is less than the number of CSG slots and there is
3292  * an available GPU address space slot.
3293  * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
3294  * programming the slot.
3295  */
3296 static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
3297 		s8 slot)
3298 {
3299 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3300 	struct kbase_queue_group *const group =
3301 		list_empty(&scheduler->groups_to_schedule) ? NULL :
3302 			list_first_entry(&scheduler->groups_to_schedule,
3303 					struct kbase_queue_group,
3304 					link_to_schedule);
3305 	u32 num_groups = scheduler->num_csg_slots_for_tick;
3306 
3307 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3308 	if (group && (group->prepared_seq_num < num_groups)) {
3309 		bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group);
3310 
3311 		if (!WARN_ON(ret)) {
3312 			if (kctx_as_enabled(group->kctx) && !group->faulted) {
3313 				program_csg_slot(group, slot,
3314 					get_slot_priority(group));
3315 
3316 				if (likely(csg_slot_in_use(kbdev, slot))) {
3317 					/* Drop the head group from the list */
3318 					remove_scheduled_group(kbdev, group);
3319 					scheduler->remaining_tick_slots--;
3320 				}
3321 			} else {
3322 				update_offslot_non_idle_cnt(group);
3323 				remove_scheduled_group(kbdev, group);
3324 			}
3325 		}
3326 	}
3327 }
3328 
3329 /**
3330  * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
3331  *                             group and update the priority of resident groups.
3332  *
3333  * @kbdev:    Pointer to the GPU device.
3334  * @slot:     Vacant CSG slot number.
3335  *
3336  * This function will first update the priority of all resident queue groups
3337  * that are at the head of groups_to_schedule list, preceding the first
3338  * non-resident group, it will then try to program the given CS
3339  * group slot with the non-resident group. Finally update the priority of all
3340  * resident queue groups following the non-resident group.
3341  *
3342  * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
3343  */
3344 static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
3345 {
3346 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3347 	struct kbase_csf_csg_slot *const csg_slot =
3348 				scheduler->csg_slots;
3349 
3350 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3351 	WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY);
3352 
3353 	/* First update priority for already resident groups (if any)
3354 	 * before the non-resident group
3355 	 */
3356 	update_resident_groups_priority(kbdev);
3357 
3358 	/* Now consume the vacant slot for the non-resident group */
3359 	program_group_on_vacant_csg_slot(kbdev, slot);
3360 
3361 	/* Now update priority for already resident groups (if any)
3362 	 * following the non-resident group
3363 	 */
3364 	update_resident_groups_priority(kbdev);
3365 }
3366 
3367 static bool slots_state_changed(struct kbase_device *kbdev,
3368 		unsigned long *slots_mask,
3369 		bool (*state_check_func)(struct kbase_device *, s8))
3370 {
3371 	u32 num_groups = kbdev->csf.global_iface.group_num;
3372 	DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0};
3373 	bool changed = false;
3374 	u32 i;
3375 
3376 	for_each_set_bit(i, slots_mask, num_groups) {
3377 		if (state_check_func(kbdev, (s8)i)) {
3378 			set_bit(i, changed_slots);
3379 			changed = true;
3380 		}
3381 	}
3382 
3383 	if (changed)
3384 		bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS);
3385 
3386 	return changed;
3387 }
3388 
3389 /**
3390  * program_suspending_csg_slots() - Program the CSG slots vacated on suspension
3391  *                                  of queue groups running on them.
3392  *
3393  * @kbdev:    Pointer to the GPU device.
3394  *
3395  * This function will first wait for the ongoing suspension to complete on a
3396  * CSG slot and will then program the vacant slot with the
3397  * non-resident queue group inside the groups_to_schedule list.
3398  * The programming of the non-resident queue group on the vacant slot could
3399  * fail due to unavailability of free GPU address space slot and so the
3400  * programming is re-attempted after the ongoing suspension has completed
3401  * for all the CSG slots.
3402  * The priority of resident groups before and after the non-resident group
3403  * in the groups_to_schedule list would also be updated.
3404  * This would be repeated for all the slots undergoing suspension.
3405  * GPU reset would be initiated if the wait for suspend times out.
3406  */
3407 static void program_suspending_csg_slots(struct kbase_device *kbdev)
3408 {
3409 	u32 num_groups = kbdev->csf.global_iface.group_num;
3410 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3411 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
3412 	DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
3413 	bool suspend_wait_failed = false;
3414 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3415 
3416 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3417 
3418 	/* In the current implementation, csgs_events_enable_mask would be used
3419 	 * only to indicate suspending CSGs.
3420 	 */
3421 	bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
3422 		MAX_SUPPORTED_CSGS);
3423 
3424 	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3425 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3426 
3427 		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3428 
3429 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3430 			slots_state_changed(kbdev, changed,
3431 				csg_slot_stopped_raw),
3432 			remaining);
3433 
3434 		if (likely(remaining)) {
3435 			u32 i;
3436 
3437 			for_each_set_bit(i, changed, num_groups) {
3438 				struct kbase_queue_group *group =
3439 					scheduler->csg_slots[i].resident_group;
3440 
3441 				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
3442 					continue;
3443 
3444 				/* The on slot csg is now stopped */
3445 				clear_bit(i, slot_mask);
3446 
3447 				KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
3448 					kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
3449 
3450 				if (likely(group)) {
3451 					bool as_fault;
3452 					/* Only do save/cleanup if the
3453 					 * group was not terminated during
3454 					 * the wait.
3455 					 */
3456 					save_csg_slot(group);
3457 					as_fault = cleanup_csg_slot(group);
3458 					/* If AS fault detected, evict it */
3459 					if (as_fault) {
3460 						sched_evict_group(group, true, true);
3461 						set_bit(i, evicted_mask);
3462 					}
3463 				}
3464 
3465 				program_vacant_csg_slot(kbdev, (s8)i);
3466 			}
3467 		} else {
3468 			u32 i;
3469 
3470 			/* Groups that have failed to suspend in time shall
3471 			 * raise a fatal error as they could no longer be
3472 			 * safely resumed.
3473 			 */
3474 			for_each_set_bit(i, slot_mask, num_groups) {
3475 				struct kbase_queue_group *const group =
3476 					scheduler->csg_slots[i].resident_group;
3477 				enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
3478 
3479 				struct base_gpu_queue_group_error const
3480 					err_payload = { .error_type =
3481 								BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3482 							.payload = {
3483 								.fatal_group = {
3484 									.status =
3485 										GPU_EXCEPTION_TYPE_SW_FAULT_2,
3486 								} } };
3487 
3488 				if (unlikely(group == NULL))
3489 					continue;
3490 
3491 				/* TODO GPUCORE-25328: The CSG can't be
3492 				 * terminated, the GPU will be reset as a
3493 				 * work-around.
3494 				 */
3495 				dev_warn(
3496 					kbdev->dev,
3497 					"[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
3498 					kbase_backend_get_cycle_cnt(kbdev),
3499 					group->handle, group->kctx->tgid,
3500 					group->kctx->id, i,
3501 					kbdev->csf.fw_timeout_ms);
3502 				if (kbase_csf_firmware_ping_wait(kbdev,
3503 								 FW_PING_AFTER_ERROR_TIMEOUT_MS))
3504 					error_type = DF_PING_REQUEST_TIMEOUT;
3505 				schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3506 
3507 				kbase_csf_add_group_fatal_error(group, &err_payload);
3508 				kbase_event_wakeup(group->kctx);
3509 
3510 				/* The group has failed suspension, stop
3511 				 * further examination.
3512 				 */
3513 				clear_bit(i, slot_mask);
3514 				set_bit(i, scheduler->csgs_events_enable_mask);
3515 			}
3516 
3517 			suspend_wait_failed = true;
3518 		}
3519 	}
3520 
3521 	if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
3522 		dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
3523 			 num_groups, evicted_mask);
3524 
3525 	if (likely(!suspend_wait_failed)) {
3526 		u32 i;
3527 
3528 		while (scheduler->ngrp_to_schedule &&
3529 		       scheduler->remaining_tick_slots) {
3530 			i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
3531 					num_groups);
3532 			if (WARN_ON(i == num_groups))
3533 				break;
3534 			program_vacant_csg_slot(kbdev, (s8)i);
3535 			if (!csg_slot_in_use(kbdev, (int)i)) {
3536 				dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
3537 				break;
3538 			}
3539 		}
3540 	} else {
3541 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3542 			kbase_reset_gpu(kbdev);
3543 	}
3544 }
3545 
3546 static void suspend_queue_group(struct kbase_queue_group *group)
3547 {
3548 	unsigned long flags;
3549 	struct kbase_csf_scheduler *const scheduler =
3550 		&group->kctx->kbdev->csf.scheduler;
3551 
3552 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3553 	/* This shall be used in program_suspending_csg_slots() where we
3554 	 * assume that whilst CSGs are being suspended, this bitmask is not
3555 	 * used by anything else i.e., it indicates only the CSGs going
3556 	 * through suspension.
3557 	 */
3558 	clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
3559 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3560 
3561 	/* If AS fault detected, terminate the group */
3562 	if (!kctx_as_enabled(group->kctx) || group->faulted)
3563 		term_csg_slot(group);
3564 	else
3565 		suspend_csg_slot(group);
3566 }
3567 
3568 static void wait_csg_slots_start(struct kbase_device *kbdev)
3569 {
3570 	u32 num_groups = kbdev->csf.global_iface.group_num;
3571 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3572 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3573 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3574 	u32 i;
3575 
3576 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3577 
3578 	/* extract start slot flags for check */
3579 	for (i = 0; i < num_groups; i++) {
3580 		if (atomic_read(&scheduler->csg_slots[i].state) ==
3581 		    CSG_SLOT_READY2RUN)
3582 			set_bit(i, slot_mask);
3583 	}
3584 
3585 	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3586 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3587 
3588 		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3589 
3590 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3591 			slots_state_changed(kbdev, changed, csg_slot_running),
3592 			remaining);
3593 
3594 		if (likely(remaining)) {
3595 			for_each_set_bit(i, changed, num_groups) {
3596 				struct kbase_queue_group *group =
3597 					scheduler->csg_slots[i].resident_group;
3598 
3599 				/* The on slot csg is now running */
3600 				clear_bit(i, slot_mask);
3601 				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3602 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
3603 							 group->run_state);
3604 			}
3605 		} else {
3606 			const int csg_nr = ffs(slot_mask[0]) - 1;
3607 			struct kbase_queue_group *group =
3608 				scheduler->csg_slots[csg_nr].resident_group;
3609 			enum dumpfault_error_type error_type = DF_CSG_START_TIMEOUT;
3610 
3611 			dev_err(kbdev->dev,
3612 				"[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
3613 				kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
3614 				num_groups, slot_mask);
3615 			if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3616 				error_type = DF_PING_REQUEST_TIMEOUT;
3617 			schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3618 
3619 			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3620 				kbase_reset_gpu(kbdev);
3621 			break;
3622 		}
3623 	}
3624 }
3625 
3626 /**
3627  * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
3628  *                           flagged after the completion of a CSG status
3629  *                           update command
3630  *
3631  * @kbdev:  Pointer to the GPU device.
3632  * @slot:   The given slot for checking an occupying resident group's idle
3633  *          state.
3634  *
3635  * This function is called at the start of scheduling tick to check the
3636  * idle status of a queue group resident on a CSG slot.
3637  * The caller must make sure the corresponding status update command has
3638  * been called and completed before checking this status.
3639  *
3640  * Return: true if the group resident on slot is idle, otherwise false.
3641  */
3642 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
3643 				  unsigned long slot)
3644 {
3645 	struct kbase_csf_cmd_stream_group_info *ginfo =
3646 					&kbdev->csf.global_iface.groups[slot];
3647 	bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
3648 			CSG_STATUS_STATE_IDLE_MASK;
3649 
3650 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3651 
3652 	return idle;
3653 }
3654 
3655 /**
3656  * slots_update_state_changed() -  Check the handshake state of a subset of
3657  *                                 command group slots.
3658  *
3659  * @kbdev:          The GPU device.
3660  * @field_mask:     The field mask for checking the state in the csg_req/ack.
3661  * @slots_mask:     A bit_map specifying the slots to check.
3662  * @slots_done:     A cleared bit_map for returning the slots that
3663  *                  have finished update.
3664  *
3665  * Checks the state of a subset of slots selected through the slots_mask
3666  * bit_map. Records which slots' handshake completed and send it back in the
3667  * slots_done bit_map.
3668  *
3669  * Return: true if the slots_done is set for at least one slot.
3670  *         Otherwise false.
3671  */
3672 static
3673 bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
3674 		const unsigned long *slots_mask, unsigned long *slots_done)
3675 {
3676 	u32 num_groups = kbdev->csf.global_iface.group_num;
3677 	bool changed = false;
3678 	u32 i;
3679 
3680 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3681 
3682 	for_each_set_bit(i, slots_mask, num_groups) {
3683 		struct kbase_csf_cmd_stream_group_info const *const ginfo =
3684 					    &kbdev->csf.global_iface.groups[i];
3685 		u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
3686 
3687 		state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3688 
3689 		if (!(state & field_mask)) {
3690 			set_bit(i, slots_done);
3691 			changed = true;
3692 		}
3693 	}
3694 
3695 	return changed;
3696 }
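
/* A minimal sketch (illustration only, not driver code) of the req/ack
 * toggle convention checked above: the host requests an action by toggling a
 * field in CSG_REQ and the firmware acknowledges it by toggling the same
 * field in CSG_ACK, so the handshake is complete once the two fields match,
 * i.e. ((req ^ ack) & field_mask) == 0.
 *
 *   u32 req = 0x04, ack = 0x00;          // hypothetical EP_CFG bit pending
 *   bool done = !((req ^ ack) & 0x04);   // false: ack still outstanding
 *   ack ^= 0x04;                         // firmware toggles the ack bit
 *   done = !((req ^ ack) & 0x04);        // true: handshake complete
 */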
3697 
3698 /**
3699  * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
3700  *                                the specified groups.
3701  *
3702  * @kbdev:           Pointer to the GPU device.
3703  * @field_mask:      The field mask for checking the state in the csg_req/ack.
3704  * @slot_mask:       Bitmap reflecting the slots, the function will modify
3705  *                   the acknowledged slots by clearing their corresponding
3706  *                   bits.
3707  * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
3708  *
3709  * This function waits for the acknowledgment of the requests that have
3710  * already been placed for the CSG slots by the caller. Currently used for
3711  * the CSG priority update and status update requests.
3712  *
3713  * Return: 0 if all specified slots were acknowledged; otherwise -ETIMEDOUT.
3714  *         On timeout, the bits of the unacknowledged slots remain set in
3715  *         the slot_mask.
3716  */
3717 static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
3718 		u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
3719 {
3720 	const u32 num_groups = kbdev->csf.global_iface.group_num;
3721 	long remaining = wait_in_jiffies;
3722 
3723 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3724 
3725 	while (!bitmap_empty(slot_mask, num_groups) &&
3726 	       !kbase_reset_gpu_is_active(kbdev)) {
3727 		DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
3728 
3729 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3730 				slots_update_state_changed(kbdev, field_mask,
3731 						   slot_mask, dones),
3732 				remaining);
3733 
3734 		if (likely(remaining))
3735 			bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
3736 		else {
3737 
3738 			/* Timed-out on the wait */
3739 			return -ETIMEDOUT;
3740 		}
3741 	}
3742 
3743 	return 0;
3744 }
3745 
3746 static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
3747 {
3748 	unsigned long *slot_mask =
3749 			kbdev->csf.scheduler.csg_slots_prio_update;
3750 	long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3751 	int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
3752 					       slot_mask, wait_time);
3753 
3754 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3755 
3756 	if (unlikely(ret != 0)) {
3757 		const int csg_nr = ffs(slot_mask[0]) - 1;
3758 		struct kbase_queue_group *group =
3759 			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
3760 		enum dumpfault_error_type error_type = DF_CSG_EP_CFG_TIMEOUT;
3761 
3762 		dev_warn(
3763 			kbdev->dev,
3764 			"[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3765 			kbase_backend_get_cycle_cnt(kbdev),
3766 			kbdev->csf.fw_timeout_ms,
3767 			slot_mask[0]);
3768 		if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
3769 			error_type = DF_PING_REQUEST_TIMEOUT;
3770 		schedule_actions_trigger_df(kbdev, group->kctx, error_type);
3771 
3772 		/* Timeout could indicate firmware is unresponsive so trigger a GPU reset. */
3773 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
3774 			kbase_reset_gpu(kbdev);
3775 	}
3776 }
3777 
3778 static void report_csg_termination(struct kbase_queue_group *const group)
3779 {
3780 	struct base_gpu_queue_group_error
3781 		err = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3782 			.payload = { .fatal_group = {
3783 					     .status = GPU_EXCEPTION_TYPE_SW_FAULT_2,
3784 				     } } };
3785 
3786 	kbase_csf_add_group_fatal_error(group, &err);
3787 }
3788 
3789 void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
3790 		struct kbase_context *kctx, struct list_head *evicted_groups)
3791 {
3792 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3793 	struct kbase_queue_group *group;
3794 	u32 num_groups = kbdev->csf.global_iface.group_num;
3795 	u32 slot;
3796 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3797 
3798 	lockdep_assert_held(&kctx->csf.lock);
3799 	mutex_lock(&scheduler->lock);
3800 
3801 	/* This code is only called during reset, so we don't wait for the CSG
3802 	 * slots to be stopped
3803 	 */
3804 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
3805 
3806 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_START, kctx, 0u);
3807 	for (slot = 0; slot < num_groups; slot++) {
3808 		group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
3809 		if (group && group->kctx == kctx) {
3810 			bool as_fault;
3811 
3812 			dev_dbg(kbdev->dev, "Evicting group [%d] running on slot [%d] due to reset",
3813 				group->handle, group->csg_nr);
3814 
3815 			term_csg_slot(group);
3816 			as_fault = cleanup_csg_slot(group);
3817 			/* remove the group from the scheduler list */
3818 			sched_evict_group(group, as_fault, false);
3819 			/* signal Userspace that CSG is being terminated */
3820 			report_csg_termination(group);
3821 			/* return the evicted group to the caller */
3822 			list_add_tail(&group->link, evicted_groups);
3823 			set_bit(slot, slot_mask);
3824 		}
3825 	}
3826 
3827 	dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
3828 			kctx->tgid, kctx->id, num_groups, slot_mask);
3829 
3830 	/* Fatal errors may have been the cause of the GPU reset
3831 	 * taking place, in which case we want to make sure that
3832 	 * we wake up the fatal event queue to notify userspace
3833 	 * only once. Otherwise, we may have duplicate event
3834 	 * notifications between the time the first notification
3835 	 * occurs and the time the GPU is reset.
3836 	 */
3837 	kbase_event_wakeup(kctx);
3838 
3839 	mutex_unlock(&scheduler->lock);
3840 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_EVICT_CTX_SLOTS_END, kctx, num_groups);
3841 }
3842 
3843 /**
3844  * scheduler_slot_protm_ack - Acknowledging the protected region requests
3845  * from the resident group on a given slot.
3846  *
3847  * @kbdev:  Pointer to the GPU device.
3848  * @group:  Pointer to the resident group on the given slot.
3849  * @slot:   The slot that the given group is actively operating on.
3850  *
3851  * The function assumes that the given slot is in a stable running state and
3852  * that the caller has already judged that any pending protected region
3853  * requests of the resident group should be acknowledged.
3854  *
3855  * Return: true if the group has pending protm request(s) and is acknowledged.
3856  *         The caller should arrange to enter the protected mode for servicing
3857  *         it. Otherwise return false, indicating the group has no pending protm
3858  *         request.
3859  */
3860 static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
3861 		struct kbase_queue_group *const group,
3862 		const int slot)
3863 {
3864 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3865 	bool protm_ack = false;
3866 	struct kbase_csf_cmd_stream_group_info *ginfo =
3867 		&kbdev->csf.global_iface.groups[slot];
3868 	u32 max_csi;
3869 	int i;
3870 
3871 	if (WARN_ON(scheduler->csg_slots[slot].resident_group != group))
3872 		return protm_ack;
3873 
3874 	lockdep_assert_held(&scheduler->lock);
3875 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock);
3876 
3877 	max_csi = ginfo->stream_num;
3878 	for (i = find_first_bit(group->protm_pending_bitmap, max_csi);
3879 	     i < max_csi;
3880 	     i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) {
3881 		struct kbase_queue *queue = group->bound_queues[i];
3882 
3883 		clear_bit(i, group->protm_pending_bitmap);
3884 		KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_CLEAR, group, queue,
3885 					   group->protm_pending_bitmap[0]);
3886 
3887 		if (!WARN_ON(!queue) && queue->enabled) {
3888 			struct kbase_csf_cmd_stream_info *stream =
3889 						&ginfo->streams[i];
3890 			u32 cs_protm_ack = kbase_csf_firmware_cs_output(
3891 						stream, CS_ACK) &
3892 						CS_ACK_PROTM_PEND_MASK;
3893 			u32 cs_protm_req = kbase_csf_firmware_cs_input_read(
3894 						stream, CS_REQ) &
3895 						CS_REQ_PROTM_PEND_MASK;
3896 
3897 			KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group,
3898 						   queue, cs_protm_ack ^ cs_protm_req);
3899 
3900 			if (cs_protm_ack == cs_protm_req) {
3901 				dev_dbg(kbdev->dev,
3902 					"PROTM-ack already done for queue-%d group-%d slot-%d",
3903 					queue->csi_index, group->handle, slot);
3904 				continue;
3905 			}
3906 
3907 			kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
3908 						cs_protm_ack,
3909 						CS_ACK_PROTM_PEND_MASK);
3910 			protm_ack = true;
3911 			dev_dbg(kbdev->dev,
3912 				"PROTM-ack for queue-%d, group-%d slot-%d",
3913 				queue->csi_index, group->handle, slot);
3914 		}
3915 	}
3916 
3917 	return protm_ack;
3918 }
3919 
3920 /**
3921  * protm_enter_set_next_pending_seq - Update the scheduler's field of
3922  * tick_protm_pending_seq to that from the next available on-slot protm
3923  * pending CSG.
3924  *
3925  * @kbdev:     Pointer to the GPU device.
3926  *
3927  * If applicable, the function updates the scheduler's tick_protm_pending_seq
3928  * field from the next available on-slot protm pending CSG. If not, the field
3929  * is set to KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID.
3930  */
3931 static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
3932 {
3933 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3934 	u32 num_groups = kbdev->csf.global_iface.group_num;
3935 	u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
3936 	DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
3937 	u32 i;
3938 
3939 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3940 
3941 	bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
3942 		   num_groups);
3943 	/* Reset the tick's pending protm seq number to invalid initially */
3944 	scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
3945 	for_each_set_bit(i, active_csgs, num_groups) {
3946 		struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
3947 
3948 		/* Set to the next pending protm group's scan_seq_number */
3949 		if ((group != scheduler->active_protm_grp) &&
3950 		    (!bitmap_empty(group->protm_pending_bitmap, num_csis)) &&
3951 		    (group->scan_seq_num < scheduler->tick_protm_pending_seq))
3952 			scheduler->tick_protm_pending_seq = group->scan_seq_num;
3953 	}
3954 }
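
/* Worked example (hypothetical masks) of the bitmap_xor() above: with
 * csg_inuse_bitmap = 0b1111 and csg_slots_idle_mask = 0b0101, the result is
 * active_csgs = 0b1010, i.e. only the in-use slots that are not flagged idle
 * are scanned for the next pending protected-mode request.
 */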
3955 
3956 /**
3957  * scheduler_group_check_protm_enter - Request the given group to be evaluated
3958  * for triggering the protected mode.
3959  *
3960  * @kbdev:     Pointer to the GPU device.
3961  * @input_grp: Pointer to the GPU queue group.
3962  *
3963  * The function assumes the given group is either an active running group or
3964  * the scheduler internally maintained field scheduler->top_grp.
3965  *
3966  * If the GPU is not already running in protected mode and the input group
3967  * has protected region requests from its bound queues, the requests are
3968  * acknowledged and the GPU is instructed to enter the protected mode.
3969  */
3970 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
3971 				struct kbase_queue_group *const input_grp)
3972 {
3973 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3974 	struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf;
3975 	unsigned long flags;
3976 	bool protm_in_use;
3977 
3978 	lockdep_assert_held(&scheduler->lock);
3979 
3980 	/* Return early if the physical pages have not been allocated yet */
3981 	if (unlikely(!sbuf->pma))
3982 		return;
3983 
3984 	/* This lock is taken to prevent the issuing of MMU command during the
3985 	 * transition to protected mode. This helps avoid the scenario where the
3986 	 * entry to protected mode happens with a memory region being locked and
3987 	 * the same region is then accessed by the GPU in protected mode.
3988 	 */
3989 	mutex_lock(&kbdev->mmu_hw_mutex);
3990 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3991 
3992 	/* Check if the previous transition to enter & exit the protected
3993 	 * mode has completed or not.
3994 	 */
3995 	protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
3996 		       kbdev->protected_mode;
3997 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER_CHECK, input_grp, protm_in_use);
3998 
3999 	/* Firmware samples the PROTM_PEND ACK bit for CSs when
4000 	 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
4001 	 * is set for a CS after Host has sent the PROTM_ENTER
4002 	 * Global request, then there is no guarantee that firmware will
4003 	 * notice that prior to switching to protected mode. And firmware
4004 	 * may not again raise the PROTM_PEND interrupt for that CS
4005 	 * later on. To avoid that uncertainty PROTM_PEND ACK bit
4006 	 * is not set for a CS if the request to enter protected
4007 	 * mode has already been sent. It will be set later (after the exit
4008 	 * from protected mode has taken place) when the group to which
4009 	 * CS is bound becomes the top group.
4010 	 *
4011 	 * The actual decision to enter protected mode hinges on the input
4012 	 * group being the top priority group (or, in case the previous
4013 	 * top-group was evicted from the scheduler during the tick, its
4014 	 * would-be replacement), and on it currently being in a stable state
4015 	 * (i.e. the slot state is running).
4016 	 */
4017 	if (!protm_in_use && !WARN_ON(!input_grp)) {
4018 		const int slot =
4019 			kbase_csf_scheduler_group_get_slot_locked(input_grp);
4020 
4021 		/* check the input_grp is running and requesting protected mode
4022 		 */
4023 		if (slot >= 0 &&
4024 		    atomic_read(&scheduler->csg_slots[slot].state) ==
4025 			    CSG_SLOT_RUNNING) {
4026 			if (kctx_as_enabled(input_grp->kctx) &&
4027 			    scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
4028 				int err;
4029 
4030 				/* Option of acknowledging to multiple
4031 				 * CSGs from the same kctx is dropped,
4032 				 * after consulting with the
4033 				 * architecture team. See the comment in
4034 				 * GPUCORE-21394.
4035 				 */
4036 
4037 				/* Switch to protected mode */
4038 				scheduler->active_protm_grp = input_grp;
4039 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_ENTER, input_grp,
4040 							 0u);
4041 
4042 #if IS_ENABLED(CONFIG_MALI_CORESIGHT)
4043 				spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4044 
4045 				/* Coresight must be disabled before entering protected mode. */
4046 				kbase_debug_coresight_csf_disable_pmode_enter(kbdev);
4047 
4048 				spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4049 #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
4050 
4051 				kbase_csf_enter_protected_mode(kbdev);
4052 				/* Set the pending protm seq number to the next one */
4053 				protm_enter_set_next_pending_seq(kbdev);
4054 
4055 				spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4056 
4057 				err = kbase_csf_wait_protected_mode_enter(kbdev);
4058 				mutex_unlock(&kbdev->mmu_hw_mutex);
4059 
4060 				if (err)
4061 					schedule_actions_trigger_df(kbdev, input_grp->kctx,
4062 							DF_PROTECTED_MODE_ENTRY_FAILURE);
4063 
4064 				scheduler->protm_enter_time = ktime_get_raw();
4065 
4066 				return;
4067 			}
4068 		}
4069 	}
4070 
4071 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4072 	mutex_unlock(&kbdev->mmu_hw_mutex);
4073 }
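
/* Reading aid for the protected-mode entry path above (a summary of the
 * existing flow, not additional behaviour):
 *   1. mmu_hw_mutex and then interrupt_lock are taken, so no MMU command can
 *      be issued while the transition is in flight.
 *   2. The pending PROTM requests of the slot's CSs are acknowledged via
 *      scheduler_slot_protm_ack().
 *   3. The group is recorded as active_protm_grp and the entry to protected
 *      mode is requested through kbase_csf_enter_protected_mode().
 *   4. tick_protm_pending_seq is refreshed for the remaining on-slot CSGs.
 *   5. interrupt_lock is dropped, kbase_csf_wait_protected_mode_enter() waits
 *      for the firmware to complete the switch, then mmu_hw_mutex is released.
 */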
4074 
4075 /**
4076  * scheduler_check_pmode_progress - Check if protected mode execution is progressing
4077  *
4078  * @kbdev:     Pointer to the GPU device.
4079  *
4080  * This function is called when the GPU is in protected mode.
4081  *
4082  * It will check if the time spent in protected mode is less
4083  * than CSF_SCHED_PROTM_PROGRESS_TIMEOUT. If not, a PROTM_EXIT
4084  * request is sent to the FW.
4085  */
4086 static void scheduler_check_pmode_progress(struct kbase_device *kbdev)
4087 {
4088 	u64 protm_spent_time_ms;
4089 	u64 protm_progress_timeout =
4090 		kbase_get_timeout_ms(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT);
4091 	s64 diff_ms_signed =
4092 		ktime_ms_delta(ktime_get_raw(), kbdev->csf.scheduler.protm_enter_time);
4093 
4094 	if (diff_ms_signed < 0)
4095 		return;
4096 
4097 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
4098 
4099 	protm_spent_time_ms = (u64)diff_ms_signed;
4100 	if (protm_spent_time_ms < protm_progress_timeout)
4101 		return;
4102 
4103 	dev_dbg(kbdev->dev, "Protected mode progress timeout: %llu >= %llu",
4104 		protm_spent_time_ms, protm_progress_timeout);
4105 
4106 	/* Prompt the FW to exit protected mode */
4107 	scheduler_force_protm_exit(kbdev);
4108 }
4109 
4110 static void scheduler_apply(struct kbase_device *kbdev)
4111 {
4112 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4113 	const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
4114 	const u32 available_csg_slots = scheduler->num_csg_slots_for_tick;
4115 	u32 suspend_cnt = 0;
4116 	u32 remain_cnt = 0;
4117 	u32 resident_cnt = 0;
4118 	struct kbase_queue_group *group;
4119 	u32 i;
4120 	u32 spare;
4121 
4122 	lockdep_assert_held(&scheduler->lock);
4123 
4124 	/* Suspend those resident groups not in the run list */
4125 	for (i = 0; i < total_csg_slots; i++) {
4126 		group = scheduler->csg_slots[i].resident_group;
4127 		if (group) {
4128 			resident_cnt++;
4129 			if (group->prepared_seq_num >= available_csg_slots) {
4130 				suspend_queue_group(group);
4131 				suspend_cnt++;
4132 			} else
4133 				remain_cnt++;
4134 		}
4135 	}
4136 
4137 	/* Initialize the remaining available csg slots for the tick/tock */
4138 	scheduler->remaining_tick_slots = available_csg_slots;
4139 
4140 	/* If there are spare slots, apply heads in the list */
4141 	spare = (available_csg_slots > resident_cnt) ?
4142 		(available_csg_slots - resident_cnt) : 0;
4143 	while (!list_empty(&scheduler->groups_to_schedule)) {
4144 		group = list_first_entry(&scheduler->groups_to_schedule,
4145 				struct kbase_queue_group,
4146 				link_to_schedule);
4147 
4148 		if (kbasep_csf_scheduler_group_is_on_slot_locked(group) &&
4149 		    group->prepared_seq_num < available_csg_slots) {
4150 			/* One of the resident remainders */
4151 			update_csg_slot_priority(group,
4152 					get_slot_priority(group));
4153 		} else if (spare != 0) {
4154 			s8 slot = (s8)find_first_zero_bit(
4155 				     kbdev->csf.scheduler.csg_inuse_bitmap,
4156 				     total_csg_slots);
4157 
4158 			if (WARN_ON(slot >= (s8)total_csg_slots))
4159 				break;
4160 
4161 			if (!kctx_as_enabled(group->kctx) || group->faulted) {
4162 				/* Drop the head group and continue */
4163 				update_offslot_non_idle_cnt(group);
4164 				remove_scheduled_group(kbdev, group);
4165 				continue;
4166 			}
4167 			program_csg_slot(group, slot,
4168 					 get_slot_priority(group));
4169 			if (unlikely(!csg_slot_in_use(kbdev, slot)))
4170 				break;
4171 
4172 			spare--;
4173 		} else
4174 			break;
4175 
4176 		/* Drop the head csg from the list */
4177 		remove_scheduled_group(kbdev, group);
4178 		if (!WARN_ON(!scheduler->remaining_tick_slots))
4179 			scheduler->remaining_tick_slots--;
4180 	}
4181 
4182 	/* Dealing with groups currently going through suspend */
4183 	program_suspending_csg_slots(kbdev);
4184 }
4185 
4186 static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
4187 		struct kbase_context *kctx, int priority)
4188 {
4189 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4190 	struct kbase_queue_group *group;
4191 
4192 	lockdep_assert_held(&scheduler->lock);
4193 	lockdep_assert_held(&scheduler->interrupt_lock);
4194 	if (WARN_ON(priority < 0) ||
4195 	    WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
4196 		return;
4197 
4198 	if (!kctx_as_enabled(kctx))
4199 		return;
4200 
4201 	list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority],
4202 			    link) {
4203 		if (WARN_ON(!list_empty(&group->link_to_schedule)))
4204 			/* This would be a bug */
4205 			list_del_init(&group->link_to_schedule);
4206 
4207 		if (unlikely(group->faulted))
4208 			continue;
4209 
4210 		/* Set the scanout sequence number, starting from 0 */
4211 		group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
4212 
4213 		if (scheduler->tick_protm_pending_seq ==
4214 				KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
4215 			if (!bitmap_empty(group->protm_pending_bitmap,
4216 			     kbdev->csf.global_iface.groups[0].stream_num))
4217 				scheduler->tick_protm_pending_seq =
4218 					group->scan_seq_num;
4219 		}
4220 
4221 		if (queue_group_idle_locked(group)) {
4222 			if (can_schedule_idle_group(group))
4223 				list_add_tail(&group->link_to_schedule,
4224 					&scheduler->idle_groups_to_schedule);
4225 			continue;
4226 		}
4227 
4228 		if (!scheduler->ngrp_to_schedule) {
4229 			/* keep the top csg's origin */
4230 			scheduler->top_ctx = kctx;
4231 			scheduler->top_grp = group;
4232 		}
4233 
4234 		list_add_tail(&group->link_to_schedule,
4235 			      &scheduler->groups_to_schedule);
4236 		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
4237 
4238 		kctx->csf.sched.ngrp_to_schedule++;
4239 		count_active_address_space(kbdev, kctx);
4240 	}
4241 }
4242 
4243 /**
4244  * scheduler_rotate_groups() - Rotate the runnable queue groups to provide
4245  *                             fairness of scheduling within a single
4246  *                             kbase_context.
4247  *
4248  * @kbdev:    Pointer to the GPU device.
4249  *
4250  * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
4251  * the highest slot priority) is guaranteed to get the resources that it
4252  * needs we only rotate the kbase_context corresponding to it -
4253  * kbase_csf_scheduler's top_ctx.
4254  *
4255  * The priority level chosen for rotation is the one containing the previous
4256  * scheduling cycle's kbase_csf_scheduler's top_grp.
4257  *
4258  * In a 'fresh-slice-cycle' this always corresponds to the highest group
4259  * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority
4260  * level of the previous scheduling cycle's first runnable kbase_context.
4261  *
4262  * We choose this priority level because when higher priority work is
4263  * scheduled, we should always cause the scheduler to run and do a scan. The
4264  * scan always enumerates the highest priority work first (whether that be
4265  * based on process priority or group priority), and thus
4266  * kbase_csf_scheduler's top_grp will point to the first of those high priority
4267  * groups, which necessarily must be the highest priority group in
4268  * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick
4269  * up that group appropriately.
4270  *
4271  * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL),
4272  * then no explicit rotation occurs on the next fresh-slice-cycle schedule,
4273  * but kbase_csf_scheduler's top_ctx will be set up again for the next
4274  * scheduling cycle. Implicitly, a rotation had already occurred by removing
4275  * the kbase_csf_scheduler's top_grp.
4276  *
4277  * If kbase_csf_scheduler's top_grp became idle and all other groups belonging
4278  * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's
4279  * top_ctx are also idle, then the effect of this will be to rotate idle
4280  * groups, which might not actually become resident in the next
4281  * scheduling slice. However this is acceptable since a queue group becoming
4282  * idle is implicitly a rotation (as above with evicted queue groups), as it
4283  * automatically allows a new queue group to take the maximum slot priority
4284  * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of
4285  * the kbase_csf_scheduler's groups_to_schedule list. In this example, the
4286  * slot will go to a group in the next lowest priority level or, in the
4287  * absence of those, to the next kbase_context's queue groups.
4288  */
4289 static void scheduler_rotate_groups(struct kbase_device *kbdev)
4290 {
4291 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4292 	struct kbase_context *const top_ctx = scheduler->top_ctx;
4293 	struct kbase_queue_group *const top_grp = scheduler->top_grp;
4294 
4295 	lockdep_assert_held(&scheduler->lock);
4296 	if (top_ctx && top_grp) {
4297 		struct list_head *list =
4298 			&top_ctx->csf.sched.runnable_groups[top_grp->priority];
4299 
4300 		WARN_ON(top_grp->kctx != top_ctx);
4301 		if (!WARN_ON(list_empty(list))) {
4302 			struct kbase_queue_group *new_head_grp;
4303 
4304 			list_move_tail(&top_grp->link, list);
4305 			new_head_grp = (!list_empty(list)) ?
4306 						list_first_entry(list, struct kbase_queue_group, link) :
4307 						NULL;
4308 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_ROTATE, top_grp,
4309 						 top_ctx->csf.sched.num_runnable_grps);
4310 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_RUNNABLE_HEAD, new_head_grp, 0u);
4311 			dev_dbg(kbdev->dev,
4312 			    "groups rotated for a context, num_runnable_groups: %u\n",
4313 			    scheduler->top_ctx->csf.sched.num_runnable_grps);
4314 		}
4315 	}
4316 }
4317 
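/**
 * scheduler_rotate_ctxs() - Rotate the list of runnable contexts.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Moves the scheduler's current top_ctx to the tail of the runnable_kctxs
 * list, so that other runnable contexts get the chance to provide the top
 * group in subsequent scheduling cycles.
 */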
4318 static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
4319 {
4320 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4321 	struct list_head *list = &scheduler->runnable_kctxs;
4322 
4323 	lockdep_assert_held(&scheduler->lock);
4324 	if (scheduler->top_ctx) {
4325 		if (!WARN_ON(list_empty(list))) {
4326 			struct kbase_context *pos;
4327 			bool found = false;
4328 
4329 			/* Locate the ctx on the list */
4330 			list_for_each_entry(pos, list, csf.link) {
4331 				if (scheduler->top_ctx == pos) {
4332 					found = true;
4333 					break;
4334 				}
4335 			}
4336 
4337 			if (!WARN_ON(!found)) {
4338 				struct kbase_context *new_head_kctx;
4339 
4340 				list_move_tail(&pos->csf.link, list);
4341 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_ROTATE, pos, 0u);
4342 				new_head_kctx = (!list_empty(list)) ?
4343 							list_first_entry(list, struct kbase_context, csf.link) :
4344 							NULL;
4345 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_RUNNABLE_KCTX_HEAD, new_head_kctx,
4346 						 0u);
4347 				dev_dbg(kbdev->dev, "contexts rotated\n");
4348 			}
4349 		}
4350 	}
4351 }
4352 
4353 /**
4354  * scheduler_update_idle_slots_status() - Get the status update for the CSG
4355  *                       slots for which the IDLE notification was received
4356  *                        previously.
4357  *
4358  * @kbdev:             Pointer to the GPU device.
4359  * @csg_bitmap:        Bitmap of the CSG slots for which
4360  *                     the status update request completed successfully.
4361  * @failed_csg_bitmap: Bitmap of the idle CSG slots for which
4362  *                     the status update request timed out.
4363  *
4364  * This function sends a CSG status update request for all the CSG slots
4365  * present in the bitmap scheduler->csg_slots_idle_mask. Additionally, if
4366  * the group's 'reevaluate_idle_status' field is set, the nominally non-idle
4367  * slots are also included in the status update for a confirmation of their
4368  * status. The function waits for the status update request to complete and
4369  * returns the update completed slots bitmap and any timed out idle-flagged
4370  * slots bitmap.
4371  *
4372  * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
4373  * this function.
4374  */
4375 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
4376 		unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
4377 {
4378 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4379 	const u32 num_groups = kbdev->csf.global_iface.group_num;
4380 	struct kbase_csf_global_iface *const global_iface =
4381 						&kbdev->csf.global_iface;
4382 	unsigned long flags, i;
4383 	u32 active_chk = 0;
4384 
4385 	lockdep_assert_held(&scheduler->lock);
4386 
4387 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4388 
4389 	for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
4390 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4391 		struct kbase_queue_group *group = csg_slot->resident_group;
4392 		struct kbase_csf_cmd_stream_group_info *const ginfo =
4393 						&global_iface->groups[i];
4394 		u32 csg_req;
4395 		bool idle_flag;
4396 
4397 		if (WARN_ON(!group)) {
4398 			clear_bit(i, scheduler->csg_inuse_bitmap);
4399 			clear_bit(i, scheduler->csg_slots_idle_mask);
4400 			continue;
4401 		}
4402 
4403 		idle_flag = test_bit(i, scheduler->csg_slots_idle_mask);
4404 		if (idle_flag || group->reevaluate_idle_status) {
4405 			if (idle_flag) {
4406 #ifdef CONFIG_MALI_BIFROST_DEBUG
4407 				if (!bitmap_empty(group->protm_pending_bitmap,
4408 						  ginfo->stream_num)) {
4409 					dev_warn(kbdev->dev,
4410 						"Idle bit set for group %d of ctx %d_%d on slot %d with pending protm execution",
4411 						group->handle, group->kctx->tgid,
4412 						group->kctx->id, (int)i);
4413 				}
4414 #endif
4415 				clear_bit(i, scheduler->csg_slots_idle_mask);
4416 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
4417 							 scheduler->csg_slots_idle_mask[0]);
4418 			} else {
4419 				/* Updates include slots for which reevaluation is needed.
4420 				 * The extra included slots are tracked here in active_chk.
4421 				 * For protm-pending slots, their active status is already
4422 				 * assured, so there is no need to request an update for them.
4423 				 */
4424 				active_chk |= BIT(i);
4425 				group->reevaluate_idle_status = false;
4426 			}
4427 
4428 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_UPDATE_IDLE_SLOT_REQ, group, i);
4429 			csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
4430 			csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
4431 			kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
4432 							CSG_REQ_STATUS_UPDATE_MASK);
4433 
4434 			/* Track the slot update requests in csg_bitmap.
4435 			 * Note, if the scheduler requested extended update, the resulting
4436 			 * csg_bitmap would be the idle_flags + active_chk. Otherwise it's
4437 			 * identical to the idle_flags.
4438 			 */
4439 			set_bit(i, csg_bitmap);
4440 		} else {
4441 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4442 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4443 						group->run_state);
4444 		}
4445 	}
4446 
4447 
4448 	/* The groups are aggregated into a single kernel doorbell request */
4449 	if (!bitmap_empty(csg_bitmap, num_groups)) {
4450 		long wt =
4451 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
4452 		u32 db_slots = (u32)csg_bitmap[0];
4453 
4454 		kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
4455 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4456 
4457 		if (wait_csg_slots_handshake_ack(kbdev,
4458 				CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
4459 			const int csg_nr = ffs(csg_bitmap[0]) - 1;
4460 			struct kbase_queue_group *group =
4461 				scheduler->csg_slots[csg_nr].resident_group;
4462 
4463 			dev_warn(
4464 				kbdev->dev,
4465 				"[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
4466 				kbase_backend_get_cycle_cnt(kbdev),
4467 				kbdev->csf.fw_timeout_ms,
4468 				csg_bitmap[0]);
4469 			schedule_actions_trigger_df(kbdev, group->kctx,
4470 				DF_CSG_STATUS_UPDATE_TIMEOUT);
4471 
4472 			/* Store the bitmap of timed out slots */
4473 			bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
4474 			csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
4475 
4476 			/* Mask off any failed bits contributed by the active (reevaluation)
4477 			 * slots, so that the failed bitmap returned to the caller contains
4478 			 * only the slots that were originally flagged as idle. A slot whose
4479 			 * idle flag failed to update is thus kept as 'idle' (an informed
4480 			 * guess, as the update did not reach a conclusive result), whilst a
4481 			 * failed active slot is still treated as 'non-idle'. This gives a
4482 			 * graceful handling of the unexpected timeout condition.
4483 			 */
4484 			failed_csg_bitmap[0] &= ~active_chk;
4485 
4486 		} else {
4487 			KBASE_KTRACE_ADD(kbdev, SCHEDULER_UPDATE_IDLE_SLOTS_ACK, NULL, db_slots);
4488 			csg_bitmap[0] = db_slots;
4489 		}
4490 	} else {
4491 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4492 	}
4493 }
4494 
4495 /**
4496  * scheduler_handle_idle_slots() - Update the idle status of queue groups
4497  *                    resident on CSG slots for which the
4498  *                    IDLE notification was received previously.
4499  *
4500  * @kbdev:  Pointer to the GPU device.
4501  *
4502  * This function is called at the start of scheduling tick/tock to reconfirm
4503  * the idle status of queue groups resident on CSG slots for
4504  * which idle notification was received previously, i.e. all the CSG slots
4505  * present in the bitmap scheduler->csg_slots_idle_mask.
4506  * The confirmation is done by sending the CSG status update request to the
4507  * firmware. On completion, the firmware will mark the idleness at the
4508  * slot's interface CSG_STATUS_STATE register accordingly.
4509  *
4510  * The run state of the groups resident on still idle CSG slots is changed to
4511  * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
4512  * updated accordingly.
4513  * The bits corresponding to slots for which the status update request timed out
4514  * remain set in scheduler->csg_slots_idle_mask.
4515  */
4516 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
4517 {
4518 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4519 	u32 num_groups = kbdev->csf.global_iface.group_num;
4520 	unsigned long flags, i;
4521 	DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4522 	DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4523 
4524 	lockdep_assert_held(&scheduler->lock);
4525 
4526 	scheduler_update_idle_slots_status(kbdev, csg_bitmap,
4527 					   failed_csg_bitmap);
4528 
4529 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4530 	for_each_set_bit(i, csg_bitmap, num_groups) {
4531 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4532 		struct kbase_queue_group *group = csg_slot->resident_group;
4533 
4534 		if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING))
4535 			continue;
4536 		if (WARN_ON(!group))
4537 			continue;
4538 		if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
4539 					group->run_state != KBASE_CSF_GROUP_IDLE))
4540 			continue;
4541 		if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
4542 			continue;
4543 
4544 		if (group_on_slot_is_idle(kbdev, i)) {
4545 			group->run_state = KBASE_CSF_GROUP_IDLE;
4546 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
4547 			set_bit(i, scheduler->csg_slots_idle_mask);
4548 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
4549 						 group, scheduler->csg_slots_idle_mask[0]);
4550 		} else {
4551 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
4552 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
4553 						 group->run_state);
4554 		}
4555 	}
4556 
4557 	bitmap_or(scheduler->csg_slots_idle_mask,
4558 		  scheduler->csg_slots_idle_mask,
4559 		  failed_csg_bitmap, num_groups);
4560 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_HANDLE_IDLE_SLOTS, NULL,
4561 				 scheduler->csg_slots_idle_mask[0]);
4562 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4563 }
4564 
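/**
 * scheduler_scan_idle_groups() - Append the scanned-out idle groups to the
 *                                list of groups to schedule.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The idle groups collected during the scan-out are moved to the tail of the
 * scheduler's groups_to_schedule list, after all the non-idle groups, and are
 * assigned their prepared sequence numbers. If no non-idle group was found,
 * the first idle group becomes the scheduler's top_ctx and top_grp.
 */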
4565 static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
4566 {
4567 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4568 	struct kbase_queue_group *group, *n;
4569 
4570 	list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
4571 				 link_to_schedule) {
4572 		WARN_ON(!can_schedule_idle_group(group));
4573 
4574 		if (!scheduler->ngrp_to_schedule) {
4575 			/* keep the top csg's origin */
4576 			scheduler->top_ctx = group->kctx;
4577 			scheduler->top_grp = group;
4578 		}
4579 
4580 		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
4581 		list_move_tail(&group->link_to_schedule,
4582 			       &scheduler->groups_to_schedule);
4583 
4584 		group->kctx->csf.sched.ngrp_to_schedule++;
4585 		count_active_address_space(kbdev, group->kctx);
4586 	}
4587 }
4588 
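/**
 * scheduler_rotate() - Rotate the runnable groups and contexts.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Helper that performs, on a scheduling tick, the rotation of both the top
 * context's runnable groups and the list of runnable contexts, to provide
 * fairness of scheduling.
 */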
4589 static void scheduler_rotate(struct kbase_device *kbdev)
4590 {
4591 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4592 
4593 	lockdep_assert_held(&scheduler->lock);
4594 
4595 	/* Dealing with rotation */
4596 	scheduler_rotate_groups(kbdev);
4597 	scheduler_rotate_ctxs(kbdev);
4598 }
4599 
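/**
 * get_tock_top_group() - Find the highest priority non-idle runnable group
 *                        for a tock action.
 *
 * @scheduler: Pointer to the CSF scheduler.
 *
 * Walks the runnable contexts at each priority level, in descending order of
 * priority, and returns the first non-idle runnable group that is found.
 *
 * Return: Pointer to the group, or NULL if all runnable groups are idle.
 */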
4600 static struct kbase_queue_group *get_tock_top_group(
4601 	struct kbase_csf_scheduler *const scheduler)
4602 {
4603 	struct kbase_context *kctx;
4604 	int i;
4605 
4606 	lockdep_assert_held(&scheduler->lock);
4607 	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4608 		list_for_each_entry(kctx,
4609 			&scheduler->runnable_kctxs, csf.link) {
4610 			struct kbase_queue_group *group;
4611 
4612 			list_for_each_entry(group,
4613 					&kctx->csf.sched.runnable_groups[i],
4614 					link) {
4615 				if (queue_group_idle_locked(group))
4616 					continue;
4617 
4618 				return group;
4619 			}
4620 		}
4621 	}
4622 
4623 	return NULL;
4624 }
4625 
4626 /**
4627  * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
4628  *                                        suspend or GPU IDLE.
4629  *
4630  * @kbdev:          Pointer to the device
4631  * @system_suspend: Flag to indicate it's for system suspend.
4632  *
4633  * This function will suspend all active CSG groups upon either
4634  * system suspend, runtime suspend or GPU IDLE.
4635  *
4636  * Return: 0 on success, -1 otherwise.
4637  */
4638 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
4639 					      bool system_suspend)
4640 {
4641 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4642 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4643 
4644 	int ret = suspend_active_queue_groups(kbdev, slot_mask);
4645 
4646 	if (unlikely(ret)) {
4647 		const int csg_nr = ffs(slot_mask[0]) - 1;
4648 		struct kbase_queue_group *group =
4649 			scheduler->csg_slots[csg_nr].resident_group;
4650 		enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
4651 
4652 		/* The suspend of CSGs failed,
4653 		 * trigger the GPU reset to be in a deterministic state.
4654 		 */
4655 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4656 			 kbase_backend_get_cycle_cnt(kbdev),
4657 			 kbdev->csf.fw_timeout_ms,
4658 			 kbdev->csf.global_iface.group_num, slot_mask);
4659 		if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
4660 			error_type = DF_PING_REQUEST_TIMEOUT;
4661 		schedule_actions_trigger_df(kbdev, group->kctx, error_type);
4662 
4663 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
4664 			kbase_reset_gpu(kbdev);
4665 
4666 		return -1;
4667 	}
4668 
4669 	/* Check if the groups became active whilst the suspend was ongoing,
4670 	 * but only for the case where the system suspend is not in progress
4671 	 */
4672 	if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
4673 		return -1;
4674 
4675 	return 0;
4676 }
4677 
4678 /**
4679  * all_on_slot_groups_remained_idle - Live check for all groups' idleness
4680  *
4681  * @kbdev: Pointer to the device.
4682  *
4683  * Returns false if any of the queues inside any of the groups that have been
4684  * assigned a physical CSG slot have work to execute, or have executed work
4685  * since having received a GPU idle notification. This function is used to
4686  * handle a race condition between firmware reporting GPU idle and userspace
4687  * submitting more work by directly ringing a doorbell.
4688  *
4689  * Return: false if any queue inside any resident group has work to be processed
4690  *         or has processed work since GPU idle event, true otherwise.
4691  */
4692 static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
4693 {
4694 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4695 	/* All CSGs have the same number of CSs */
4696 	size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
4697 	size_t i;
4698 
4699 	lockdep_assert_held(&scheduler->lock);
4700 	lockdep_assert_held(&scheduler->interrupt_lock);
4701 
4702 	for_each_set_bit(i, scheduler->csg_slots_idle_mask,
4703 			  kbdev->csf.global_iface.group_num) {
4704 		struct kbase_queue_group *const group =
4705 			scheduler->csg_slots[i].resident_group;
4706 		size_t j;
4707 
4708 		for (j = 0; j < max_streams; ++j) {
4709 			struct kbase_queue const *const queue =
4710 				group->bound_queues[j];
4711 			u64 const *output_addr;
4712 			u64 cur_extract_ofs;
4713 
4714 			if (!queue || !queue->user_io_addr)
4715 				continue;
4716 
4717 			output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
4718 			cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
4719 			if (cur_extract_ofs != queue->extract_ofs) {
4720 				/* More work has been executed since the idle
4721 				 * notification.
4722 				 */
4723 				return false;
4724 			}
4725 		}
4726 	}
4727 
4728 	return true;
4729 }
4730 
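/**
 * scheduler_idle_suspendable() - Check if the Scheduler can be suspended or
 *                                put to sleep on the GPU becoming idle.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Return: true if all the groups are confirmed idle (or no groups are
 *         runnable) and the power management state allows the Scheduler to
 *         be suspended, false otherwise (including when the Scheduler is
 *         already suspended or sleeping).
 */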
4731 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
4732 {
4733 	bool suspend;
4734 	unsigned long flags;
4735 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4736 
4737 	lockdep_assert_held(&scheduler->lock);
4738 
4739 	if ((scheduler->state == SCHED_SUSPENDED) ||
4740 	    (scheduler->state == SCHED_SLEEPING))
4741 		return false;
4742 
4743 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4744 	spin_lock(&scheduler->interrupt_lock);
4745 
4746 	if (scheduler->fast_gpu_idle_handling) {
4747 		scheduler->fast_gpu_idle_handling = false;
4748 
4749 		if (scheduler->total_runnable_grps) {
4750 			suspend = !atomic_read(&scheduler->non_idle_offslot_grps) &&
4751 				  kbase_pm_idle_groups_sched_suspendable(kbdev);
4752 		} else
4753 			suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4754 		spin_unlock(&scheduler->interrupt_lock);
4755 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4756 
4757 		return suspend;
4758 	}
4759 
4760 	if (scheduler->total_runnable_grps) {
4761 
4762 		/* Check both on-slots and off-slots groups idle status */
4763 		suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
4764 			  !atomic_read(&scheduler->non_idle_offslot_grps) &&
4765 			  kbase_pm_idle_groups_sched_suspendable(kbdev);
4766 	} else
4767 		suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4768 
4769 	/* Confirm that all groups are actually idle before proceeding with
4770 	 * suspension as groups might potentially become active again without
4771 	 * informing the scheduler in case userspace rings a doorbell directly.
4772 	 */
4773 	if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
4774 			unlikely(!all_on_slot_groups_remained_idle(kbdev))))
4775 		suspend = false;
4776 
4777 	spin_unlock(&scheduler->interrupt_lock);
4778 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4779 
4780 	return suspend;
4781 }
4782 
4783 #ifdef KBASE_PM_RUNTIME
4784 /**
4785  * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
4786  *                           becoming idle.
4787  *
4788  * @kbdev: Pointer to the device.
4789  *
4790  * This function is called on GPU idle notification to trigger the transition of
4791  * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
4792  * turned off. Scheduler's state is changed to sleeping and all the active queue
4793  * groups remain on the CSG slots.
4794  */
4795 static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
4796 {
4797 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4798 
4799 	lockdep_assert_held(&scheduler->lock);
4800 
4801 	dev_dbg(kbdev->dev,
4802 		"Scheduler to be put to sleep on GPU becoming idle");
4803 	cancel_tick_timer(kbdev);
4804 	scheduler_pm_idle_before_sleep(kbdev);
4805 	scheduler->state = SCHED_SLEEPING;
4806 	KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
4807 }
4808 #endif
4809 
4810 /**
4811  * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
4812  *                             becoming idle.
4813  *
4814  * @kbdev: Pointer to the device.
4815  *
4816  * This function is called on GPU idle notification to trigger the power down of
4817  * GPU. Scheduler's state is changed to suspended and all the active queue
4818  * groups are suspended before halting the MCU firmware.
4819  *
4820  * Return: true if scheduler will be suspended or false if suspend is aborted.
4821  */
4822 static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
4823 {
4824 	int ret = suspend_active_groups_on_powerdown(kbdev, false);
4825 
4826 	if (ret) {
4827 		dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
4828 			atomic_read(
4829 				&kbdev->csf.scheduler.non_idle_offslot_grps));
4830 		/* Bring forward the next tick */
4831 		kbase_csf_scheduler_tick_advance(kbdev);
4832 		return false;
4833 	}
4834 
4835 	dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
4836 	scheduler_suspend(kbdev);
4837 	cancel_tick_timer(kbdev);
4838 	return true;
4839 }
4840 
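/**
 * gpu_idle_worker() - Work item handler for the GPU idle notification.
 *
 * @work: Pointer to the scheduler's gpu_idle_work item.
 *
 * Re-confirms that the Scheduler is idle-suspendable and then either puts the
 * Scheduler into the sleeping state (if GPU sleep is allowed and CSGs are
 * still active) or suspends all the active groups to allow the GPU power
 * down.
 */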
4841 static void gpu_idle_worker(struct work_struct *work)
4842 {
4843 	struct kbase_device *kbdev = container_of(
4844 		work, struct kbase_device, csf.scheduler.gpu_idle_work);
4845 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4846 	bool scheduler_is_idle_suspendable = false;
4847 	bool all_groups_suspended = false;
4848 
4849 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
4850 
4851 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend)                         \
4852 	(((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
4853 
4854 	if (kbase_reset_gpu_try_prevent(kbdev)) {
4855 		dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
4856 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
4857 				 __ENCODE_KTRACE_INFO(true, false, false));
4858 		return;
4859 	}
4860 	kbase_debug_csf_fault_wait_completion(kbdev);
4861 	mutex_lock(&scheduler->lock);
4862 
4863 #if IS_ENABLED(CONFIG_DEBUG_FS)
4864 	if (unlikely(scheduler->state == SCHED_BUSY)) {
4865 		mutex_unlock(&scheduler->lock);
4866 		kbase_reset_gpu_allow(kbdev);
4867 		return;
4868 	}
4869 #endif
4870 
4871 	scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
4872 	if (scheduler_is_idle_suspendable) {
4873 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_START, NULL,
4874 				 kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
4875 #ifdef KBASE_PM_RUNTIME
4876 		if (kbase_pm_gpu_sleep_allowed(kbdev) &&
4877 		    kbase_csf_scheduler_get_nr_active_csgs(kbdev))
4878 			scheduler_sleep_on_idle(kbdev);
4879 		else
4880 #endif
4881 			all_groups_suspended = scheduler_suspend_on_idle(kbdev);
4882 
4883 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_HANDLING_END, NULL, 0u);
4884 	}
4885 
4886 	mutex_unlock(&scheduler->lock);
4887 	kbase_reset_gpu_allow(kbdev);
4888 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
4889 			 __ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
4890 					      all_groups_suspended));
4891 #undef __ENCODE_KTRACE_INFO
4892 }
4893 
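/**
 * scheduler_prepare() - Scan out the runnable groups and prepare the list of
 *                       groups to schedule for the current tick/tock.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * Resets the per-tick scheduling state, scans every runnable context at each
 * priority level to populate the groups_to_schedule list, appends the
 * schedulable idle groups and updates the count of non-idle off-slot groups
 * as well as the limit on the CSG slots usable for this tick.
 *
 * Return: 0 always.
 */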
4894 static int scheduler_prepare(struct kbase_device *kbdev)
4895 {
4896 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4897 	unsigned long flags;
4898 	int i;
4899 
4900 	lockdep_assert_held(&scheduler->lock);
4901 
4902 	/* Empty the groups_to_schedule */
4903 	while (!list_empty(&scheduler->groups_to_schedule)) {
4904 		struct kbase_queue_group *grp =
4905 			list_first_entry(&scheduler->groups_to_schedule,
4906 					 struct kbase_queue_group,
4907 					 link_to_schedule);
4908 
4909 		remove_scheduled_group(kbdev, grp);
4910 	}
4911 
4912 	/* Pre-scan init scheduler fields */
4913 	if (WARN_ON(scheduler->ngrp_to_schedule != 0))
4914 		scheduler->ngrp_to_schedule = 0;
4915 	scheduler->top_ctx = NULL;
4916 	scheduler->top_grp = NULL;
4917 	scheduler->csg_scan_count_for_tick = 0;
4918 	WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule));
4919 	scheduler->num_active_address_spaces = 0;
4920 	scheduler->num_csg_slots_for_tick = 0;
4921 	bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
4922 
4923 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4924 	scheduler->tick_protm_pending_seq =
4925 		KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
4926 	/* Scan out to run groups */
4927 	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4928 		struct kbase_context *kctx;
4929 
4930 		list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
4931 			scheduler_ctx_scan_groups(kbdev, kctx, i);
4932 	}
4933 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4934 
4935 	/* Update this tick's non-idle groups */
4936 	scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
4937 
4938 	/* Initial number of non-idle off-slot groups, before the scheduler's
4939 	 * scheduler_apply() operation. This gives a sensible start point view
4940 	 * of the tick. It will be subject to up/downs during the scheduler
4941 	 * active phase.
4942 	 */
4943 	atomic_set(&scheduler->non_idle_offslot_grps,
4944 		   scheduler->non_idle_scanout_grps);
4945 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, NULL,
4946 				 scheduler->non_idle_scanout_grps);
4947 
4948 	/* Adds those idle but runnable groups to the scanout list */
4949 	scheduler_scan_idle_groups(kbdev);
4950 
4951 	WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
4952 
4953 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4954 			scheduler->num_active_address_spaces |
4955 			(((u64)scheduler->ngrp_to_schedule) << 32));
4956 	set_max_csg_slots(kbdev);
4957 	dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n",
4958 		scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces);
4959 	return 0;
4960 }
4961 
4962 /**
4963  * keep_lru_on_slots() - Check the condition for LRU is met.
4964  *
4965  * @kbdev: Pointer to the device.
4966  *
4967  * This function tries to maintain the Last-Recent-Use case on slots, when
4968  * the scheduler has no non-idle off-slot CSGs for a replacement
4969  * consideration. This effectively extends the previous scheduling results
4970  * for the new one. That is, the last recent used CSGs are retained on slots
4971  * for the new tick/tock action.
4972  *
4973  * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
4974  *         otherwise false.
4975  */
4976 static bool keep_lru_on_slots(struct kbase_device *kbdev)
4977 {
4978 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4979 	bool keep_lru = false;
4980 	int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
4981 				     kbdev->csf.global_iface.group_num);
4982 
4983 	lockdep_assert_held(&scheduler->lock);
4984 
4985 	if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
4986 		unsigned long flags;
4987 
4988 		spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4989 		/* All on-slots are idle, no non-idle off-slot CSGs available
4990 		 * for considering a meaningful change. Set keep_lru.
4991 		 */
4992 		keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
4993 
4994 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4995 
4996 		dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
4997 			keep_lru, on_slots);
4998 	}
4999 
5000 	return keep_lru;
5001 }
5002 
5003 /**
5004  * prepare_fast_local_tock() - Make preparations for exercising a fast
5005  *                             local tock inside the scheduling actions.
5006  *
5007  * @kbdev:  Pointer to the GPU device.
5008  *
5009  * The function assumes that a fast local tock (i.e. an equivalent tock
5010  * action performed without dropping the lock) is desired if there are
5011  * idle on-slot CSGs. It updates the run-state of the affected CSGs as a
5012  * preparation step. This should only be called from inside
5013  * schedule_actions(), where the previously confirmed idle flags are
5014  * still considered to be reflective, following its earlier idle
5015  * confirmation call, plus some CSGs that may have newly become idle
5016  * during the scheduling action's committing steps.
5017  *
5018  * Return: number of on-slots CSGs that can be considered for replacing.
5019  */
5020 static int prepare_fast_local_tock(struct kbase_device *kbdev)
5021 {
5022 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5023 	u32 num_groups = kbdev->csf.global_iface.group_num;
5024 	unsigned long flags, i;
5025 	DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
5026 
5027 	lockdep_assert_held(&scheduler->lock);
5028 
5029 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5030 	bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
5031 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5032 
5033 	/* Mark the run state of the flagged idle CSGs as IDLE, so that
5034 	 * the intended fast local tock can replace them with off-slot
5035 	 * non-idle CSGs.
5036 	 */
5037 	for_each_set_bit(i, csg_bitmap, num_groups) {
5038 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
5039 		struct kbase_queue_group *group = csg_slot->resident_group;
5040 
5041 		if (!queue_group_idle_locked(group)) {
5042 			group->run_state = KBASE_CSF_GROUP_IDLE;
5043 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_IDLE, group, group->run_state);
5044 		}
5045 	}
5046 
5047 	/* Return the number of idle slots for potential replacement */
5048 	return bitmap_weight(csg_bitmap, num_groups);
5049 }
5050 
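/**
 * wait_csg_slots_suspend() - Wait for the suspension of queue groups on the
 *                            given set of CSG slots.
 *
 * @kbdev:      Pointer to the GPU device.
 * @slot_mask:  Bitmap of the CSG slots on which a suspend was requested. On
 *              timeout it is updated with the bitmap of slots that failed to
 *              suspend.
 * @timeout_ms: Timeout, in milliseconds, for the suspension to complete.
 *
 * For each slot that stops, the resident group's state is saved and the slot
 * is cleaned up (with the group evicted from the scheduler if required).
 *
 * Return: 0 on success, -ETIMEDOUT if the wait timed out.
 */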
5051 static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
5052 				  unsigned int timeout_ms)
5053 {
5054 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5055 	long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
5056 	u32 num_groups = kbdev->csf.global_iface.group_num;
5057 	int err = 0;
5058 	DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
5059 
5060 	lockdep_assert_held(&scheduler->lock);
5061 
5062 	bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
5063 
5064 	while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
5065 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
5066 
5067 		bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
5068 
5069 		remaining = wait_event_timeout(
5070 			kbdev->csf.event_wait,
5071 			slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
5072 
5073 		if (likely(remaining)) {
5074 			u32 i;
5075 
5076 			for_each_set_bit(i, changed, num_groups) {
5077 				struct kbase_queue_group *group;
5078 
5079 				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
5080 					continue;
5081 
5082 				/* The on slot csg is now stopped */
5083 				clear_bit(i, slot_mask_local);
5084 
5085 				KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
5086 					kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
5087 
5088 				group = scheduler->csg_slots[i].resident_group;
5089 				if (likely(group)) {
5090 					/* Only do save/cleanup if the
5091 					 * group is not terminated during
5092 					 * the sleep.
5093 					 */
5094 					save_csg_slot(group);
5095 					if (cleanup_csg_slot(group))
5096 						sched_evict_group(group, true, true);
5097 				}
5098 			}
5099 		} else {
5100 			dev_warn(
5101 				kbdev->dev,
5102 				"[%llu] Suspend request sent on CSG slots 0x%lx timed out for slots 0x%lx",
5103 				kbase_backend_get_cycle_cnt(kbdev), slot_mask[0],
5104 				slot_mask_local[0]);
5105 			/* Return the bitmask of the timed out slots to the caller */
5106 			bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
5107 
5108 			err = -ETIMEDOUT;
5109 		}
5110 	}
5111 
5112 	return err;
5113 }
5114 
5115 /**
5116  * evict_lru_or_blocked_csg() - Evict the least-recently-used idle or blocked CSG
5117  *
5118  * @kbdev: Pointer to the device
5119  *
5120  * Used to allow for speedier starting/resumption of another CSG. The worst-case
5121  * scenario of the evicted CSG being scheduled next is expected to be rare.
5122  * Also, the eviction will not be applied if the GPU is running in protected mode.
5123  * Otherwise the eviction attempt would force the MCU to quit the execution
5124  * of protected mode, and then likely re-request to enter it again.
5125  */
5126 static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
5127 {
5128 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5129 	size_t i;
5130 	struct kbase_queue_group *lru_idle_group = NULL;
5131 	const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
5132 	const bool all_addr_spaces_used = (scheduler->num_active_address_spaces >=
5133 					   (kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS));
5134 	u8 as_usage[BASE_MAX_NR_AS] = { 0 };
5135 
5136 	lockdep_assert_held(&scheduler->lock);
5137 	if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5138 		return;
5139 
5140 	BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(int) * BITS_PER_BYTE));
5141 	if (fls(scheduler->csg_inuse_bitmap[0]) != total_csg_slots)
5142 		return; /* Some CSG slots remain unused */
5143 
5144 	if (all_addr_spaces_used) {
5145 		for (i = 0; i != total_csg_slots; ++i) {
5146 			if (scheduler->csg_slots[i].resident_group != NULL) {
5147 				if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
5148 					    0))
5149 					continue;
5150 
5151 				as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
5152 			}
5153 		}
5154 	}
5155 
5156 	for (i = 0; i != total_csg_slots; ++i) {
5157 		struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
5158 
5159 		/* We expect that by this point all groups would normally be
5160 		 * assigned a physical CSG slot, but if circumstances have
5161 		 * changed then bail out of this optimisation.
5162 		 */
5163 		if (group == NULL)
5164 			return;
5165 
5166 		/* Real-time priority CSGs must be kept on-slot even when
5167 		 * idle.
5168 		 */
5169 		if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
5170 		    (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
5171 		    ((lru_idle_group == NULL) ||
5172 		     (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
5173 			if (WARN_ON(group->kctx->as_nr < 0))
5174 				continue;
5175 
5176 			/* If all address spaces are used, we need to ensure the group does not
5177 			 * share its AS with other active CSGs, otherwise the CSG would be
5178 			 * freed without an AS and this optimisation would not work.
5179 			 */
5180 			if ((!all_addr_spaces_used) || (as_usage[group->kctx->as_nr] == 1))
5181 				lru_idle_group = group;
5182 		}
5183 	}
5184 
5185 	if (lru_idle_group != NULL) {
5186 		unsigned long slot_mask = 1 << lru_idle_group->csg_nr;
5187 
5188 		dev_dbg(kbdev->dev, "Suspending LRU idle group %d of context %d_%d on slot %d",
5189 			lru_idle_group->handle, lru_idle_group->kctx->tgid,
5190 			lru_idle_group->kctx->id, lru_idle_group->csg_nr);
5191 		suspend_queue_group(lru_idle_group);
5192 		if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
5193 			enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
5194 
5195 			dev_warn(
5196 				kbdev->dev,
5197 				"[%llu] LRU idle group %d of context %d_%d failed to suspend on slot %d (timeout %d ms)",
5198 				kbase_backend_get_cycle_cnt(kbdev), lru_idle_group->handle,
5199 				lru_idle_group->kctx->tgid, lru_idle_group->kctx->id,
5200 				lru_idle_group->csg_nr, kbdev->csf.fw_timeout_ms);
5201 			if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
5202 				error_type = DF_PING_REQUEST_TIMEOUT;
5203 			schedule_actions_trigger_df(kbdev, lru_idle_group->kctx, error_type);
5204 		}
5205 	}
5206 }
5207 
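/**
 * schedule_actions() - Perform the scheduling actions for a tick or tock.
 *
 * @kbdev:   Pointer to the GPU device.
 * @is_tick: true for a scheduling tick, false for a tock.
 *
 * This is the core scheduling operation: it reconfirms the idle status of the
 * on-slot groups, rotates the runnable lists on a tick, prepares the list of
 * groups to schedule and applies it to the CSG slots, dealing with protected
 * mode entry/exit for the top group and, where applicable, a one-off fast
 * local tock or the eviction of an LRU idle group.
 */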
5208 static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
5209 {
5210 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5211 	unsigned long flags;
5212 	struct kbase_queue_group *protm_grp;
5213 	int ret;
5214 	bool skip_scheduling_actions;
5215 	bool skip_idle_slots_update;
5216 	bool new_protm_top_grp = false;
5217 	int local_tock_slots = 0;
5218 
5219 	kbase_reset_gpu_assert_prevented(kbdev);
5220 	lockdep_assert_held(&scheduler->lock);
5221 
5222 	ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
5223 	if (ret) {
5224 		dev_err(kbdev->dev,
5225 			"Wait for MCU power on failed on scheduling tick/tock");
5226 		return;
5227 	}
5228 
5229 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5230 	skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
5231 	skip_scheduling_actions =
5232 			!skip_idle_slots_update && kbdev->protected_mode;
5233 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5234 
5235 	/* Skip scheduling actions as GPU reset hasn't been performed yet to
5236 	 * rectify the anomaly that happened when pmode exit interrupt wasn't
5237 	 * received before the termination of group running in pmode.
5238 	 */
5239 	if (unlikely(skip_scheduling_actions)) {
5240 		dev_info(kbdev->dev,
5241 			 "Scheduling actions skipped due to anomaly in pmode");
5242 		return;
5243 	}
5244 
5245 	if (!skip_idle_slots_update) {
5246 		/* Updating on-slot idle CSGs when not in protected mode. */
5247 		scheduler_handle_idle_slots(kbdev);
5248 
5249 		/* Determine whether the condition is met for keeping the
5250 		 * Last-Recent-Use. If true, skipping the remaining action
5251 		 * steps and thus extending the previous tick's arrangement,
5252 		 * in particular, no alterations to on-slot CSGs.
5253 		 */
5254 		if (keep_lru_on_slots(kbdev))
5255 			return;
5256 	}
5257 
5258 	if (is_tick)
5259 		scheduler_rotate(kbdev);
5260 
5261 redo_local_tock:
5262 	scheduler_prepare(kbdev);
5263 	/* Need to specifically enqueue the GPU idle work if there are no groups
5264 	 * to schedule despite there being runnable groups. This scenario will
5265 	 * happen if a System suspend is done when all groups are idle and no
5266 	 * work is submitted for the groups after the System resume.
5267 	 */
5268 	if (unlikely(!scheduler->ngrp_to_schedule &&
5269 		     scheduler->total_runnable_grps)) {
5270 		dev_dbg(kbdev->dev, "No groups to schedule in the tick");
5271 		enqueue_gpu_idle_work(scheduler);
5272 		return;
5273 	}
5274 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5275 	protm_grp = scheduler->active_protm_grp;
5276 
5277 	/* Avoid update if the top-group remains unchanged and in protected
5278 	 * mode. For the said case, all the slots update is effectively
5279 	 * competing against the active protected mode group (typically the
5280 	 * top-group). If we update other slots, even on leaving the
5281 	 * top-group slot untouched, the firmware would exit the protected mode
5282 	 * for interacting with the host-driver. After it, as the top-group
5283 	 * would again raise the request for entering protected mode, we would
5284 	 * be actively doing the switching over twice without progressing the
5285 	 * queue jobs.
5286 	 */
5287 	if (protm_grp && scheduler->top_grp == protm_grp) {
5288 		dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
5289 			protm_grp->handle);
5290 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5291 
5292 		update_offslot_non_idle_cnt_for_onslot_grp(protm_grp);
5293 		remove_scheduled_group(kbdev, protm_grp);
5294 		scheduler_check_pmode_progress(kbdev);
5295 	} else if (scheduler->top_grp) {
5296 		if (protm_grp)
5297 			dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
5298 				protm_grp->handle);
5299 
5300 		if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap,
5301 			     kbdev->csf.global_iface.groups[0].stream_num)) {
5302 			dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d",
5303 				scheduler->top_grp->handle,
5304 				scheduler->top_grp->kctx->tgid,
5305 				scheduler->top_grp->kctx->id);
5306 
5307 			/* When entering protected mode all CSG slots can be occupied
5308 			 * but only the protected mode CSG will be running. Any event
5309 			 * that would trigger the execution of an on-slot idle CSG will
5310 			 * need to be handled by the host during protected mode.
5311 			 */
5312 			new_protm_top_grp = true;
5313 		}
5314 
5315 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5316 
5317 		scheduler_apply(kbdev);
5318 
5319 		/* Scheduler is dropping the execution of the previous protm_grp.
5320 		 * Until the protected mode quit completes, the GPU is effectively
5321 		 * locked in secure mode.
5322 		 */
5323 		if (protm_grp)
5324 			scheduler_force_protm_exit(kbdev);
5325 
5326 		wait_csg_slots_start(kbdev);
5327 		wait_csg_slots_finish_prio_update(kbdev);
5328 
5329 		if (new_protm_top_grp) {
5330 			scheduler_group_check_protm_enter(kbdev,
5331 						scheduler->top_grp);
5332 		} else if (!local_tock_slots &&
5333 			   atomic_read(&scheduler->non_idle_offslot_grps)) {
5334 			/* If during the scheduling action, we have off-slot
5335 			 * non-idle CSGs in waiting, if it happens to have
5336 			 * some new idle slots emerging during the committed
5337 			 * action steps, trigger a one-off fast local tock.
5338 			 */
5339 			local_tock_slots = prepare_fast_local_tock(kbdev);
5340 
5341 			if (local_tock_slots) {
5342 				dev_dbg(kbdev->dev,
5343 					"In-cycle %d idle slots available\n",
5344 					local_tock_slots);
5345 				goto redo_local_tock;
5346 			}
5347 		}
5348 	} else {
5349 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5350 	}
5351 
5352 	evict_lru_or_blocked_csg(kbdev);
5353 }
5354 
5355 /**
5356  * can_skip_scheduling() - Check if the scheduling actions can be skipped.
5357  *
5358  * @kbdev: Pointer to the device
5359  *
5360  * This function is called on a scheduling tick or tock to determine if the
5361  * scheduling actions can be skipped.
5362  * If Scheduler is in sleeping state and exit from the sleep state is allowed
5363  * then activation of MCU will be triggered. The tick or tock work item could
5364  * have been in flight when the state of Scheduler was changed to sleeping.
5365  *
5366  * Return: true if the scheduling actions can be skipped.
5367  */
5368 static bool can_skip_scheduling(struct kbase_device *kbdev)
5369 {
5370 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5371 
5372 	lockdep_assert_held(&scheduler->lock);
5373 
5374 	if (unlikely(!kbase_reset_gpu_is_not_pending(kbdev)))
5375 		return true;
5376 
5377 	if (scheduler->state == SCHED_SUSPENDED)
5378 		return true;
5379 
5380 #ifdef KBASE_PM_RUNTIME
5381 	if (scheduler->state == SCHED_SLEEPING) {
5382 		unsigned long flags;
5383 
5384 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5385 		if (kbdev->pm.backend.exit_gpu_sleep_mode) {
5386 			int ret = scheduler_pm_active_after_sleep(kbdev, &flags);
5387 
5388 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5389 			if (!ret) {
5390 				scheduler->state = SCHED_INACTIVE;
5391 				KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5392 				return false;
5393 			}
5394 
5395 			dev_info(kbdev->dev,
5396 				 "Skip scheduling due to system suspend");
5397 			return true;
5398 		}
5399 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5400 		return true;
5401 	}
5402 #endif
5403 
5404 	return false;
5405 }
5406 
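/**
 * schedule_on_tock() - Work item handler for the scheduling tock.
 *
 * @work: Pointer to the scheduler's tock_work item.
 *
 * Performs the scheduling actions for all pending tock requests, unless the
 * Scheduler state allows the scheduling to be skipped.
 */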
5407 static void schedule_on_tock(struct work_struct *work)
5408 {
5409 	struct kbase_device *kbdev =
5410 		container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
5411 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5412 	int err;
5413 
5414 	err = kbase_reset_gpu_try_prevent(kbdev);
5415 	/* Regardless of whether reset failed or is currently happening, exit
5416 	 * early
5417 	 */
5418 	if (err)
5419 		return;
5420 
5421 	kbase_debug_csf_fault_wait_completion(kbdev);
5422 	mutex_lock(&scheduler->lock);
5423 	if (can_skip_scheduling(kbdev)) {
5425 		atomic_set(&scheduler->pending_tock_work, false);
5426 		goto exit_no_schedule_unlock;
5427 	}
5428 
5429 	WARN_ON(!(scheduler->state == SCHED_INACTIVE));
5430 	scheduler->state = SCHED_BUSY;
5431 	KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
5432 
5433 	/* Undertaking schedule action steps */
5434 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_START, NULL, 0u);
5435 	while (atomic_cmpxchg(&scheduler->pending_tock_work, true, false) == true)
5436 		schedule_actions(kbdev, false);
5437 
5438 	/* Record time information on a non-skipped tock */
5439 	scheduler->last_schedule = jiffies;
5440 
5441 	scheduler->state = SCHED_INACTIVE;
5442 	KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5443 	if (!scheduler->total_runnable_grps)
5444 		enqueue_gpu_idle_work(scheduler);
5445 	mutex_unlock(&scheduler->lock);
5446 	kbase_reset_gpu_allow(kbdev);
5447 
5448 	dev_dbg(kbdev->dev,
5449 		"Waking up for event after schedule-on-tock completes.");
5450 	wake_up_all(&kbdev->csf.event_wait);
5451 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u);
5452 	return;
5453 
5454 exit_no_schedule_unlock:
5455 	mutex_unlock(&scheduler->lock);
5456 	kbase_reset_gpu_allow(kbdev);
5457 }
5458 
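/**
 * schedule_on_tick() - Work item handler for the scheduling tick.
 *
 * @work: Pointer to the scheduler's tick_work item.
 *
 * Performs the scheduling actions for a tick and restarts the tick timer if
 * the scheduler timer is enabled and runnable groups remain.
 */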
5459 static void schedule_on_tick(struct work_struct *work)
5460 {
5461 	struct kbase_device *kbdev =
5462 		container_of(work, struct kbase_device, csf.scheduler.tick_work);
5463 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5464 
5465 	int err = kbase_reset_gpu_try_prevent(kbdev);
5466 	/* Regardless of whether reset failed or is currently happening, exit
5467 	 * early
5468 	 */
5469 	if (err)
5470 		return;
5471 
5472 	kbase_debug_csf_fault_wait_completion(kbdev);
5473 	mutex_lock(&scheduler->lock);
5474 
5475 	WARN_ON(scheduler->tick_timer_active);
5476 	if (can_skip_scheduling(kbdev))
5477 		goto exit_no_schedule_unlock;
5478 
5479 	scheduler->state = SCHED_BUSY;
5480 	KBASE_KTRACE_ADD(kbdev, SCHED_BUSY, NULL, scheduler->state);
5481 
5482 	/* Undertaking schedule action steps */
5483 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_START, NULL, scheduler->total_runnable_grps);
5484 	schedule_actions(kbdev, true);
5485 
5486 	/* Record time information */
5487 	scheduler->last_schedule = jiffies;
5488 
5489 	/* Kicking next scheduling if needed */
5490 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
5491 			(scheduler->total_runnable_grps > 0)) {
5492 		start_tick_timer(kbdev);
5493 		dev_dbg(kbdev->dev,
5494 			"scheduling for next tick, num_runnable_groups:%u\n",
5495 			scheduler->total_runnable_grps);
5496 	} else if (!scheduler->total_runnable_grps) {
5497 		enqueue_gpu_idle_work(scheduler);
5498 	}
5499 
5500 	scheduler->state = SCHED_INACTIVE;
5501 	mutex_unlock(&scheduler->lock);
5502 	KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
5503 	kbase_reset_gpu_allow(kbdev);
5504 
5505 	dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
5506 	wake_up_all(&kbdev->csf.event_wait);
5507 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL,
5508 			 scheduler->total_runnable_grps);
5509 	return;
5510 
5511 exit_no_schedule_unlock:
5512 	mutex_unlock(&scheduler->lock);
5513 	kbase_reset_gpu_allow(kbdev);
5514 }
5515 
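/**
 * suspend_active_queue_groups() - Suspend all groups resident on CSG slots.
 *
 * @kbdev:     Pointer to the GPU device.
 * @slot_mask: Bitmap updated with the slots for which a suspend was requested.
 *
 * Return: 0 on success, or an error code if the wait for the suspension of
 *         the CSG slots timed out.
 */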
5516 static int suspend_active_queue_groups(struct kbase_device *kbdev,
5517 				       unsigned long *slot_mask)
5518 {
5519 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5520 	u32 num_groups = kbdev->csf.global_iface.group_num;
5521 	u32 slot_num;
5522 	int ret;
5523 
5524 	lockdep_assert_held(&scheduler->lock);
5525 
5526 	for (slot_num = 0; slot_num < num_groups; slot_num++) {
5527 		struct kbase_queue_group *group =
5528 			scheduler->csg_slots[slot_num].resident_group;
5529 
5530 		if (group) {
5531 			suspend_queue_group(group);
5532 			set_bit(slot_num, slot_mask);
5533 		}
5534 	}
5535 
5536 	ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
5537 	return ret;
5538 }
5539 
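/**
 * suspend_active_queue_groups_on_reset() - Suspend the on-slot groups and
 *                                          flush the GPU caches before reset.
 *
 * @kbdev: Pointer to the GPU device.
 *
 * The cache flush is performed even if the suspension of some CSG slots timed
 * out, so that the suspend buffer contents are not lost across the GPU reset.
 *
 * Return: 0 on success, or an error code on suspend or cache-clean timeout.
 */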
5540 static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
5541 {
5542 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5543 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
5544 	int ret;
5545 	int ret2;
5546 
5547 	mutex_lock(&scheduler->lock);
5548 
5549 	ret = suspend_active_queue_groups(kbdev, slot_mask);
5550 
5551 	if (ret) {
5552 		dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
5553 			 kbdev->csf.global_iface.group_num, slot_mask);
5554 	}
5555 
5556 	/* Need to flush the GPU cache to ensure suspend buffer
5557 	 * contents are not lost on reset of GPU.
5558 	 * Do this even if suspend operation had timed out for some of
5559 	 * the CSG slots.
5560 	 * In case the scheduler is already in the suspended state, the
5561 	 * cache clean is required as the async reset request from
5562 	 * the debugfs may race against the scheduler suspend operation
5563 	 * due to the extra context ref-count, which prevents the
5564 	 * L2 powering down cache clean operation in the non racing
5565 	 * case.
5566 	 * LSC is flushed together with L2 to cover the buslogging usecase,
5567 	 * where GPU reset is done regularly to avoid the log buffer
5568 	 * overflow.
5569 	 */
5570 	kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
5571 	ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
5572 			kbdev->reset_timeout_ms);
5573 	if (ret2) {
5574 		dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
5575 			 kbase_backend_get_cycle_cnt(kbdev));
5576 		if (!ret)
5577 			ret = ret2;
5578 	}
5579 
5580 	mutex_unlock(&scheduler->lock);
5581 
5582 	return ret;
5583 }
5584 
5585 /**
5586  * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode
5587  *                                              groups when reset is done during
5588  *                                              protected mode execution.
5589  *
5590  * @kbdev: Pointer to the device.
5591  *
5592  * This function is called at the time of GPU reset, before the suspension of
5593  * queue groups, to handle the case when the reset is getting performed whilst
5594  * GPU is in protected mode.
5595  * On entry to protected mode all the groups, except the top group that executes
5596  * in protected mode, are implicitly suspended by the FW. Thus this function
5597  * simply marks the normal mode groups as suspended (and cleans up the
5598  * corresponding CSG slots) to prevent their potential forceful eviction from
5599  * the Scheduler. So if GPU was in protected mode and there was no fault, then
5600  * only the protected mode group would be suspended in the regular way post exit
5601  * from this function. And if GPU was in normal mode, then all on-slot groups
5602  * will get suspended in the regular way.
5603  *
5604  * Return: true if the groups remaining on the CSG slots need to be suspended in
5605  *         the regular way by sending CSG SUSPEND reqs to FW, otherwise false.
5606  */
5607 static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
5608 {
5609 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5610 	u32 const num_groups = kbdev->csf.global_iface.group_num;
5611 	struct kbase_queue_group *protm_grp;
5612 	bool suspend_on_slot_groups = true;
5613 	bool pmode_active;
5614 	unsigned long flags;
5615 	u32 csg_nr;
5616 
5617 	mutex_lock(&scheduler->lock);
5618 
5619 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5620 	protm_grp = scheduler->active_protm_grp;
5621 	pmode_active = kbdev->protected_mode;
5622 
5623 	if (likely(!protm_grp && !pmode_active)) {
5624 		/* Case 1: GPU is not in protected mode or it successfully
5625 		 * exited protected mode. All on-slot groups can be suspended in
5626 		 * the regular way before reset.
5627 		 */
5628 		suspend_on_slot_groups = true;
5629 	} else if (protm_grp && pmode_active) {
5630 		/* Case 2: GPU went successfully into protected mode and hasn't
5631 		 * exited from it yet and the protected mode group is still
5632 		 * active. If there was no fault for the protected mode group
5633 		 * then it can be suspended in the regular way before reset.
5634 		 * The other normal mode on-slot groups were already implicitly
5635 		 * suspended on entry to protected mode so they can be marked as
5636 		 * suspended right away.
5637 		 */
5638 		suspend_on_slot_groups = !protm_grp->faulted;
5639 	} else if (!protm_grp && pmode_active) {
5640 		/* Case 3: GPU went successfully into protected mode and hasn't
5641 		 * exited from it yet but the protected mode group got deleted.
5642 		 * This would have happened if the FW got stuck during protected
5643 		 * mode for some reason (like GPU page fault or some internal
5644 		 * error). In normal cases FW is expected to send the pmode exit
5645 		 * interrupt before it handles the CSG termination request.
5646 		 * The other normal mode on-slot groups would already have been
5647 		 * implicitly suspended on entry to protected mode so they can be
5648 		 * marked as suspended right away.
5649 		 */
5650 		suspend_on_slot_groups = false;
5651 	} else if (protm_grp && !pmode_active) {
5652 		/* Case 4: GPU couldn't successfully enter protected mode, i.e.
5653 		 * PROTM_ENTER request had timed out.
5654 		 * All the on-slot groups need to be suspended in the regular
5655 		 * way before reset.
5656 		 */
5657 		suspend_on_slot_groups = true;
5658 	}
5659 
5660 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5661 
5662 	if (likely(!pmode_active))
5663 		goto unlock;
5664 
5665 	/* GPU hasn't exited protected mode, so all the on-slot groups barring
5666 	 * the protected mode group can be marked as suspended right away.
5667 	 */
5668 	for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
5669 		struct kbase_queue_group *const group =
5670 			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
5671 		int new_val;
5672 
5673 		if (!group || (group == protm_grp))
5674 			continue;
5675 
5676 		cleanup_csg_slot(group);
5677 		group->run_state = KBASE_CSF_GROUP_SUSPENDED;
5678 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_SUSPENDED, group, group->run_state);
5679 
5680 		/* Simply treat the normal mode groups as non-idle. The tick
5681 		 * scheduled after the reset will re-initialize the counter
5682 		 * anyway.
5683 		 */
5684 		new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
5685 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_INC, group, new_val);
5686 	}
5687 
5688 unlock:
5689 	mutex_unlock(&scheduler->lock);
5690 	return suspend_on_slot_groups;
5691 }
5692 
5693 static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
5694 {
5695 	cancel_work_sync(&scheduler->tick_work);
5696 }
5697 
5698 static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
5699 {
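	/* The pending flag is cleared together with the synchronous cancellation
	 * of the delayed work, so that a tock is not left marked as pending once
	 * the work item has been cancelled.
	 */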
5700 	atomic_set(&scheduler->pending_tock_work, false);
5701 	cancel_delayed_work_sync(&scheduler->tock_work);
5702 }
5703 
5704 static void scheduler_inner_reset(struct kbase_device *kbdev)
5705 {
5706 	u32 const num_groups = kbdev->csf.global_iface.group_num;
5707 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5708 	unsigned long flags;
5709 
5710 	WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
5711 
5712 	/* Cancel any potential queued delayed work(s) */
5713 	cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
5714 	cancel_tick_timer(kbdev);
5715 	cancel_tick_work(scheduler);
5716 	cancel_tock_work(scheduler);
5717 	cancel_delayed_work_sync(&scheduler->ping_work);
5718 
5719 	mutex_lock(&scheduler->lock);
5720 
5721 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5722 	bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
5723 	if (scheduler->active_protm_grp)
5724 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT, scheduler->active_protm_grp,
5725 					 0u);
5726 	scheduler->active_protm_grp = NULL;
5727 	memset(kbdev->csf.scheduler.csg_slots, 0,
5728 	       num_groups * sizeof(struct kbase_csf_csg_slot));
5729 	bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups);
5730 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5731 
5732 	scheduler->top_ctx = NULL;
5733 	scheduler->top_grp = NULL;
5734 
5735 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
5736 			scheduler->num_active_address_spaces |
5737 			(((u64)scheduler->total_runnable_grps) << 32));
5738 
5739 	mutex_unlock(&scheduler->lock);
5740 }
5741 
5742 void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
5743 {
5744 	struct kbase_context *kctx;
5745 
5746 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
5747 
5748 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
5749 
5750 	kbase_debug_csf_fault_wait_completion(kbdev);
5751 
5752 	if (scheduler_handle_reset_in_protected_mode(kbdev) &&
5753 	    !suspend_active_queue_groups_on_reset(kbdev)) {
5754 		/* As all groups have been successfully evicted from the CSG
5755 		 * slots, clear out the scheduler data fields and return
5756 		 */
5757 		scheduler_inner_reset(kbdev);
5758 		return;
5759 	}
5760 
5761 	mutex_lock(&kbdev->kctx_list_lock);
5762 
5763 	/* The loop to iterate over the kbase contexts is present due to lock
5764 	 * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock.
5765 	 * CSF ioctls first take kctx->csf.lock which is context-specific and
5766 	 * then take kbdev->csf.scheduler.lock for global actions like assigning
5767 	 * a CSG slot.
5768 	 * If the lock ordering constraint was not there then we could have
5769 	 * directly looped over the active queue groups.
5770 	 */
5771 	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
5772 		/* Firmware reload would reinitialize the CSG & CS interface IO
5773 		 * pages, so we just need to internally mark the currently active
5774 		 * queue groups as terminated (similar to the unexpected OoM
5775 		 * event case).
5776 		 * No further work can now get executed for the active groups
5777 		 * (new groups would have to be created to execute work) and
5778 		 * in near future Clients would be duly informed of this
5779 		 * reset. The resources (like User IO pages, GPU queue memory)
5780 		 * allocated for the associated queues would be freed when the
5781 		 * Clients do the teardown when they become aware of the reset.
5782 		 */
5783 		kbase_csf_active_queue_groups_reset(kbdev, kctx);
5784 	}
5785 
5786 	mutex_unlock(&kbdev->kctx_list_lock);
5787 
5788 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_END, NULL, 0u);
5789 
5790 	/* After the queue groups are reset, clear out the scheduler data fields */
5791 	scheduler_inner_reset(kbdev);
5792 }
5793 
5794 static void firmware_aliveness_monitor(struct work_struct *work)
5795 {
5796 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
5797 					csf.scheduler.ping_work.work);
5798 	int err;
5799 
5800 	/* Ensure that reset will not be occurring while this function is being
5801 	 * executed as otherwise calling kbase_reset_gpu when reset is already
5802 	 * occurring is a programming error.
5803 	 *
5804 	 * We must use the 'try' variant as the Reset worker can try to flush
5805 	 * this workqueue, which would otherwise deadlock here if we tried to
5806 	 * wait for the reset (and thus ourselves) to complete.
5807 	 */
5808 	err = kbase_reset_gpu_try_prevent(kbdev);
5809 	if (err) {
5810 		/* It doesn't matter whether the value was -EAGAIN or a fatal
5811 		 * error, just stop processing. In case of -EAGAIN, the Reset
5812 		 * worker will restart the scheduler later to resume the ping.
5813 		 */
5814 		return;
5815 	}
5816 
5817 	mutex_lock(&kbdev->csf.scheduler.lock);
5818 
5819 #ifdef CONFIG_MALI_BIFROST_DEBUG
5820 	if (fw_debug) {
5821 		/* ping requests cause distraction in firmware debugging */
5822 		goto exit;
5823 	}
5824 #endif
5825 
5826 	if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
5827 	    kbdev->csf.scheduler.state == SCHED_SLEEPING)
5828 		goto exit;
5829 
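	/* The ping is only re-armed while exactly one CSG is resident (see the
	 * re-queue below); with more groups on slot, the regular scheduling
	 * activity is presumed to already exercise the firmware interface.
	 */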
5830 	if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
5831 		goto exit;
5832 
5833 	if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5834 		goto exit;
5835 
5836 	if (kbase_pm_context_active_handle_suspend(kbdev,
5837 			KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
5838 		/* Suspend pending - no real need to ping */
5839 		goto exit;
5840 	}
5841 
5842 	kbase_csf_scheduler_wait_mcu_active(kbdev);
5843 
5844 	err = kbase_csf_firmware_ping_wait(kbdev, kbdev->csf.fw_timeout_ms);
5845 
5846 	if (err) {
5847 		/* It is acceptable to enqueue a reset whilst we've prevented
5848 		 * them; it will happen after we've allowed them again.
5849 		 */
5850 		if (kbase_prepare_to_reset_gpu(
5851 			    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
5852 			kbase_reset_gpu(kbdev);
5853 	} else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
5854 		queue_delayed_work(
5855 			system_long_wq, &kbdev->csf.scheduler.ping_work,
5856 			msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_PING_TIMEOUT)));
5857 	}
5858 
5859 	kbase_pm_context_idle(kbdev);
5860 exit:
5861 	mutex_unlock(&kbdev->csf.scheduler.lock);
5862 	kbase_reset_gpu_allow(kbdev);
5863 }
5864 
5865 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
5866 		struct kbase_suspend_copy_buffer *sus_buf)
5867 {
5868 	struct kbase_context *const kctx = group->kctx;
5869 	struct kbase_device *const kbdev = kctx->kbdev;
5870 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5871 	bool on_slot;
5872 	int err = 0;
5873 
5874 	kbase_reset_gpu_assert_prevented(kbdev);
5875 	lockdep_assert_held(&kctx->csf.lock);
5876 	mutex_lock(&scheduler->lock);
5877 
5878 	on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5879 
5880 #ifdef KBASE_PM_RUNTIME
5881 	if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
5882 		if (wait_for_scheduler_to_exit_sleep(kbdev)) {
5883 			dev_warn(
5884 				kbdev->dev,
5885 				"Wait for scheduler to exit sleep state timed out when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5886 				group->handle, group->kctx->tgid,
5887 				group->kctx->id, group->csg_nr);
5888 
5889 			scheduler_wakeup(kbdev, true);
5890 
5891 			/* Wait for MCU firmware to start running */
5892 			if (kbase_csf_scheduler_wait_mcu_active(kbdev))
5893 				dev_warn(
5894 					kbdev->dev,
5895 					"Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5896 					group->handle, group->kctx->tgid,
5897 					group->kctx->id, group->csg_nr);
5898 		}
5899 
5900 		/* Check the group state again as scheduler lock would have been
5901 		 * released when waiting for the exit from SLEEPING state.
5902 		 */
5903 		on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5904 	}
5905 #endif
5906 	if (on_slot) {
5907 		DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
5908 
5909 		set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
5910 
5911 		if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
5912 			suspend_queue_group(group);
5913 		err = wait_csg_slots_suspend(kbdev, slot_mask,
5914 					     kbdev->csf.fw_timeout_ms);
5915 		if (err) {
5916 			dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
5917 				 kbase_backend_get_cycle_cnt(kbdev),
5918 				 group->handle, group->csg_nr);
5919 			goto exit;
5920 		}
5921 	}
5922 
5923 	if (queue_group_suspended_locked(group)) {
5924 		unsigned int target_page_nr = 0, i = 0;
5925 		u64 offset = sus_buf->offset;
5926 		size_t to_copy = sus_buf->size;
5927 		const u32 csg_suspend_buf_nr_pages =
5928 			PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
5929 
5930 		if (scheduler->state != SCHED_SUSPENDED) {
5931 			/* Similar to the case of HW counters, we need to flush
5932 			 * the GPU L2 cache before reading from the suspend buffer
5933 			 * pages as they are mapped and cached on GPU side.
5934 			 * Flushing LSC is not done here, since only the flush of
5935 			 * CSG suspend buffer contents is needed from the L2 cache.
5936 			 */
5937 			kbase_gpu_start_cache_clean(
5938 				kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
5939 			kbase_gpu_wait_cache_clean(kbdev);
5940 		} else {
5941 			/* Make sure power down transitions have completed,
5942 			 * i.e. L2 has been powered off as that would ensure
5943 			 * its contents are flushed to memory.
5944 			 * This is needed as Scheduler doesn't wait for the
5945 			 * power down to finish.
5946 			 */
5947 			kbase_pm_wait_for_desired_state(kbdev);
5948 		}
5949 
5950 		for (i = 0; i < csg_suspend_buf_nr_pages &&
5951 				target_page_nr < sus_buf->nr_pages; i++) {
5952 			struct page *pg =
5953 				as_page(group->normal_suspend_buf.phy[i]);
5954 			void *sus_page = kmap(pg);
5955 
5956 			if (sus_page) {
5957 				kbase_sync_single_for_cpu(kbdev,
5958 					kbase_dma_addr(pg),
5959 					PAGE_SIZE, DMA_BIDIRECTIONAL);
5960 
5961 				err = kbase_mem_copy_to_pinned_user_pages(
5962 						sus_buf->pages, sus_page,
5963 						&to_copy, sus_buf->nr_pages,
5964 						&target_page_nr, offset);
5965 				kunmap(pg);
5966 				if (err)
5967 					break;
5968 			} else {
5969 				err = -ENOMEM;
5970 				break;
5971 			}
5972 		}
5973 		schedule_in_cycle(group, false);
5974 	} else {
5975 		/* If addr-space fault, the group may have been evicted */
5976 		err = -EIO;
5977 	}
5978 
5979 exit:
5980 	mutex_unlock(&scheduler->lock);
5981 	return err;
5982 }
5983 
5984 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf);
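/*
 * Illustrative sketch (not part of the driver): a caller of
 * kbase_csf_scheduler_group_copy_suspend_buf() is expected to describe the
 * pinned destination user pages with a kbase_suspend_copy_buffer. Only the
 * fields consumed above (pages, nr_pages, size, offset) are shown; the page
 * pinning and error handling are elided and the variable names are assumptions.
 *
 *	struct kbase_suspend_copy_buffer sus_buf = {
 *		.pages = pinned_user_pages,
 *		.nr_pages = nr_pinned_pages,
 *		.size = bytes_to_copy,
 *		.offset = dst_offset,
 *	};
 *	int ret = kbase_csf_scheduler_group_copy_suspend_buf(group, &sus_buf);
 */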
5985 
5986 /**
5987  * group_sync_updated() - Evaluate sync wait condition of all blocked command
5988  *                        queues of the group.
5989  *
5990  * @group: Pointer to the command queue group that has blocked command queue(s)
5991  *         bound to it.
5992  *
5993  * Return: true if sync wait condition is satisfied for at least one blocked
5994  *         queue of the group.
5995  */
5996 static bool group_sync_updated(struct kbase_queue_group *group)
5997 {
5998 	bool updated = false;
5999 	int stream;
6000 
6001 	/* Groups can also be blocked on-slot during protected mode. */
6002 	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
6003 		    group->run_state != KBASE_CSF_GROUP_IDLE);
6004 
6005 	for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
6006 		struct kbase_queue *const queue = group->bound_queues[stream];
6007 
6008 		/* To check the necessity of sync-wait evaluation,
6009 		 * we rely on the cached 'status_wait' instead of reading it
6010 		 * directly from shared memory, as the CSG has already been
6011 		 * evicted from its CSG slot and thus doesn't have
6012 		 * valid information in the shared memory.
6013 		 */
6014 		if (queue && queue->enabled &&
6015 		    CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait))
6016 			if (evaluate_sync_update(queue)) {
6017 				updated = true;
6018 				queue->status_wait = 0;
6019 			}
6020 	}
6021 
6022 	return updated;
6023 }
6024 
6025 /**
6026  * scheduler_get_protm_enter_async_group() -  Check if the GPU queue group
6027  *                          can be now allowed to execute in protected mode.
6028  *
6029  * @kbdev:    Pointer to the GPU device.
6030  * @group:    Pointer to the GPU queue group.
6031  *
6032  * This function is called outside the scheduling tick/tock to determine
6033  * if the given GPU queue group can now execute in protected mode or not.
6034  * If the group pointer passed is NULL then the evaluation is done for the
6035  * highest priority group on the scheduler maintained group lists without
6036  * tick associated rotation actions. This is referred to as the 'top-group'
6037  * in a tock action sense.
6038  *
6039  * It returns the same group pointer that was passed as an argument if that
6040  * group matches the highest priority group and has pending protected region
6041  * requests, otherwise NULL is returned.
6042  *
6043  * If the group pointer passed is NULL then the internally evaluated highest
6044  * priority group is returned if it has pending protected region requests,
6045  * otherwise NULL is returned.
6046  *
6047  * The evaluated highest priority group may not necessarily be the same as the
6048  * scheduler->top_grp. This can happen if there is dynamic de-idle update
6049  * during the tick interval for some on-slots groups that were idle during the
6050  * scheduler normal scheduling action, where the scheduler->top_grp was set.
6051  * The recorded scheduler->top_grp is untouched by this evaluation, so will not
6052  * affect the scheduler context/priority list rotation arrangement.
6053  *
6054  * Return: the pointer to queue group that can currently execute in protected
6055  *         mode or NULL.
6056  */
6057 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
6058 		struct kbase_device *const kbdev,
6059 		struct kbase_queue_group *const group)
6060 {
6061 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6062 	struct kbase_queue_group *match_grp, *input_grp;
6063 
6064 	lockdep_assert_held(&scheduler->lock);
6065 
6066 	if (scheduler->state != SCHED_INACTIVE)
6067 		return NULL;
6068 
6069 	match_grp = get_tock_top_group(scheduler);
6070 	input_grp = group ? group : match_grp;
6071 
6072 	if (input_grp && (input_grp == match_grp)) {
6073 		struct kbase_csf_cmd_stream_group_info *ginfo =
6074 				&kbdev->csf.global_iface.groups[0];
6075 		unsigned long *pending =
6076 				input_grp->protm_pending_bitmap;
6077 		unsigned long flags;
6078 
6079 		spin_lock_irqsave(&scheduler->interrupt_lock, flags);
6080 
6081 		if (kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
6082 		    bitmap_empty(pending, ginfo->stream_num))
6083 			input_grp = NULL;
6084 
6085 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
6086 	} else {
6087 		input_grp = NULL;
6088 	}
6089 
6090 	return input_grp;
6091 }
6092 
6093 void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
6094 {
6095 	struct kbase_device *const kbdev = group->kctx->kbdev;
6096 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6097 
6098 	int err = kbase_reset_gpu_try_prevent(kbdev);
6099 	/* Regardless of whether reset failed or is currently happening, exit
6100 	 * early
6101 	 */
6102 	if (err)
6103 		return;
6104 
6105 	mutex_lock(&scheduler->lock);
6106 
6107 	if (group->run_state == KBASE_CSF_GROUP_IDLE) {
6108 		group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6109 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6110 					group->run_state);
6111 	}
6112 	/* Check if the group is now eligible for execution in protected mode. */
6113 	if (scheduler_get_protm_enter_async_group(kbdev, group))
6114 		scheduler_group_check_protm_enter(kbdev, group);
6115 
6116 	mutex_unlock(&scheduler->lock);
6117 	kbase_reset_gpu_allow(kbdev);
6118 }
6119 
6120 /**
6121  * check_sync_update_for_on_slot_group() - Check the sync wait condition
6122  *                                         for all the queues bound to
6123  *                                         the given on-slot group.
6124  *
6125  * @group:    Pointer to the on-slot group that requires evaluation.
6126  *
6127  * This function is called if the GPU is in protected mode and there are
6128  * on-slot idle groups with a higher priority than the active protected mode
6129  * group, or when a CQS object is signaled whilst the GPU is in
6130  * sleep state.
6131  * This function will evaluate the sync condition, if any, of all the queues
6132  * bound to the given group.
6133  *
6134  * Return: true if the sync condition of at least one queue has been satisfied.
6135  */
6136 static bool check_sync_update_for_on_slot_group(
6137 		struct kbase_queue_group *group)
6138 {
6139 	struct kbase_device *const kbdev = group->kctx->kbdev;
6140 	struct kbase_csf_scheduler *const scheduler =
6141 				&kbdev->csf.scheduler;
6142 	bool sync_update_done = false;
6143 	int i;
6144 
6145 	lockdep_assert_held(&scheduler->lock);
6146 
6147 	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
6148 		struct kbase_queue *queue = group->bound_queues[i];
6149 
6150 		if (queue && queue->enabled && !sync_update_done) {
6151 			struct kbase_csf_cmd_stream_group_info *const ginfo =
6152 				&kbdev->csf.global_iface.groups[group->csg_nr];
6153 			struct kbase_csf_cmd_stream_info *const stream =
6154 				&ginfo->streams[queue->csi_index];
6155 			u32 status = kbase_csf_firmware_cs_output(
6156 					stream, CS_STATUS_WAIT);
6157 			unsigned long flags;
6158 
6159 			KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_WAIT_STATUS,
6160 						   queue->group, queue, status);
6161 
6162 			if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
6163 				continue;
6164 
6165 			/* Save the sync object information of the command
6166 			 * queue so that the callback function 'group_sync_updated'
6167 			 * can evaluate the sync object when it gets updated
6168 			 * later.
6169 			 */
6170 			queue->status_wait = status;
6171 			queue->sync_ptr = kbase_csf_firmware_cs_output(
6172 				stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
6173 			queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
6174 				stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
6175 			queue->sync_value = kbase_csf_firmware_cs_output(
6176 				stream, CS_STATUS_WAIT_SYNC_VALUE);
6177 			queue->blocked_reason =
6178 				CS_STATUS_BLOCKED_REASON_REASON_GET(
6179 					kbase_csf_firmware_cs_output(
6180 						stream,
6181 						CS_STATUS_BLOCKED_REASON));
6182 
6183 			if (!evaluate_sync_update(queue))
6184 				continue;
6185 
6186 			/* Update csg_slots_idle_mask and group's run_state */
6187 			if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) {
6188 				/* Only clear the group's idle flag if it has been dealt
6189 				 * with by the scheduler's tick/tock action, otherwise
6190 				 * leave it untouched.
6191 				 */
6192 				spin_lock_irqsave(&scheduler->interrupt_lock,
6193 						  flags);
6194 				clear_bit((unsigned int)group->csg_nr,
6195 					  scheduler->csg_slots_idle_mask);
6196 				KBASE_KTRACE_ADD_CSF_GRP(
6197 					kbdev, CSG_SLOT_IDLE_CLEAR, group,
6198 					scheduler->csg_slots_idle_mask[0]);
6199 				spin_unlock_irqrestore(
6200 					&scheduler->interrupt_lock, flags);
6201 				/* Request the scheduler to confirm the condition inferred
6202 				 * here inside the protected mode.
6203 				 */
6204 				group->reevaluate_idle_status = true;
6205 				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
6206 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
6207 							 group->run_state);
6208 			}
6209 
6210 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
6211 			sync_update_done = true;
6212 		}
6213 	}
6214 
6215 	return sync_update_done;
6216 }
6217 
6218 /**
6219  * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
6220  *                                             for the idle groups on slot
6221  *                                             during protected mode.
6222  *
6223  * @kbdev:    Pointer to the GPU device
6224  *
6225  * This function checks the gpu queues of all the on-slot idle groups that
6226  * have a higher priority than the active protected mode group, while the GPU
6227  * is in protected mode.
6228  *
6229  * Return: true if the sync condition of at least one queue in a group has been
6230  * satisfied.
6231  */
6232 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
6233 {
6234 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6235 	struct kbase_queue_group *protm_grp;
6236 	bool exit_protm = false;
6237 	unsigned long flags;
6238 	u32 num_groups;
6239 	u32 i;
6240 
6241 	lockdep_assert_held(&scheduler->lock);
6242 
6243 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
6244 	protm_grp = scheduler->active_protm_grp;
6245 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
6246 
6247 	if (!protm_grp)
6248 		return exit_protm;
6249 
6250 	num_groups = kbdev->csf.global_iface.group_num;
6251 
6252 	for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
6253 		struct kbase_csf_csg_slot *csg_slot =
6254 					&scheduler->csg_slots[i];
6255 		struct kbase_queue_group *group = csg_slot->resident_group;
6256 
6257 		if (group->scan_seq_num < protm_grp->scan_seq_num) {
6258 			/* If sync update has been performed for the group that
6259 			 * has a higher priority than the protm group, then we
6260 			 * need to exit protected mode.
6261 			 */
6262 			if (check_sync_update_for_on_slot_group(group))
6263 				exit_protm = true;
6264 		}
6265 	}
6266 
6267 	return exit_protm;
6268 }
6269 
6270 static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
6271 {
6272 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6273 	u32 const num_groups = kbdev->csf.global_iface.group_num;
6274 	u32 csg_nr;
6275 
6276 	lockdep_assert_held(&scheduler->lock);
6277 
6278 	for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
6279 		struct kbase_queue_group *const group =
6280 			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
6281 
6282 		if (!group)
6283 			continue;
6284 
6285 		if (check_sync_update_for_on_slot_group(group)) {
6286 			scheduler_wakeup(kbdev, true);
6287 			return;
6288 		}
6289 	}
6290 }
6291 
6292 /**
6293  * check_group_sync_update_worker() - Check the sync wait condition for all the
6294  *                                    blocked queue groups
6295  *
6296  * @work:    Pointer to the context-specific work item for evaluating the wait
6297  *           condition for all the queue groups in idle_wait_groups list.
6298  *
6299  * This function checks the gpu queues of all the groups present in the
6300  * idle_wait_groups list of a context, and also of all on-slot idle groups
6301  * (if the GPU is in protected mode).
6302  * If the sync wait condition for at least one queue bound to the group has
6303  * been satisfied then the group is moved to the per context list of
6304  * runnable groups so that Scheduler can consider scheduling the group
6305  * in next tick or exit protected mode.
6306  */
6307 static void check_group_sync_update_worker(struct work_struct *work)
6308 {
6309 	struct kbase_context *const kctx = container_of(work,
6310 		struct kbase_context, csf.sched.sync_update_work);
6311 	struct kbase_device *const kbdev = kctx->kbdev;
6312 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6313 	bool sync_updated = false;
6314 
6315 	mutex_lock(&scheduler->lock);
6316 
6317 #if IS_ENABLED(CONFIG_DEBUG_FS)
6318 	if (unlikely(scheduler->state == SCHED_BUSY)) {
6319 		queue_work(kctx->csf.sched.sync_update_wq,
6320 			&kctx->csf.sched.sync_update_work);
6321 		mutex_unlock(&scheduler->lock);
6322 		return;
6323 	}
6324 #endif
6325 
6326 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
6327 	if (kctx->csf.sched.num_idle_wait_grps != 0) {
6328 		struct kbase_queue_group *group, *temp;
6329 
6330 		list_for_each_entry_safe(group, temp,
6331 				&kctx->csf.sched.idle_wait_groups, link) {
6332 			if (group_sync_updated(group)) {
6333 				sync_updated = true;
6334 				/* Move this group back into the runnable
6335 				 * groups list of the context.
6336 				 */
6337 				update_idle_suspended_group_state(group);
6338 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
6339 			}
6340 		}
6341 	} else {
6342 		WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
6343 	}
6344 
6345 	if (check_sync_update_for_idle_groups_protm(kbdev)) {
6346 		scheduler_force_protm_exit(kbdev);
6347 		sync_updated = true;
6348 	}
6349 
6350 	/* If the scheduler is in the sleeping state, re-activate it to serve
6351 	 * on-slot CSGs blocked on a CQS object that has been signaled.
6352 	 */
6353 	if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
6354 		check_sync_update_in_sleep_mode(kbdev);
6355 
6356 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
6357 
6358 	mutex_unlock(&scheduler->lock);
6359 }
6360 
6361 static
6362 enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
6363 {
6364 	struct kbase_context *const kctx = param;
6365 
6366 	KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
6367 
6368 	queue_work(kctx->csf.sched.sync_update_wq,
6369 		&kctx->csf.sched.sync_update_work);
6370 
6371 	return KBASE_CSF_EVENT_CALLBACK_KEEP;
6372 }
6373 
6374 int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
6375 {
6376 	int priority;
6377 	int err;
6378 
6379 	kbase_ctx_sched_init_ctx(kctx);
6380 
6381 	for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
6382 	     ++priority) {
6383 		INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
6384 	}
6385 
6386 	kctx->csf.sched.num_runnable_grps = 0;
6387 	INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
6388 	kctx->csf.sched.num_idle_wait_grps = 0;
6389 	kctx->csf.sched.ngrp_to_schedule = 0;
6390 
6391 	kctx->csf.sched.sync_update_wq =
6392 		alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq",
6393 			WQ_HIGHPRI);
6394 	if (!kctx->csf.sched.sync_update_wq) {
6395 		dev_err(kctx->kbdev->dev,
6396 			"Failed to initialize scheduler context workqueue");
6397 		err = -ENOMEM;
6398 		goto alloc_wq_failed;
6399 	}
6400 
6401 	INIT_WORK(&kctx->csf.sched.sync_update_work,
6402 		check_group_sync_update_worker);
6403 
6404 	kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
6405 
6406 	err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
6407 
6408 	if (err) {
6409 		dev_err(kctx->kbdev->dev,
6410 			"Failed to register a sync update callback");
6411 		goto event_wait_add_failed;
6412 	}
6413 
6414 	return err;
6415 
6416 event_wait_add_failed:
6417 	destroy_workqueue(kctx->csf.sched.sync_update_wq);
6418 alloc_wq_failed:
6419 	kbase_ctx_sched_remove_ctx(kctx);
6420 	return err;
6421 }
6422 
6423 void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
6424 {
6425 	kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
6426 	cancel_work_sync(&kctx->csf.sched.sync_update_work);
6427 	destroy_workqueue(kctx->csf.sched.sync_update_wq);
6428 
6429 	kbase_ctx_sched_remove_ctx(kctx);
6430 }
6431 
6432 int kbase_csf_scheduler_init(struct kbase_device *kbdev)
6433 {
6434 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6435 	u32 num_groups = kbdev->csf.global_iface.group_num;
6436 
6437 	bitmap_zero(scheduler->csg_inuse_bitmap, num_groups);
6438 	bitmap_zero(scheduler->csg_slots_idle_mask, num_groups);
6439 
6440 	scheduler->csg_slots = kcalloc(num_groups,
6441 				sizeof(*scheduler->csg_slots), GFP_KERNEL);
6442 	if (!scheduler->csg_slots) {
6443 		dev_err(kbdev->dev,
6444 			"Failed to allocate memory for csg slot status array\n");
6445 		return -ENOMEM;
6446 	}
6447 
6448 	return kbase_csf_mcu_shared_regs_data_init(kbdev);
6449 }
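/*
 * Note: kbase_csf_scheduler_init() above sizes the CSG slot array from
 * kbdev->csf.global_iface.group_num, which is only known once the firmware
 * interface has been parsed, so this stage necessarily runs later in device
 * initialisation than kbase_csf_scheduler_early_init() below, which only sets
 * up software state (workqueues, locks, lists and timers).
 */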
6450 
6451 int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
6452 {
6453 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6454 
6455 	scheduler->timer_enabled = true;
6456 
6457 	scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI);
6458 	if (!scheduler->wq) {
6459 		dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
6460 		return -ENOMEM;
6461 	}
6462 	scheduler->idle_wq = alloc_ordered_workqueue(
6463 		"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
6464 	if (!scheduler->idle_wq) {
6465 		dev_err(kbdev->dev,
6466 			"Failed to allocate GPU idle scheduler workqueue\n");
6467 		destroy_workqueue(kbdev->csf.scheduler.wq);
6468 		return -ENOMEM;
6469 	}
6470 
6471 	INIT_WORK(&scheduler->tick_work, schedule_on_tick);
6472 	INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
6473 	atomic_set(&scheduler->pending_tock_work, false);
6474 
6475 	INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
6476 
6477 	mutex_init(&scheduler->lock);
6478 	spin_lock_init(&scheduler->interrupt_lock);
6479 
6480 	/* Internal lists */
6481 	INIT_LIST_HEAD(&scheduler->runnable_kctxs);
6482 	INIT_LIST_HEAD(&scheduler->groups_to_schedule);
6483 	INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule);
6484 
6485 	BUILD_BUG_ON(MAX_SUPPORTED_CSGS >
6486 		(sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
6487 	bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
6488 	scheduler->state = SCHED_SUSPENDED;
6489 	KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
6490 	scheduler->pm_active_count = 0;
6491 	scheduler->ngrp_to_schedule = 0;
6492 	scheduler->total_runnable_grps = 0;
6493 	scheduler->top_ctx = NULL;
6494 	scheduler->top_grp = NULL;
6495 	scheduler->last_schedule = 0;
6496 	scheduler->active_protm_grp = NULL;
6497 	scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
6498 	scheduler_doorbell_init(kbdev);
6499 
6500 	INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
6501 	scheduler->fast_gpu_idle_handling = false;
6502 	atomic_set(&scheduler->gpu_no_longer_idle, false);
6503 	atomic_set(&scheduler->non_idle_offslot_grps, 0);
6504 
6505 	hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
6506 	scheduler->tick_timer.function = tick_timer_callback;
6507 	scheduler->tick_timer_active = false;
6508 
6509 	kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
6510 
6511 	return 0;
6512 }
6513 
6514 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
6515 {
6516 	if (kbdev->csf.scheduler.csg_slots) {
6517 		WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
6518 		/* The unload of Driver can take place only when all contexts have
6519 		 * been terminated. The groups that were not terminated by the User
6520 		 * are terminated on context termination. So no CSGs are expected
6521 		 * to be active at the time of Driver unload.
6522 		 */
6523 		WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
6524 		flush_work(&kbdev->csf.scheduler.gpu_idle_work);
6525 		mutex_lock(&kbdev->csf.scheduler.lock);
6526 
6527 		if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
6528 			unsigned long flags;
6529 			/* The power policy could prevent the Scheduler from
6530 			 * getting suspended when GPU becomes idle.
6531 			 */
6532 			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6533 			WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
6534 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6535 			scheduler_suspend(kbdev);
6536 		}
6537 
6538 		mutex_unlock(&kbdev->csf.scheduler.lock);
6539 		cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
6540 		cancel_tick_timer(kbdev);
6541 		cancel_tick_work(&kbdev->csf.scheduler);
6542 		cancel_tock_work(&kbdev->csf.scheduler);
6543 		kfree(kbdev->csf.scheduler.csg_slots);
6544 		kbdev->csf.scheduler.csg_slots = NULL;
6545 	}
6546 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_TERMINATED, NULL,
6547 				 kbase_csf_scheduler_get_nr_active_csgs(kbdev));
6548 	/* Terminating the MCU shared regions, following the release of slots */
6549 	kbase_csf_mcu_shared_regs_data_term(kbdev);
6550 }
6551 
6552 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
6553 {
6554 	if (kbdev->csf.scheduler.idle_wq)
6555 		destroy_workqueue(kbdev->csf.scheduler.idle_wq);
6556 	if (kbdev->csf.scheduler.wq)
6557 		destroy_workqueue(kbdev->csf.scheduler.wq);
6558 
6559 	kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
6560 	mutex_destroy(&kbdev->csf.scheduler.lock);
6561 }
6562 
6563 /**
6564  * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
6565  *
6566  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
6567  *
6568  * This function will restart the scheduler tick so that regular scheduling can
6569  * be resumed without any explicit trigger (like kicking of GPU queues). This
6570  * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the
6571  * CSF scheduler lock is already held.
6572  */
6573 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
6574 {
6575 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6576 
6577 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
6578 
6579 	if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
6580 		return;
6581 
6582 	WARN_ON((scheduler->state != SCHED_INACTIVE) &&
6583 		(scheduler->state != SCHED_SUSPENDED) &&
6584 		(scheduler->state != SCHED_SLEEPING));
6585 
6586 	if (scheduler->total_runnable_grps > 0) {
6587 		enqueue_tick_work(kbdev);
6588 		dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
6589 	} else if (scheduler->state != SCHED_SUSPENDED) {
6590 		enqueue_gpu_idle_work(scheduler);
6591 	}
6592 }
6593 
6594 void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
6595 {
6596 	mutex_lock(&kbdev->csf.scheduler.lock);
6597 	scheduler_enable_tick_timer_nolock(kbdev);
6598 	mutex_unlock(&kbdev->csf.scheduler.lock);
6599 }
6600 
6601 bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
6602 {
6603 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6604 	bool enabled;
6605 
6606 	mutex_lock(&scheduler->lock);
6607 	enabled = scheduler_timer_is_enabled_nolock(kbdev);
6608 	mutex_unlock(&scheduler->lock);
6609 
6610 	return enabled;
6611 }
6612 
6613 void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
6614 		bool enable)
6615 {
6616 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6617 	bool currently_enabled;
6618 
6619 	mutex_lock(&scheduler->lock);
6620 
6621 	currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
6622 	if (currently_enabled && !enable) {
6623 		scheduler->timer_enabled = false;
6624 		cancel_tick_timer(kbdev);
6625 		mutex_unlock(&scheduler->lock);
6626 		/* The non-sync version to cancel the normal work item is not
6627 		 * available, so we need to drop the lock before cancellation.
6628 		 */
6629 		cancel_tick_work(scheduler);
6630 		cancel_tock_work(scheduler);
6631 		return;
6632 	}
6633 
6634 	if (!currently_enabled && enable) {
6635 		scheduler->timer_enabled = true;
6636 
6637 		scheduler_enable_tick_timer_nolock(kbdev);
6638 	}
6639 
6640 	mutex_unlock(&scheduler->lock);
6641 }
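/*
 * Illustrative sketch (not part of the driver): the get/set pair above is
 * typically driven from a debugfs-style knob. The handler names and plumbing
 * below are assumptions; only the two scheduler calls are real.
 *
 *	static int scheduling_timer_enabled_set(void *data, u64 val)
 *	{
 *		struct kbase_device *kbdev = data;
 *
 *		kbase_csf_scheduler_timer_set_enabled(kbdev, val != 0);
 *		return 0;
 *	}
 *
 *	static int scheduling_timer_enabled_get(void *data, u64 *val)
 *	{
 *		struct kbase_device *kbdev = data;
 *
 *		*val = kbase_csf_scheduler_timer_is_enabled(kbdev);
 *		return 0;
 *	}
 */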
6642 
6643 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
6644 {
6645 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6646 
6647 	mutex_lock(&scheduler->lock);
6648 
6649 	if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
6650 		goto out;
6651 
6652 	if (scheduler->total_runnable_grps > 0) {
6653 		enqueue_tick_work(kbdev);
6654 		dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
6655 	}
6656 
6657 out:
6658 	mutex_unlock(&scheduler->lock);
6659 }
6660 
6661 int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
6662 {
6663 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6664 	int result = 0;
6665 
6666 	lockdep_assert_held(&scheduler->lock);
6667 
6668 #if IS_ENABLED(CONFIG_DEBUG_FS)
6669 	if (unlikely(scheduler->state == SCHED_BUSY))
6670 		return -EBUSY;
6671 #endif
6672 
6673 #ifdef KBASE_PM_RUNTIME
6674 	/* If scheduler is in sleeping state, then MCU needs to be activated
6675 	 * to suspend CSGs.
6676 	 */
6677 	if (scheduler->state == SCHED_SLEEPING) {
6678 		dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
6679 		result = force_scheduler_to_exit_sleep(kbdev);
6680 		if (result) {
6681 			dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
6682 			goto exit;
6683 		}
6684 	}
6685 #endif
6686 	if (scheduler->state != SCHED_SUSPENDED) {
6687 		result = suspend_active_groups_on_powerdown(kbdev, true);
6688 		if (result) {
6689 			dev_warn(kbdev->dev, "failed to suspend active groups");
6690 			goto exit;
6691 		} else {
6692 			dev_info(kbdev->dev, "Scheduler PM suspend");
6693 			scheduler_suspend(kbdev);
6694 			cancel_tick_timer(kbdev);
6695 		}
6696 	}
6697 
6698 exit:
6699 	return result;
6700 }
6701 
6702 int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
6703 {
6704 	int result = 0;
6705 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6706 
6707 	/* Cancel any potential queued delayed work(s) */
6708 	cancel_tick_work(scheduler);
6709 	cancel_tock_work(scheduler);
6710 
6711 	result = kbase_reset_gpu_prevent_and_wait(kbdev);
6712 	if (result) {
6713 		dev_warn(kbdev->dev, "Aborting PM suspend as the GPU reset could not be prevented.\n");
6714 		return result;
6715 	}
6716 
6717 	mutex_lock(&scheduler->lock);
6718 
6719 	result = kbase_csf_scheduler_pm_suspend_no_lock(kbdev);
6720 	mutex_unlock(&scheduler->lock);
6721 
6722 	kbase_reset_gpu_allow(kbdev);
6723 
6724 	return result;
6725 }
6726 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
6727 
6728 void kbase_csf_scheduler_pm_resume_no_lock(struct kbase_device *kbdev)
6729 {
6730 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6731 
6732 	lockdep_assert_held(&scheduler->lock);
6733 	if ((scheduler->total_runnable_grps > 0) &&
6734 	    (scheduler->state == SCHED_SUSPENDED)) {
6735 		dev_info(kbdev->dev, "Scheduler PM resume");
6736 		scheduler_wakeup(kbdev, true);
6737 	}
6738 }
6739 
6740 void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
6741 {
6742 	mutex_lock(&kbdev->csf.scheduler.lock);
6743 
6744 	kbase_csf_scheduler_pm_resume_no_lock(kbdev);
6745 	mutex_unlock(&kbdev->csf.scheduler.lock);
6746 }
6747 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
6748 
6749 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
6750 {
6751 	/* Here the lock is taken to synchronize against the runtime suspend
6752 	 * callback function, which may need to wake up the MCU for suspending
6753 	 * the CSGs before powering down the GPU.
6754 	 */
6755 	mutex_lock(&kbdev->csf.scheduler.lock);
6756 	scheduler_pm_active_handle_suspend(kbdev,
6757 			KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
6758 	mutex_unlock(&kbdev->csf.scheduler.lock);
6759 }
6760 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
6761 
6762 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
6763 {
6764 	/* Here the lock is taken just to maintain symmetry with
6765 	 * kbase_csf_scheduler_pm_active().
6766 	 */
6767 	mutex_lock(&kbdev->csf.scheduler.lock);
6768 	scheduler_pm_idle(kbdev);
6769 	mutex_unlock(&kbdev->csf.scheduler.lock);
6770 }
6771 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
6772 
6773 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
6774 {
6775 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6776 	unsigned long flags;
6777 	int err;
6778 
6779 	kbase_pm_lock(kbdev);
6780 	WARN_ON(!kbdev->pm.active_count);
6781 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6782 	WARN_ON(!scheduler->pm_active_count);
6783 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6784 	kbase_pm_unlock(kbdev);
6785 
6786 	kbase_pm_wait_for_poweroff_work_complete(kbdev);
6787 
6788 	err = kbase_pm_wait_for_desired_state(kbdev);
6789 	if (!err) {
6790 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6791 		WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
6792 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6793 	}
6794 
6795 	return err;
6796 }
6797 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
6798 
6799 #ifdef KBASE_PM_RUNTIME
6800 int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
6801 {
6802 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
6803 	unsigned long flags;
6804 	int ret;
6805 
6806 	dev_dbg(kbdev->dev, "Handling runtime suspend");
6807 
6808 	kbase_reset_gpu_assert_prevented(kbdev);
6809 	lockdep_assert_held(&scheduler->lock);
6810 	WARN_ON(scheduler->pm_active_count);
6811 
6812 	if (scheduler->state == SCHED_SUSPENDED) {
6813 		WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
6814 		return 0;
6815 	}
6816 
6817 	ret = suspend_active_groups_on_powerdown(kbdev, false);
6818 
6819 	if (ret) {
6820 		dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
6821 			 atomic_read(&scheduler->non_idle_offslot_grps));
6822 
6823 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6824 		kbdev->pm.backend.exit_gpu_sleep_mode = true;
6825 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6826 
6827 		kbase_csf_scheduler_invoke_tick(kbdev);
6828 		return ret;
6829 	}
6830 
6831 	scheduler->state = SCHED_SUSPENDED;
6832 	KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
6833 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
6834 	kbdev->pm.backend.gpu_sleep_mode_active = false;
6835 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
6836 
6837 	wake_up_all(&kbdev->csf.event_wait);
6838 	return 0;
6839 }
6840 
6841 void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
6842 {
6843 	u32 csg_nr;
6844 
6845 	lockdep_assert_held(&kbdev->hwaccess_lock);
6846 
6847 	WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
6848 
6849 	for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
6850 		struct kbase_csf_cmd_stream_group_info *ginfo =
6851 			&kbdev->csf.global_iface.groups[csg_nr];
6852 		bool csg_idle;
6853 
6854 		if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
6855 			continue;
6856 
6857 		csg_idle =
6858 			kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
6859 			CSG_STATUS_STATE_IDLE_MASK;
6860 		if (!csg_idle) {
6861 			dev_dbg(kbdev->dev,
6862 				"Re-activate Scheduler after MCU sleep");
6863 			kbdev->pm.backend.exit_gpu_sleep_mode = true;
6864 			kbase_csf_scheduler_invoke_tick(kbdev);
6865 			break;
6866 		}
6867 	}
6868 }
6869 
6870 void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
6871 {
6872 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6873 
6874 	mutex_lock(&scheduler->lock);
6875 	if (kbase_pm_gpu_sleep_allowed(kbdev) &&
6876 	    (scheduler->state == SCHED_INACTIVE))
6877 		scheduler_sleep_on_idle(kbdev);
6878 	mutex_unlock(&scheduler->lock);
6879 }
6880 #endif
6881 
6882 void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
6883 {
6884 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
6885 
6886 	mutex_lock(&scheduler->lock);
6887 	scheduler_wakeup(kbdev, true);
6888 	mutex_unlock(&scheduler->lock);
6889 }
6890