xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/scheduler/sched_main.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 */
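
/*
 * Illustrative sketch (not part of this file): a driver typically wires the
 * pieces described above together roughly as below. The my_ring, my_job,
 * my_sched_ops and ctx names are placeholders, and the drm_sched_entity_init()
 * and drm_sched_entity_push_job() signatures are assumed to match
 * drivers/gpu/drm/scheduler/sched_entity.c in this tree.
 *
 *	struct drm_gpu_scheduler *sched_list = &my_ring->sched;
 *
 *	drm_sched_init(&my_ring->sched, &my_sched_ops, hw_submission,
 *		       hang_limit, msecs_to_jiffies(timeout_ms), "my_ring");
 *	drm_sched_entity_init(&ctx->entity, DRM_SCHED_PRIORITY_NORMAL,
 *			      &sched_list, 1, NULL);
 *
 *	drm_sched_job_init(&my_job->base, &ctx->entity, owner);
 *	drm_sched_entity_push_job(&my_job->base, &ctx->entity);
 *
 * From that point on the scheduler thread in this file picks the job up,
 * calls the driver's ->run_job() callback and tracks its hardware fence.
 */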

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_inc(&rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_dec(&rq->sched->score);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_dependency_optimized - test if the dependency can be optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

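/*
 * Illustrative sketch (not part of this file): a driver would usually call
 * drm_sched_fault() from its error interrupt handler so that the timeout
 * worker runs immediately instead of waiting for the TDR period to expire.
 * my_ring, to_my_ring_from_irq() and my_device_has_fault() are hypothetical
 * driver-side names.
 *
 *	static irqreturn_t my_fault_irq(int irq, void *arg)
 *	{
 *		struct my_ring *ring = to_my_ring_from_irq(arg);
 *
 *		if (my_device_has_fault(ring))
 *			drm_sched_fault(&ring->sched);
 *		return IRQ_HANDLED;
 *	}
 */
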
/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrary large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining.
 *
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
		unsigned long remaining)
{
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->ring_mirror_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(system_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);

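/*
 * Illustrative sketch (not part of this file): the two helpers above are
 * intended to be used as a pair around an operation during which the job
 * timeout should not fire, e.g. a driver-initiated engine reset. The
 * my_ring and my_reset_engine() names are placeholders.
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(&my_ring->sched);
 *	my_reset_engine(my_ring);
 *	drm_sched_resume_timeout(&my_ring->sched, remaining);
 */
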
static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->ring_mirror_list,
				       struct drm_sched_job, node);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by concurrent
		 * drm_sched_cleanup_jobs. It will be reinserted back after
		 * sched->thread is parked, at which point it's safe.
		 */
		list_del_init(&job->node);
		spin_unlock(&sched->job_list_lock);

		job->sched->ops->timedout_job(job);

		/*
		 * Guilty job did complete and hence needs to be manually removed
		 * See drm_sched_stop doc.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock(&sched->job_list_lock);
	}

	spin_lock(&sched->job_list_lock);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* don't increase @bad's karma if it's from KERNEL RQ,
	 * because a GPU hang can sometimes leave kernel jobs (like VM updating
	 * jobs) corrupted, but kernel jobs are always considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (atomic_read(&bad->karma) >
					    bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is
 * the caller's responsibility to release it manually if it is not part of the
 * mirror list any more.
 *
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;

	kthread_park(sched->thread);

	/*
	 * Reinsert back the bad job here - now it's safe as
	 * drm_sched_get_cleanup_job cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in progress
	 * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
	 * now until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->node, &sched->ring_mirror_list);

	/*
	 * Iterate the job list from the later to the earlier ones and either
	 * deactivate their HW callbacks or remove them from the mirror list
	 * if they have already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			atomic_dec(&sched->hw_rq_count);
		} else {
			/*
			 * remove job from ring_mirror_list.
			 * Locking here is for concurrent resume timeout
			 */
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->node);
			spin_unlock(&sched->job_list_lock);

			/*
			 * Wait for job's HW fence callback to finish using s_job
			 * before releasing it.
			 *
			 * Job is still alive so fence refcount at least 1
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep bad job alive for later use during
			 * recovery by some of the drivers but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the pending timer in flight as we rearm it in drm_sched_start.
	 * This prevents a timeout work item already in progress from firing
	 * right away after this TDR finishes and before the newly restarted
	 * jobs have had a chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
	int r;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also concurrent
	 * GPU recovers can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_inc(&sched->hw_rq_count);

		if (!full_recovery)
			continue;

		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &s_job->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
		} else
			drm_sched_process_job(NULL, &s_job->cb);
	}

	if (full_recovery) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}

	kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the mirror ring list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;

	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		dma_fence_put(s_job->s_fence->parent);
		fence = sched->ops->run_job(s_job);

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {
			s_job->s_fence->parent = fence;
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);

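/*
 * Illustrative sketch (not part of this file): drivers typically combine
 * drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs() and
 * drm_sched_start() in their ->timedout_job() / GPU reset path roughly as
 * below. my_ring, to_my_ring() and my_reset_hw() are placeholder driver-side
 * names, and the exact ordering around the hardware reset is driver specific.
 *
 *	static void my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct my_ring *ring = to_my_ring(bad->sched);
 *
 *		drm_sched_stop(&ring->sched, bad);
 *		drm_sched_increase_karma(bad);
 *
 *		my_reset_hw(ring);
 *
 *		drm_sched_resubmit_jobs(&ring->sched);
 *		drm_sched_start(&ring->sched, true);
 *	}
 */
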
/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	struct drm_gpu_scheduler *sched;

	drm_sched_entity_select_rq(entity);
	if (!entity->rq)
		return -ENOENT;

	sched = entity->rq->sched;

	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_LIST_HEAD(&job->node);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

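/*
 * Illustrative sketch (not part of this file): the usual submission pattern
 * embeds struct drm_sched_job in a driver job, initializes it against an
 * entity and then pushes it. my_job, ctx and file_priv are placeholders, and
 * drm_sched_entity_push_job() is assumed to take (job, entity) as in
 * drivers/gpu/drm/scheduler/sched_entity.c of this tree.
 *
 *	struct my_job *job = kzalloc(sizeof(*job), GFP_KERNEL);
 *	int ret;
 *
 *	if (!job)
 *		return -ENOMEM;
 *
 *	ret = drm_sched_job_init(&job->base, &ctx->entity, file_priv);
 *	if (ret) {
 *		kfree(job);
 *		return ret;
 *	}
 *
 *	drm_sched_entity_push_job(&job->base, &ctx->entity);
 */
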
/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	dma_fence_put(&job->s_fence->finished);
	job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);

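/*
 * Illustrative sketch (not part of this file): drm_sched_job_cleanup() is
 * normally called from the driver's ->free_job() backend callback before the
 * driver releases its own job structure. my_job and to_my_job() are
 * placeholder driver-side names.
 *
 *	static void my_free_job(struct drm_sched_job *sched_job)
 *	{
 *		struct my_job *job = to_my_job(sched_job);
 *
 *		drm_sched_job_cleanup(sched_job);
 *		kfree(job);
 *	}
 */
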
/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

/**
 * drm_sched_process_job - process a job
 *
 * @f: fence
 * @cb: fence callbacks
 *
 * Called after job has finished execution.
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(&sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the mirror list (if there is one)
 * ready for it to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job;

	/*
	 * Don't destroy jobs while the timeout worker is running OR the thread
	 * is being parked and hence assumed to not touch ring_mirror_list
	 */
	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !cancel_delayed_work(&sched->work_tdr)) ||
	    kthread_should_park())
		return NULL;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->ring_mirror_list,
				       struct drm_sched_job, node);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from ring_mirror_list */
		list_del_init(&job->node);
	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched);
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns pointer of the sched with the least load or NULL if none of the
 * drm_gpu_schedulers are ready
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		     unsigned int num_sched_list)
{
	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
	int i;
	unsigned int min_score = UINT_MAX, num_score;

	for (i = 0; i < num_sched_list; ++i) {
		sched = sched_list[i];

		if (!sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping",
				 sched->name);
			continue;
		}

		num_score = atomic_read(&sched->score);
		if (num_score < min_score) {
			min_score = num_score;
			picked_sched = sched;
		}
	}

	return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);

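/*
 * Illustrative sketch (not part of this file): drm_sched_pick_best() lets a
 * driver with several equivalent rings spread load by score, for example when
 * selecting a scheduler for a new entity. my_rings[] and MY_NUM_RINGS are
 * placeholder driver-side names.
 *
 *	struct drm_gpu_scheduler *sched_list[MY_NUM_RINGS];
 *	struct drm_gpu_scheduler *best;
 *	int i;
 *
 *	for (i = 0; i < MY_NUM_RINGS; i++)
 *		sched_list[i] = &my_rings[i]->sched;
 *
 *	best = drm_sched_pick_best(sched_list, MY_NUM_RINGS);
 *	if (!best)
 *		return -ENODEV;
 */
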
/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_set_fifo_low(current);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;
		struct drm_sched_job *cleanup_job = NULL;

		wait_event_interruptible(sched->wake_up_worker,
					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (cleanup_job) {
			sched->ops->free_job(cleanup_job);
			/* queue timeout for next job */
			drm_sched_start_timeout(sched);
		}

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);

		complete(&entity->entity_idle);

		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		trace_drm_run_job(sched_job, entity);
		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		if (!IS_ERR_OR_NULL(fence)) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_process_job);
			if (r == -ENOENT)
				drm_sched_process_job(fence, &sched_job->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			drm_sched_process_job(NULL, &sched_job->cb);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i, ret;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
		drm_sched_rq_init(sched, &sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->score, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		ret = PTR_ERR(sched->thread);
		sched->thread = NULL;
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return ret;
	}

	sched->ready = true;
	return 0;
}
EXPORT_SYMBOL(drm_sched_init);

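/*
 * Illustrative sketch (not part of this file): a driver embeds a
 * drm_gpu_scheduler per ring, fills in drm_sched_backend_ops and calls
 * drm_sched_init() at ring setup time (pairing it with drm_sched_fini() on
 * teardown). The my_* callbacks, struct my_ring and the MY_* constants are
 * placeholder driver-side names; only the ops members used by this file
 * (.dependency, .run_job, .timedout_job, .free_job) are shown.
 *
 *	static const struct drm_sched_backend_ops my_sched_ops = {
 *		.dependency	= my_job_dependency,
 *		.run_job	= my_run_job,
 *		.timedout_job	= my_timedout_job,
 *		.free_job	= my_free_job,
 *	};
 *
 *	int my_ring_init(struct my_ring *ring)
 *	{
 *		return drm_sched_init(&ring->sched, &my_sched_ops,
 *				      MY_HW_SUBMISSION_LIMIT, MY_HANG_LIMIT,
 *				      msecs_to_jiffies(MY_TIMEOUT_MS),
 *				      ring->name);
 *	}
 */
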
/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *s_entity;
	int i;

	if (sched->thread)
		kthread_stop(sched->thread);

	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		struct drm_sched_rq *rq = &sched->sched_rq[i];

		if (!rq)
			continue;

		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list)
			/*
			 * Prevents reinsertion and marks job_queue as idle,
			 * it will be removed from the rq in
			 * drm_sched_entity_fini() eventually.
			 */
			s_entity->stopped = true;
		spin_unlock(&rq->lock);
	}

	/* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
	wake_up_all(&sched->job_scheduled);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);