/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct i915_global_scheduler {
	struct i915_global base;
	struct kmem_cache *slab_dependencies;
	struct kmem_cache *slab_priorities;
} global;

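/*
 * schedule_lock serialises priority propagation: it protects the
 * signalers/waiters lists and the temporary dfs_link used by
 * __i915_schedule().
 */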
static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
	return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
	return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	return rb_entry(rb, struct i915_priolist, node);
}

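/*
 * Debug-only check that the execlists priority tree is kept in descending
 * priority order and that every non-empty sub-list has its corresponding
 * bit set in p->used.
 */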
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *rb;
	long last_prio, i;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
		   rb_first(&execlists->queue.rb_root));

	last_prio = INT_MAX;
	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
		const struct i915_priolist *p = to_priolist(rb);

		GEM_BUG_ON(p->priority > last_prio);
		last_prio = p->priority;

		GEM_BUG_ON(!p->used);
		for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
			if (list_empty(&p->requests[i]))
				continue;

			GEM_BUG_ON(!(p->used & BIT(i)));
		}
	}
}

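/*
 * Find (or create) the i915_priolist bucket for @prio in the engine's
 * execlists queue and return the list_head for its sub-priority index.
 * Must be called with engine->active.lock held. On allocation failure,
 * scheduling collapses into the single I915_PRIORITY_NORMAL bucket.
 */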
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first = true;
	int idx, i;

	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will still be some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

void __i915_priolist_free(struct i915_priolist *p)
{
	kmem_cache_free(global.slab_priorities, p);
}

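/*
 * Small per-walk cache so that repeated priolist lookups on the same engine
 * are avoided while rescheduling a chain of requests; it is reset whenever
 * we hop between engine locks.
 */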
struct sched_cache {
	struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	while (locked != (engine = READ_ONCE(rq->engine))) {
		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

static inline int rq_prio(const struct i915_request *rq)
{
	return rq->sched.attr.priority;
}

static inline bool need_preempt(int prio, int active)
{
	/*
	 * Allow preemption of low -> normal -> high, but we do
	 * not allow low priority tasks to preempt other low priority
	 * tasks under the impression that latency for low priority
	 * tasks does not matter (as much as background throughput),
	 * so keep it simple (kiss).
	 */
	return prio >= max(I915_PRIORITY_NORMAL, active);
}

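/*
 * When a request's priority is raised above the engine's queue priority
 * hint, update the hint and, if the request now outranks the currently
 * executing context, kick the submission tasklet to evaluate preemption.
 */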
static void kick_submission(struct intel_engine_cs *engine,
			    const struct i915_request *rq,
			    int prio)
{
	const struct i915_request *inflight;

	/*
	 * We only need to kick the tasklet once for the high priority
	 * new context we add into the queue.
	 */
	if (prio <= engine->execlists.queue_priority_hint)
		return;

	rcu_read_lock();

	/* Nothing currently active? We're overdue for a submission! */
	inflight = execlists_active(&engine->execlists);
	if (!inflight)
		goto unlock;

	/*
	 * If we are already the currently executing context, don't
	 * bother evaluating if we should preempt ourselves.
	 */
	if (inflight->context == rq->context)
		goto unlock;

	ENGINE_TRACE(engine,
		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
		     prio,
		     rq->fence.context, rq->fence.seqno,
		     inflight->fence.context, inflight->fence.seqno,
		     inflight->sched.attr.priority);

	engine->execlists.queue_priority_hint = prio;
	if (need_preempt(prio, rq_prio(inflight)))
		tasklet_hi_schedule(&engine->execlists.tasklet);

unlock:
	rcu_read_unlock();
}

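/*
 * Apply @attr to @node and propagate the (only ever increasing) priority
 * to every not-yet-started signaler, using an iterative depth-first walk
 * to avoid unbounded recursion. The caller must hold schedule_lock.
 */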
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	const int prio = max(attr->priority, node->attr.priority);
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (node_signaled(node))
		return;

	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->active.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		WRITE_ONCE(node->attr.priority, prio);

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it, and any
		 * preemption required, will be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/* Defer (tasklet) submission until after all of our updates. */
		kick_submission(engine, node_to_request(node), prio);
	}

	spin_unlock(&engine->active.lock);
}

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	spin_lock_irq(&schedule_lock);
	__i915_schedule(&rq->sched, attr);
	spin_unlock_irq(&schedule_lock);
}

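/*
 * Set additional low-order priority bits (below I915_USER_PRIORITY_SHIFT)
 * on the node, leaving the user-visible priority level itself untouched.
 */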
static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	if (attr.priority & bump)
		return;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long flags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
	if (READ_ONCE(rq->sched.attr.priority) & bump)
		return;

	spin_lock_irqsave(&schedule_lock, flags);
	__bump_priority(&rq->sched, bump);
	spin_unlock_irqrestore(&schedule_lock, flags);
}

void i915_sched_node_init(struct i915_sched_node *node)
{
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);

	i915_sched_node_reinit(node);
}

void i915_sched_node_reinit(struct i915_sched_node *node)
{
	node->attr.priority = I915_PRIORITY_INVALID;
	node->semaphores = 0;
	node->flags = 0;

	GEM_BUG_ON(!list_empty(&node->signalers_list));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
	kmem_cache_free(global.slab_dependencies, dep);
}

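/*
 * Record that @node waits upon @signal. Returns true if the dependency was
 * installed (i.e. @signal has not already completed); ownership of @dep then
 * passes to the dependency lists and, for I915_DEPENDENCY_ALLOC entries, it
 * is freed by i915_sched_node_fini().
 */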
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool ret = false;

	spin_lock_irq(&schedule_lock);

	if (!node_signaled(signal)) {
		INIT_LIST_HEAD(&dep->dfs_link);
		dep->signaler = signal;
		dep->waiter = node;
		dep->flags = flags;

		/* All set, now publish. Beware the lockless walkers. */
		list_add_rcu(&dep->signal_link, &node->signalers_list);
		list_add_rcu(&dep->wait_link, &signal->waiters_list);

		/* Propagate the chains */
		node->flags |= signal->flags;
		ret = true;
	}

	spin_unlock_irq(&schedule_lock);

	return ret;
}

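/*
 * Allocating wrapper around __i915_sched_node_add_dependency(). A signaler
 * that has already completed is not an error: the dependency is simply
 * dropped and the allocation returned to the slab.
 */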
int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal,
				   unsigned long flags)
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc();
	if (!dep)
		return -ENOMEM;

	local_bh_disable();

	if (!__i915_sched_node_add_dependency(node, signal, dep,
					      flags | I915_DEPENDENCY_ALLOC))
		i915_dependency_free(dep);

	local_bh_enable(); /* kick submission tasklet */

	return 0;
}

void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->signalers_list);

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		list_del_rcu(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->waiters_list);

	spin_unlock_irq(&schedule_lock);
}

static void i915_global_scheduler_shrink(void)
{
	kmem_cache_shrink(global.slab_dependencies);
	kmem_cache_shrink(global.slab_priorities);
}

static void i915_global_scheduler_exit(void)
{
	kmem_cache_destroy(global.slab_dependencies);
	kmem_cache_destroy(global.slab_priorities);
}

static struct i915_global_scheduler global = { {
	.shrink = i915_global_scheduler_shrink,
	.exit = i915_global_scheduler_exit,
} };

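/*
 * Create the slab caches for dependencies and priolists and register the
 * global shrink/exit callbacks. If the second cache cannot be created, the
 * first must be torn down again before returning.
 */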
int __init i915_global_scheduler_init(void)
{
	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_dependencies)
		return -ENOMEM;

	global.slab_priorities = KMEM_CACHE(i915_priolist,
					    SLAB_HWCACHE_ALIGN);
	if (!global.slab_priorities)
		goto err_priorities;

	i915_global_register(&global.base);
	return 0;

err_priorities:
	/* slab_priorities failed to allocate; release the dependencies cache */
	kmem_cache_destroy(global.slab_dependencies);
	return -ENOMEM;
}