/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

static bool irq_enable(struct intel_engine_cs *engine)
{
	if (!engine->irq_enable)
		return false;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_enable(engine);
	spin_unlock(&engine->gt->irq_lock);

	return true;
}

static void irq_disable(struct intel_engine_cs *engine)
{
	if (!engine->irq_disable)
		return;

	/* Caller disables interrupts */
	spin_lock(&engine->gt->irq_lock);
	engine->irq_disable(engine);
	spin_unlock(&engine->gt->irq_lock);
}

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, true);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		irq_disable(b->irq_engine);

	WRITE_ONCE(b->irq_armed, false);
	intel_gt_pm_put_async(b->irq_engine->gt);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

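/*
 * Sanity check (only evaluated from GEM_BUG_ON, hence __maybe_unused):
 * the per-context signal list must remain ordered by seqno, oldest first,
 * matching retirement order.
 */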
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}

static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

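/* Queue the context's timeline for retirement on the interrupt engine. */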
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

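/*
 * Try to claim the fence for signaling. Returns true if we now own the
 * request reference for deferred notification; returns false (and drops
 * the reference taken by the breadcrumb) if the fence was already signaled
 * elsewhere.
 */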
static bool __signal_request(struct i915_request *rq)
{
	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));

	if (!__dma_fence_signal(&rq->fence)) {
		i915_request_put(rq);
		return false;
	}

	return true;
}

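/*
 * Open-coded llist helper: push @node onto the caller-local single-linked
 * list headed by @head. The list is private to the caller, so no atomics
 * are required.
 */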
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

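/*
 * irq_work callback: walk the per-context signal lists, collect completed
 * requests, and invoke their dma-fence callbacks outside of ce->signal_lock.
 * Also arms or disarms the user interrupt depending on whether anyone is
 * still listening.
 */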
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);

			if (__signal_request(rq))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);

			if (release) {
				add_retire(b, ce->timeline);
				intel_context_put(ce);
			}
		}
	}
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);
}

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	b->irq_engine = irq_engine;

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	return b;
}

void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		irq_enable(b->irq_engine);
	else
		irq_disable(b->irq_engine);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	/* Kick the work once more to drain the signalers */
	irq_work_sync(&b->irq_work);
	while (unlikely(READ_ONCE(b->irq_armed))) {
		local_irq_disable();
		signal_irq_work(&b->irq_work);
		local_irq_enable();
		cond_resched();
	}
	GEM_BUG_ON(!list_empty(&b->signalers));
}

void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);
	kfree(b);
}

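/*
 * Insert the request into its context's signal list, keeping the list in
 * seqno order, and queue the irq_work to (re)arm the user interrupt.
 * Called with ce->signal_lock held (see i915_request_enable_breadcrumb()).
 */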
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	i915_request_get(rq);

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		if (__signal_request(rq) &&
		    llist_add(&rq->signal_node, &b->signaled_requests))
			irq_work_queue(&b->irq_work);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	irq_work_queue(&b->irq_work);
}

bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

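/*
 * Remove the request from its context's signaling list and drop the
 * reference taken when the breadcrumb was attached. A no-op if no
 * breadcrumb was attached, or if it has already been claimed for
 * signaling.
 */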
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(rq->engine->breadcrumbs, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	i915_request_put(rq);
}

static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   i915_request_completed(rq) ? "!" :
				   i915_request_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}