/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

struct i915_request;
struct intel_engine_cs;
struct intel_timeline;

/*
 * We treat requests as fences. These are not to be confused with our
 * "fence registers", but are pipeline synchronisation objects ala GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU; for example, we should not rewrite an object's PTEs whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_fence to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_fence is updated with i915_active_fence_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_fence completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_fence.fence == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */
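
/*
 * Illustrative sketch (not part of the driver): a hypothetical owner,
 * "struct foo", embeds an i915_active_fence and supplies a retire
 * callback that fires once the tracked fence is signaled. The struct,
 * the callback and its body are assumptions for illustration only; the
 * initialiser used is __i915_active_fence_init() declared below.
 *
 *	struct foo {
 *		struct i915_active_fence write;
 *	};
 *
 *	static void foo_retire(struct dma_fence *fence,
 *			       struct dma_fence_cb *cb)
 *	{
 *		struct foo *foo = container_of(cb, typeof(*foo), write.cb);
 *
 *		// The tracker is now idle (foo->write.fence == NULL);
 *		// safe to release whatever was held for the GPU.
 *	}
 *
 *	static void foo_init(struct foo *foo)
 *	{
 *		__i915_active_fence_init(&foo->write, NULL, foo_retire);
 *	}
 */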

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);

/**
 * __i915_active_fence_init - prepares the activity tracker for use
 * @active: the active tracker
 * @fence: initial fence to track, can be NULL
 * @fn: callback invoked when the tracker is retired (becomes idle),
 *	can be NULL
 *
 * __i915_active_fence_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active fence
 * associated with it. When the last fence becomes idle, when it is retired
 * after completion, the optional callback @fn is invoked.
 */
static inline void
__i915_active_fence_init(struct i915_active_fence *active,
			 void *fence,
			 dma_fence_func_t fn)
{
	RCU_INIT_POINTER(active->fence, fence);
	active->cb.func = fn ?: i915_active_noop;
}

#define INIT_ACTIVE_FENCE(A) \
	__i915_active_fence_init((A), NULL, NULL)

struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence);

/**
 * i915_active_fence_set - updates the tracker to watch the current fence
 * @active: the active tracker
 * @rq: the request to watch
 *
 * i915_active_fence_set() watches the given @rq for completion. While
 * that @rq is busy, the @active reports busy. When that @rq is signaled
 * (or else retired) the @active tracker is updated to report idle.
 */
int __must_check
i915_active_fence_set(struct i915_active_fence *active,
		      struct i915_request *rq);
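
/*
 * Illustrative sketch (not part of the driver): updating the hypothetical
 * foo->write tracker from above to follow the latest request touching the
 * object, in the same spirit as i915_vma_move_to_active(). The helper name
 * is an assumption, and the locking required by the real callers is elided.
 *
 *	static int foo_move_to_active(struct foo *foo, struct i915_request *rq)
 *	{
 *		return i915_active_fence_set(&foo->write, rq);
 *	}
 */
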
/**
 * i915_active_fence_get - return a reference to the active fence
 * @active: the active tracker
 *
 * i915_active_fence_get() returns a reference to the active fence,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with dma_fence_put().
 */
static inline struct dma_fence *
i915_active_fence_get(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	rcu_read_lock();
	fence = dma_fence_get_rcu_safe(&active->fence);
	rcu_read_unlock();

	return fence;
}
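
/*
 * Illustrative sketch (not part of the driver): waiting on whatever fence
 * the hypothetical foo->write tracker currently points at. The wait itself
 * is only for illustration; the get/put pairing is the part that matters.
 *
 *	struct dma_fence *fence;
 *
 *	fence = i915_active_fence_get(&foo->write);
 *	if (fence) {
 *		dma_fence_wait(fence, true);
 *		dma_fence_put(fence);
 *	}
 */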

/**
 * i915_active_fence_isset - report whether the active tracker is assigned
 * @active: the active tracker
 *
 * i915_active_fence_isset() returns true if the active tracker is currently
 * assigned to a fence. Due to the lazy retiring, that fence may be idle
 * and this may report stale information.
 */
static inline bool
i915_active_fence_isset(const struct i915_active_fence *active)
{
	return rcu_access_pointer(active->fence);
}

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU comprises a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */
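
/*
 * Illustrative sketch (not part of the driver): a hypothetical resource,
 * "struct bar", embeds a struct i915_active and uses the init interface
 * declared below. The struct, its callbacks and their bodies are
 * assumptions for illustration only.
 *
 *	struct bar {
 *		struct i915_active active;
 *	};
 *
 *	static int bar_active(struct i915_active *ref)
 *	{
 *		// first user appeared; pin backing storage, etc.
 *		return 0;
 *	}
 *
 *	static void bar_retire(struct i915_active *ref)
 *	{
 *		// last tracked fence signaled; safe to unpin/free.
 *	}
 *
 *	static void bar_init(struct bar *bar)
 *	{
 *		i915_active_init(&bar->active, bar_active, bar_retire);
 *	}
 */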

void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey);

/* Specialise each class of i915_active to avoid impossible lockdep cycles. */
#define i915_active_init(ref, active, retire) do {			\
	static struct lock_class_key __mkey;				\
	static struct lock_class_key __wkey;				\
									\
	__i915_active_init(ref, active, retire, &__mkey, &__wkey);	\
} while (0)

struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);

static inline int
i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
	return i915_active_ref(ref,
			       i915_request_timeline(rq)->fence_context,
			       &rq->fence);
}
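
/*
 * Illustrative sketch (not part of the driver): the usual pattern for
 * tracking a new request against the hypothetical "bar" above. The tracker
 * must be acquired (kept alive) across the update and released afterwards;
 * the helper name and error handling are assumptions for illustration.
 *
 *	static int bar_track(struct bar *bar, struct i915_request *rq)
 *	{
 *		int err;
 *
 *		err = i915_active_acquire(&bar->active);
 *		if (err)
 *			return err;
 *
 *		err = i915_active_add_request(&bar->active, rq);
 *		i915_active_release(&bar->active);
 *
 *		return err;
 *	}
 */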

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);

static inline bool i915_active_has_exclusive(struct i915_active *ref)
{
	return rcu_access_pointer(ref->excl.fence);
}

int __i915_active_wait(struct i915_active *ref, int state);
static inline int i915_active_wait(struct i915_active *ref)
{
	return __i915_active_wait(ref, TASK_INTERRUPTIBLE);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags);
int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags);
#define I915_ACTIVE_AWAIT_EXCL BIT(0)
#define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
#define I915_ACTIVE_AWAIT_BARRIER BIT(2)
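
/*
 * Illustrative sketch (not part of the driver): making a new request wait
 * for both the exclusive fence and all shared activity tracked in the
 * hypothetical "bar" before it executes.
 *
 *	err = i915_request_await_active(rq, &bar->active,
 *					I915_ACTIVE_AWAIT_EXCL |
 *					I915_ACTIVE_AWAIT_ACTIVE);
 *	if (err)
 *		return err;
 */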

int i915_active_acquire(struct i915_active *ref);
int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
bool i915_active_acquire_if_busy(struct i915_active *ref);

void i915_active_release(struct i915_active *ref);

static inline void __i915_active_acquire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	atomic_inc(&ref->count);
}

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
	return !atomic_read(&ref->count);
}

void i915_active_fini(struct i915_active *ref);

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_active_barriers(struct i915_request *rq);

void i915_active_print(struct i915_active *ref, struct drm_printer *m);
void i915_active_unlock_wait(struct i915_active *ref);

struct i915_active *i915_active_create(void);
struct i915_active *i915_active_get(struct i915_active *ref);
void i915_active_put(struct i915_active *ref);

static inline int __i915_request_await_exclusive(struct i915_request *rq,
						 struct i915_active *active)
{
	struct dma_fence *fence;
	int err = 0;

	fence = i915_active_fence_get(&active->excl);
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

#endif /* _I915_ACTIVE_H_ */