xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/i915/gt/intel_timeline.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
	struct intel_gt *gt;
	struct intel_gt_timelines *gt_timelines;
	struct list_head free_link;
	struct i915_vma *vma;
	u64 free_bitmap;
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

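/*
 * Grab a free cacheline for a timeline's HWSP from the per-gt pool. Each
 * backing page is carved into BIT(CACHELINE_BITS) slots of CACHELINE_BYTES,
 * with availability tracked in hwsp->free_bitmap; pages with no free slots
 * are dropped from gt->timelines.hwsp_free_list, and a fresh page is
 * allocated on demand when the freelist is empty.
 */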
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
	struct intel_gt_timelines *gt = &timeline->gt->timelines;
	struct intel_timeline_hwsp *hwsp;

	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

	spin_lock_irq(&gt->hwsp_lock);

	/* hwsp_free_list only contains HWSP that have available cachelines */
	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
					typeof(*hwsp), free_link);
	if (!hwsp) {
		struct i915_vma *vma;

		spin_unlock_irq(&gt->hwsp_lock);

		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
		if (!hwsp)
			return ERR_PTR(-ENOMEM);

		vma = __hwsp_alloc(timeline->gt);
		if (IS_ERR(vma)) {
			kfree(hwsp);
			return vma;
		}

		GT_TRACE(timeline->gt, "new HWSP allocated\n");

		vma->private = hwsp;
		hwsp->gt = timeline->gt;
		hwsp->vma = vma;
		hwsp->free_bitmap = ~0ull;
		hwsp->gt_timelines = gt;

		spin_lock_irq(&gt->hwsp_lock);
		list_add(&hwsp->free_link, &gt->hwsp_free_list);
	}

	GEM_BUG_ON(!hwsp->free_bitmap);
	*cacheline = __ffs64(hwsp->free_bitmap);
	hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
	if (!hwsp->free_bitmap)
		list_del(&hwsp->free_link);

	spin_unlock_irq(&gt->hwsp_lock);

	GEM_BUG_ON(hwsp->vma->private != hwsp);
	return hwsp->vma;
}

static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
	struct intel_gt_timelines *gt = hwsp->gt_timelines;
	unsigned long flags;

	spin_lock_irqsave(&gt->hwsp_lock, flags);

	/* As a cacheline becomes available, publish the HWSP on the freelist */
	if (!hwsp->free_bitmap)
		list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

	GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
	hwsp->free_bitmap |= BIT_ULL(cacheline);

	/* And if no one is left using it, give the page back to the system */
	if (hwsp->free_bitmap == ~0ull) {
		i915_vma_put(hwsp->vma);
		list_del(&hwsp->free_link);
		kfree(hwsp);
	}

	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __rcu_cacheline_free(struct rcu_head *rcu)
{
	struct intel_timeline_cacheline *cl =
		container_of(rcu, typeof(*cl), rcu);

	/* Must wait until after all *rq->hwsp are complete before removing */
	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree(cl);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
	call_rcu(&cl->rcu, __rcu_cacheline_free);
}

__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	i915_vma_unpin(cl->hwsp->vma);
	if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
		__idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
	struct intel_timeline_cacheline *cl =
		container_of(active, typeof(*cl), active);

	__i915_vma_pin(cl->hwsp->vma);
	return 0;
}

static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
	struct intel_timeline_cacheline *cl;
	void *vaddr;

	GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (!cl)
		return ERR_PTR(-ENOMEM);

	vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		kfree(cl);
		return ERR_CAST(vaddr);
	}

	cl->hwsp = hwsp;
	cl->vaddr = page_pack_bits(vaddr, cacheline);

	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

	return cl;
}

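/*
 * Cacheline lifetime is tracked with an i915_active: while acquired (the
 * owning timeline is pinned), __cacheline_active() keeps the backing HWSP
 * vma pinned; when the last reference retires, __cacheline_retire() drops
 * that pin and, if the cacheline was marked CACHELINE_FREE, returns it to
 * the pool.
 */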
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
			      u32 ggtt_offset)
{
	if (!cl)
		return;

	cl->ggtt_offset = ggtt_offset;
	i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
	if (cl)
		i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
	if (!i915_active_acquire_if_busy(&cl->active)) {
		__idle_cacheline_free(cl);
		return;
	}

	GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
	cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

	i915_active_release(&cl->active);
}

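/*
 * A timeline either borrows a cacheline from the shared HWSP pool (hwsp ==
 * NULL, which also sets has_initial_breadcrumb) or uses the caller-provided
 * global HWSP vma at the given offset.
 */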
static int intel_timeline_init(struct intel_timeline *timeline,
			       struct intel_gt *gt,
			       struct i915_vma *hwsp,
			       unsigned int offset)
{
	void *vaddr;

	kref_init(&timeline->kref);
	atomic_set(&timeline->pin_count, 0);

	timeline->gt = gt;

	timeline->has_initial_breadcrumb = !hwsp;
	timeline->hwsp_cacheline = NULL;

	if (!hwsp) {
		struct intel_timeline_cacheline *cl;
		unsigned int cacheline;

		hwsp = hwsp_alloc(timeline, &cacheline);
		if (IS_ERR(hwsp))
			return PTR_ERR(hwsp);

		cl = cacheline_alloc(hwsp->private, cacheline);
		if (IS_ERR(cl)) {
			__idle_hwsp_free(hwsp->private, cacheline);
			return PTR_ERR(cl);
		}

		timeline->hwsp_cacheline = cl;
		timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

		vaddr = page_mask_bits(cl->vaddr);
	} else {
		timeline->hwsp_offset = offset;
		vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);
	}

	timeline->hwsp_seqno =
		memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

	timeline->hwsp_ggtt = i915_vma_get(hwsp);
	GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

	timeline->fence_context = dma_fence_context_alloc(1);

	mutex_init(&timeline->mutex);

	INIT_ACTIVE_FENCE(&timeline->last_request);
	INIT_LIST_HEAD(&timeline->requests);

	i915_syncmap_init(&timeline->sync);

	return 0;
}

void intel_gt_init_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	spin_lock_init(&timelines->lock);
	INIT_LIST_HEAD(&timelines->active_list);

	spin_lock_init(&timelines->hwsp_lock);
	INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

static void intel_timeline_fini(struct intel_timeline *timeline)
{
	GEM_BUG_ON(atomic_read(&timeline->pin_count));
	GEM_BUG_ON(!list_empty(&timeline->requests));
	GEM_BUG_ON(timeline->retire);

	if (timeline->hwsp_cacheline)
		cacheline_free(timeline->hwsp_cacheline);
	else
		i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

	i915_vma_put(timeline->hwsp_ggtt);

	/*
	 * A small race exists between intel_gt_retire_requests_timeout and
	 * intel_timeline_exit which could result in the syncmap not getting
	 * freed. Rather than work too hard to seal this race, simply clean up
	 * the syncmap on fini.
	 */
	i915_syncmap_free(&timeline->sync);
}

struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt,
			struct i915_vma *global_hwsp,
			unsigned int offset)
{
	struct intel_timeline *timeline;
	int err;

	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
	if (!timeline)
		return ERR_PTR(-ENOMEM);

	err = intel_timeline_init(timeline, gt, global_hwsp, offset);
	if (err) {
		kfree(timeline);
		return ERR_PTR(err);
	}

	return timeline;
}

void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}

int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
	int err;

	if (atomic_add_unless(&tl->pin_count, 1, 0))
		return 0;

	err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
	if (err)
		return err;

	tl->hwsp_offset =
		i915_ggtt_offset(tl->hwsp_ggtt) +
		offset_in_page(tl->hwsp_offset);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
	}

	return 0;
}

void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
	/* Must be pinned to be writable, and no requests in flight. */
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
}

void intel_timeline_enter(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/*
	 * Pretend we are serialised by the timeline->mutex.
	 *
	 * While generally true, there are a few exceptions to the rule
	 * for the engine->kernel_context being used to manage power
	 * transitions. As the engine_park may be called from under any
	 * timeline, it uses the power mutex as a global serialisation
	 * lock to prevent any other request entering its timeline.
	 *
	 * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
	 *
	 * However, intel_gt_retire_requests() does not know which engine
	 * it is retiring along and so cannot partake in the engine-pm
	 * barrier, and there we use the tl->active_count as a means to
	 * pin the timeline in the active_list while the locks are dropped.
	 * Ergo, as that is outside of the engine-pm barrier, we need to
	 * use atomic to manipulate tl->active_count.
	 */
	lockdep_assert_held(&tl->mutex);

	if (atomic_add_unless(&tl->active_count, 1, 0))
		return;

	spin_lock(&timelines->lock);
	if (!atomic_fetch_inc(&tl->active_count)) {
		/*
		 * The HWSP is volatile, and may have been lost while inactive,
		 * e.g. across suspend/resume. Be paranoid, and ensure that
		 * the HWSP value matches our seqno so we don't proclaim
		 * the next request as already complete.
		 */
		intel_timeline_reset_seqno(tl);
		list_add_tail(&tl->link, &timelines->active_list);
	}
	spin_unlock(&timelines->lock);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
	struct intel_gt_timelines *timelines = &tl->gt->timelines;

	/* See intel_timeline_enter() */
	lockdep_assert_held(&tl->mutex);

	GEM_BUG_ON(!atomic_read(&tl->active_count));
	if (atomic_add_unless(&tl->active_count, -1, 1))
		return;

	spin_lock(&timelines->lock);
	if (atomic_dec_and_test(&tl->active_count))
		list_del(&tl->link);
	spin_unlock(&timelines->lock);

	/*
	 * Since this timeline is idle, all barriers upon which we were waiting
	 * must also be complete and so we can discard the last used barriers
	 * without loss of information.
	 */
	i915_syncmap_free(&tl->sync);
}

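/*
 * Advance the timeline by one seqno per request, or by two when the
 * timeline carries an initial breadcrumb (the GEM_BUG_ON below checks that
 * such seqnos stay even).
 */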
static u32 timeline_advance(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

	return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
	tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
			   struct i915_request *rq,
			   u32 *seqno)
{
	struct intel_timeline_cacheline *cl;
	unsigned int cacheline;
	struct i915_vma *vma;
	void *vaddr;
	int err;

	might_lock(&tl->gt->ggtt->vm.mutex);
	GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context);

	/*
	 * If there is an outstanding GPU reference to this cacheline,
	 * such as it being sampled by a HW semaphore on another timeline,
	 * we cannot wraparound our seqno value (the HW semaphore does
	 * a strict greater-than-or-equals compare, not i915_seqno_passed).
	 * So if the cacheline is still busy, we must detach ourselves
	 * from it and leave it inflight alongside its users.
	 *
	 * However, if nobody is watching and we can guarantee that nobody
	 * will, we could simply reuse the same cacheline.
	 *
	 * if (i915_active_request_is_signaled(&tl->last_request) &&
	 *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
	 *	return 0;
	 *
	 * That seems unlikely for a busy timeline that needed to wrap in
	 * the first place, so just replace the cacheline.
	 */

	vma = hwsp_alloc(tl, &cacheline);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_rollback;
	}

	err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (err) {
		__idle_hwsp_free(vma->private, cacheline);
		goto err_rollback;
	}

	cl = cacheline_alloc(vma->private, cacheline);
	if (IS_ERR(cl)) {
		err = PTR_ERR(cl);
		__idle_hwsp_free(vma->private, cacheline);
		goto err_unpin;
	}
	GEM_BUG_ON(cl->hwsp->vma != vma);

	/*
	 * Attach the old cacheline to the current request, so that we only
	 * free it after the current request is retired, which ensures that
	 * all writes into the cacheline from previous requests are complete.
	 */
	err = i915_active_ref(&tl->hwsp_cacheline->active,
			      tl->fence_context,
			      &rq->fence);
	if (err)
		goto err_cacheline;

	cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
	cacheline_free(tl->hwsp_cacheline);

	i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
	i915_vma_put(tl->hwsp_ggtt);

	tl->hwsp_ggtt = i915_vma_get(vma);

	vaddr = page_mask_bits(cl->vaddr);
	tl->hwsp_offset = cacheline * CACHELINE_BYTES;
	tl->hwsp_seqno =
		memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

	tl->hwsp_offset += i915_ggtt_offset(vma);
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(cl, tl->hwsp_offset);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
	GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
	return 0;

err_cacheline:
	cacheline_free(cl);
err_unpin:
	i915_vma_unpin(vma);
err_rollback:
	timeline_rollback(tl);
	return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
			     struct i915_request *rq,
			     u32 *seqno)
{
	*seqno = timeline_advance(tl);

	/* Replace the HWSP on wraparound for HW semaphores */
	if (unlikely(!*seqno && tl->hwsp_cacheline))
		return __intel_timeline_get_seqno(tl, rq, seqno);

	return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
			 struct i915_request *rq)
{
	return i915_active_add_request(&cl->active, rq);
}

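/*
 * Look up the GGTT offset of the HWSP slot holding @from's seqno so that
 * @to can poll it, keeping the cacheline alive (via cacheline_ref) until
 * @to is retired. Returns a positive value if @from has already completed,
 * or its seqno has wrapped, and no HWSP reference is needed.
 */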
int intel_timeline_read_hwsp(struct i915_request *from,
			     struct i915_request *to,
			     u32 *hwsp)
{
	struct intel_timeline_cacheline *cl;
	int err;

	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

	rcu_read_lock();
	cl = rcu_dereference(from->hwsp_cacheline);
	if (i915_request_completed(from)) /* confirm cacheline is valid */
		goto unlock;
	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
		goto unlock; /* seqno wrapped and completed! */
	if (unlikely(i915_request_completed(from)))
		goto release;
	rcu_read_unlock();

	err = cacheline_ref(cl, to);
	if (err)
		goto out;

	*hwsp = cl->ggtt_offset;
out:
	i915_active_release(&cl->active);
	return err;

release:
	i915_active_release(&cl->active);
unlock:
	rcu_read_unlock();
	return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	if (!atomic_dec_and_test(&tl->pin_count))
		return;

	cacheline_release(tl->hwsp_cacheline);

	__i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
	struct intel_timeline *timeline =
		container_of(kref, typeof(*timeline), kref);

	intel_timeline_fini(timeline);
	kfree_rcu(timeline, rcu);
}

void intel_gt_fini_timelines(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;

	GEM_BUG_ON(!list_empty(&timelines->active_list));
	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif