/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

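/*
 * HWSP (hardware status page) helpers: locate the page backing a
 * timeline's status slot and derive the cacheline index of its seqno,
 * so the mock tests can verify that no two timelines share a cacheline.
 */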
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

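/*
 * State for the mock HWSP freelist test: a radix tree keyed by cacheline
 * detects duplicate allocations, while a bounded history of recently
 * created timelines lets us free entries in an (optionally shuffled) order.
 */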
struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

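/* Store a timeline in a history slot, releasing whatever it replaces. */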
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

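/*
 * Create @count timelines, checking that each one is assigned a HWSP
 * cacheline not already in use, then release a random number of them so
 * that subsequent iterations exercise reuse of freed slots.
 */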
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSP do not overlap.
	 * Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

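/*
 * Table-driven check of the timeline sync map: each entry queries whether
 * (ctx, seqno) is already signalled, compares against the expected answer,
 * and optionally records the seqno for later steps.
 */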
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

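/*
 * Micro-benchmark the sync map: first measure the cost of the prng itself,
 * then time random insertions, lookups of those same ids, in-order
 * insert/lookup, and mixed search-then-update patterns, reporting ns/op.
 */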
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We compensate by measuring the overhead of the prng and
	 * subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

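/*
 * Entry point for the mock (hardware-independent) subtests. With
 * CONFIG_DRM_I915_SELFTEST enabled, these are typically run at module load,
 * e.g. via the i915.mock_selftests=-1 module parameter.
 */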
int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

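/*
 * Emit a store of @value to the GGTT address @addr, using the
 * MI_STORE_DWORD_IMM layout appropriate for the engine's generation
 * (gen8+ takes a 64-bit address, gen4+ a dummy dword then the address,
 * and older gens use the MI_MEM_VIRTUAL variant).
 */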
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

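/*
 * Submit a kernel-context request on @engine that stores @value into the
 * timeline's HWSP slot; the caller owns a reference on the returned request.
 */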
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl, NULL);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

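/*
 * Create a timeline and sanity-check that its breadcrumb slot already
 * reports the timeline's current seqno before any request is submitted.
 */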
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return tl;

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl, NULL);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = 0;
		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
			goto out;

		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and neither
	 * want to confuse ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

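/*
 * Entry point for the live subtests, which require real hardware; they are
 * skipped entirely if the GT has already been declared wedged.
 */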
int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}