/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "gem/i915_gem_context.h"

#include "intel_gt.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "selftest_engine_heartbeat.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"

#include "selftests/mock_drm.h"

#include "gem/selftests/mock_context.h"
#include "gem/selftests/igt_gem_utils.h"

#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */

struct hang {
	struct intel_gt *gt;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	u32 *seqno;
	u32 *batch;
};

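/*
 * Set up the scratch state shared by the hang tests: a kernel context, a
 * page used as a hardware status page (HWS) for seqno writes, and a batch
 * buffer object that hang_create_request() fills with a spinning batch.
 */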
static int hang_init(struct hang *h, struct intel_gt *gt)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->gt = gt;

	h->ctx = kernel_context(gt->i915);
	if (IS_ERR(h->ctx))
		return PTR_ERR(h->ctx);

	GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));

	h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(h->hws)) {
		err = PTR_ERR(h->hws);
		goto err_ctx;
	}

	h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					i915_coherent_map_type(gt->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
err_ctx:
	kernel_context_close(h->ctx);
	return err;
}

static u64 hws_address(const struct i915_vma *hws,
		       const struct i915_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj,
					flags & EXEC_OBJECT_WRITE);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

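/*
 * Build a request whose batch writes its seqno to the HWS page and then
 * jumps back to itself with MI_BATCH_BUFFER_START, so it never completes
 * on its own; the trailing MI_BATCH_BUFFER_END is only reached if the
 * caller overwrites the loop (see hang_fini()).
 */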
static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
	struct intel_gt *gt = h->gt;
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
	struct drm_i915_gem_object *obj;
	struct i915_request *rq = NULL;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	void *vaddr;
	u32 *batch;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		i915_vm_put(vm);
		return ERR_CAST(obj);
	}

	vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
	if (IS_ERR(vaddr)) {
		i915_gem_object_put(obj);
		i915_vm_put(vm);
		return ERR_CAST(vaddr);
	}

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	h->obj = obj;
	h->batch = vaddr;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_vm_put(vm);
		return ERR_CAST(vma);
	}

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws)) {
		i915_vm_put(vm);
		return ERR_CAST(hws);
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vm_put(vm);
		return ERR_PTR(err);
	}

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	rq = igt_request_alloc(h->ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin_hws;
	}

	err = move_to_active(vma, rq, 0);
	if (err)
		goto cancel_rq;

	err = move_to_active(hws, rq, 0);
	if (err)
		goto cancel_rq;

	batch = h->batch;
	if (INTEL_GEN(gt->i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_NOOP;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_NOOP;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(gt->i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_NOOP;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_NOOP;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(gt->i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_NOOP;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_NOOP;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_NOOP;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_NOOP;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	intel_gt_chipset_flush(engine->gt);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto cancel_rq;
	}

	flags = 0;
	if (INTEL_GEN(gt->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

cancel_rq:
	if (err) {
		i915_request_set_error_once(rq, err);
		i915_request_add(rq);
	}
unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	i915_vm_put(vm);
	return err ? ERR_PTR(err) : rq;
}

static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(h->gt);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	kernel_context_close(h->ctx);

	igt_flush_test(h->gt->i915);
}

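/*
 * Poll the HWS page until the spinning batch reports that it has started
 * executing (a short busy-wait followed by a longer sleeping wait).
 */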
static bool wait_until_running(struct hang *h, struct i915_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_hang_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	err = hang_init(&h, gt);
	if (err)
		return err;

	for_each_engine(engine, gt, id) {
		struct intel_wedge_me w;
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		intel_gt_chipset_flush(engine->gt);

		i915_request_add(rq);

		timeout = 0;
		intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
			timeout = i915_request_wait(rq, 0,
						    MAX_SCHEDULE_TIMEOUT);
		if (intel_gt_is_wedged(gt))
			timeout = -EIO;

		i915_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
	return err;
}

static bool wait_for_idle(struct intel_engine_cs *engine)
{
	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
}

static int igt_reset_nop(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	unsigned int reset_count, count;
	enum intel_engine_id id;
	IGT_TIMEOUT(end_time);
	int err = 0;

	/* Check that we can reset during non-user portions of requests */

	reset_count = i915_reset_count(global);
	count = 0;
	do {
		for_each_engine(engine, gt, id) {
			struct intel_context *ce;
			int i;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				break;
			}

			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = intel_context_create_request(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}

			intel_context_put(ce);
		}

		igt_global_reset_lock(gt);
		intel_gt_reset(gt, ALL_ENGINES, NULL);
		igt_global_reset_unlock(gt);

		if (intel_gt_is_wedged(gt)) {
			err = -EIO;
			break;
		}

		if (i915_reset_count(global) != reset_count + ++count) {
			pr_err("Full GPU reset not recorded!\n");
			err = -EINVAL;
			break;
		}

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	} while (time_before(jiffies, end_time));
	pr_info("%s: %d resets\n", __func__, count);

	if (igt_flush_test(gt->i915))
		err = -EIO;
	return err;
}

static int igt_reset_nop_engine(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Check that we can engine-reset during non-user portions */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		unsigned int reset_count, reset_engine_count, count;
		struct intel_context *ce;
		IGT_TIMEOUT(end_time);
		int err;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		reset_count = i915_reset_count(global);
		reset_engine_count = i915_reset_engine_count(global, engine);
		count = 0;

		st_engine_heartbeat_disable(engine);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			int i;

			if (!wait_for_idle(engine)) {
				pr_err("%s failed to idle before reset\n",
				       engine->name);
				err = -EIO;
				break;
			}

			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = intel_context_create_request(ce);
				if (IS_ERR(rq)) {
					struct drm_printer p =
						drm_info_printer(gt->i915->drm.dev);
					intel_engine_dump(engine, &p,
							  "%s(%s): failed to submit request\n",
							  __func__,
							  engine->name);

					GEM_TRACE("%s(%s): failed to submit request\n",
						  __func__,
						  engine->name);
					GEM_TRACE_DUMP();

					intel_gt_set_wedged(gt);

					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}
			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(global) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(global, engine) !=
			    reset_engine_count + ++count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		st_engine_heartbeat_enable(engine);

		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);

		intel_context_put(ce);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}

static int __igt_reset_engine(struct intel_gt *gt, bool active)
{
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

	/* Check that we can issue an engine reset on an idle engine (no-op) */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (active) {
		err = hang_init(&h, gt);
		if (err)
			return err;
	}

	for_each_engine(engine, gt, id) {
		unsigned int reset_count, reset_engine_count;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("%s failed to idle before reset\n",
			       engine->name);
			err = -EIO;
			break;
		}

		reset_count = i915_reset_count(global);
		reset_engine_count = i915_reset_engine_count(global, engine);

		st_engine_heartbeat_disable(engine);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			if (active) {
				struct i915_request *rq;

				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(global) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(global, engine) !=
			    ++reset_engine_count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		st_engine_heartbeat_enable(engine);

		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	if (active)
		hang_fini(&h);

	return err;
}

static int igt_reset_idle_engine(void *arg)
{
	return __igt_reset_engine(arg, false);
}

static int igt_reset_active_engine(void *arg)
{
	return __igt_reset_engine(arg, true);
}

struct active_engine {
	struct task_struct *task;
	struct intel_engine_cs *engine;
	unsigned long resets;
	unsigned int flags;
};

#define TEST_ACTIVE	BIT(0)
#define TEST_OTHERS	BIT(1)
#define TEST_SELF	BIT(2)
#define TEST_PRIORITY	BIT(3)

static int active_request_put(struct i915_request *rq)
{
	int err = 0;

	if (!rq)
		return 0;

	if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
			  rq->engine->name,
			  rq->fence.context,
			  rq->fence.seqno);
		GEM_TRACE_DUMP();

		intel_gt_set_wedged(rq->engine->gt);
		err = -EIO;
	}

	i915_request_put(rq);

	return err;
}

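/*
 * Background kthread used by __igt_reset_engines(): keeps its engine busy
 * with a rolling window of requests (optionally at random priorities) while
 * another engine is being reset, so we can check for cross-engine damage.
 */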
static int active_engine(void *data)
{
	I915_RND_STATE(prng);
	struct active_engine *arg = data;
	struct intel_engine_cs *engine = arg->engine;
	struct i915_request *rq[8] = {};
	struct intel_context *ce[ARRAY_SIZE(rq)];
	unsigned long count;
	int err = 0;

	for (count = 0; count < ARRAY_SIZE(ce); count++) {
		ce[count] = intel_context_create(engine);
		if (IS_ERR(ce[count])) {
			err = PTR_ERR(ce[count]);
			while (--count)
				intel_context_put(ce[count]);
			return err;
		}
	}

	count = 0;
	while (!kthread_should_stop()) {
		unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
		struct i915_request *old = rq[idx];
		struct i915_request *new;

		new = intel_context_create_request(ce[idx]);
		if (IS_ERR(new)) {
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_request_get(new);
		i915_request_add(new);

		if (engine->schedule && arg->flags & TEST_PRIORITY) {
			struct i915_sched_attr attr = {
				.priority =
					i915_prandom_u32_max_state(512, &prng),
			};
			engine->schedule(rq[idx], &attr);
		}

		err = active_request_put(old);
		if (err)
			break;

		cond_resched();
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++) {
		int err__ = active_request_put(rq[count]);

		/* Keep the first error */
		if (!err)
			err = err__;

		intel_context_put(ce[count]);
	}

	return err;
}

static int __igt_reset_engines(struct intel_gt *gt,
			       const char *test_name,
			       unsigned int flags)
{
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine, *other;
	enum intel_engine_id id, tmp;
	struct hang h;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (flags & TEST_ACTIVE) {
		err = hang_init(&h, gt);
		if (err)
			return err;

		if (flags & TEST_PRIORITY)
			h.ctx->sched.priority = 1024;
	}

	for_each_engine(engine, gt, id) {
		struct active_engine threads[I915_NUM_ENGINES] = {};
		unsigned long device = i915_reset_count(global);
		unsigned long count = 0, reported;
		IGT_TIMEOUT(end_time);

		if (flags & TEST_ACTIVE &&
		    !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
			       engine->name, test_name);
			err = -EIO;
			break;
		}

		memset(threads, 0, sizeof(threads));
		for_each_engine(other, gt, tmp) {
			struct task_struct *tsk;

			threads[tmp].resets =
				i915_reset_engine_count(global, other);

			if (other == engine && !(flags & TEST_SELF))
				continue;

			if (other != engine && !(flags & TEST_OTHERS))
				continue;

			threads[tmp].engine = other;
			threads[tmp].flags = flags;

			tsk = kthread_run(active_engine, &threads[tmp],
					  "igt/%s", other->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp].task = tsk;
			get_task_struct(tsk);
		}

		yield(); /* start all threads before we begin */

		st_engine_heartbeat_disable(engine);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			struct i915_request *rq = NULL;

			if (flags & TEST_ACTIVE) {
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}
			}

			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
				       engine->name, test_name, err);
				break;
			}

			count++;

			if (rq) {
				if (rq->fence.error != -EIO) {
					pr_err("i915_reset_engine(%s:%s):"
					       " failed to reset request %llx:%lld\n",
					       engine->name, test_name,
					       rq->fence.context,
					       rq->fence.seqno);
					i915_request_put(rq);

					GEM_TRACE_DUMP();
					intel_gt_set_wedged(gt);
					err = -EIO;
					break;
				}

				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
					struct drm_printer p =
						drm_info_printer(gt->i915->drm.dev);

					pr_err("i915_reset_engine(%s:%s):"
					       " failed to complete request %llx:%lld after reset\n",
					       engine->name, test_name,
					       rq->fence.context,
					       rq->fence.seqno);
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);
					i915_request_put(rq);

					GEM_TRACE_DUMP();
					intel_gt_set_wedged(gt);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("i915_reset_engine(%s:%s):"
				       " failed to idle after reset\n",
				       engine->name, test_name);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -EIO;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		st_engine_heartbeat_enable(engine);

		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
			engine->name, test_name, count);

		reported = i915_reset_engine_count(global, engine);
		reported -= threads[engine->id].resets;
		if (reported != count) {
			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
			       engine->name, test_name, count, reported);
			if (!err)
				err = -EINVAL;
		}

unwind:
		for_each_engine(other, gt, tmp) {
			int ret;

			if (!threads[tmp].task)
				continue;

			ret = kthread_stop(threads[tmp].task);
			if (ret) {
				pr_err("kthread for other engine %s failed, err=%d\n",
				       other->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp].task);

			if (other->uabi_class != engine->uabi_class &&
			    threads[tmp].resets !=
			    i915_reset_engine_count(global, other)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       other->name,
				       i915_reset_engine_count(global, other) -
				       threads[tmp].resets);
				if (!err)
					err = -EINVAL;
			}
		}

		if (device != i915_reset_count(global)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(global) - device);
			if (!err)
				err = -EINVAL;
		}

		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	if (flags & TEST_ACTIVE)
		hang_fini(&h);

	return err;
}

static int igt_reset_engines(void *arg)
{
	static const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "idle", 0 },
		{ "active", TEST_ACTIVE },
		{ "others-idle", TEST_OTHERS },
		{ "others-active", TEST_OTHERS | TEST_ACTIVE },
		{
			"others-priority",
			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
		},
		{
			"self-priority",
			TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
		},
		{ }
	};
	struct intel_gt *gt = arg;
	typeof(*phases) *p;
	int err;

	for (p = phases; p->name; p++) {
		if (p->flags & TEST_PRIORITY) {
			if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
				continue;
		}

		err = __igt_reset_engines(arg, p->name, p->flags);
		if (err)
			return err;
	}

	return 0;
}

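/*
 * Pretend hangcheck fired: note the current global reset count, trigger a
 * reset of the given engines, and return the prior count so callers can
 * check whether a new reset was recorded.
 */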
static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask)
{
	u32 count = i915_reset_count(&gt->i915->gpu_error);

	intel_gt_reset(gt, mask, NULL);

	return count;
}

static int igt_reset_wait(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine = gt->engine[RCS0];
	struct i915_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!engine || !intel_engine_can_store_dword(engine))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	igt_global_reset_lock(gt);

	err = hang_init(&h, gt);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		intel_gt_set_wedged(gt);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(gt, ALL_ENGINES);

	timeout = i915_request_wait(rq, 0, 10);
	if (timeout < 0) {
		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	if (i915_reset_count(global) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		return -EIO;

	return err;
}

struct evict_vma {
	struct completion completion;
	struct i915_vma *vma;
};

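/*
 * Worker threads for __igt_reset_evict_vma(): each signals its completion
 * and then blocks trying to evict (or re-fence) a vma still in use by the
 * hanging request, so it can only make progress once the reset has
 * cancelled that request.
 */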
evict_vma(void * data)1110*4882a593Smuzhiyun static int evict_vma(void *data)
1111*4882a593Smuzhiyun {
1112*4882a593Smuzhiyun struct evict_vma *arg = data;
1113*4882a593Smuzhiyun struct i915_address_space *vm = arg->vma->vm;
1114*4882a593Smuzhiyun struct drm_mm_node evict = arg->vma->node;
1115*4882a593Smuzhiyun int err;
1116*4882a593Smuzhiyun
1117*4882a593Smuzhiyun complete(&arg->completion);
1118*4882a593Smuzhiyun
1119*4882a593Smuzhiyun mutex_lock(&vm->mutex);
1120*4882a593Smuzhiyun err = i915_gem_evict_for_node(vm, &evict, 0);
1121*4882a593Smuzhiyun mutex_unlock(&vm->mutex);
1122*4882a593Smuzhiyun
1123*4882a593Smuzhiyun return err;
1124*4882a593Smuzhiyun }
1125*4882a593Smuzhiyun
evict_fence(void * data)1126*4882a593Smuzhiyun static int evict_fence(void *data)
1127*4882a593Smuzhiyun {
1128*4882a593Smuzhiyun struct evict_vma *arg = data;
1129*4882a593Smuzhiyun int err;
1130*4882a593Smuzhiyun
1131*4882a593Smuzhiyun complete(&arg->completion);
1132*4882a593Smuzhiyun
1133*4882a593Smuzhiyun /* Mark the fence register as dirty to force the mmio update. */
1134*4882a593Smuzhiyun err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
1135*4882a593Smuzhiyun if (err) {
1136*4882a593Smuzhiyun pr_err("Invalid Y-tiling settings; err:%d\n", err);
1137*4882a593Smuzhiyun return err;
1138*4882a593Smuzhiyun }
1139*4882a593Smuzhiyun
1140*4882a593Smuzhiyun err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
1141*4882a593Smuzhiyun if (err) {
1142*4882a593Smuzhiyun pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
1143*4882a593Smuzhiyun return err;
1144*4882a593Smuzhiyun }
1145*4882a593Smuzhiyun
1146*4882a593Smuzhiyun err = i915_vma_pin_fence(arg->vma);
1147*4882a593Smuzhiyun i915_vma_unpin(arg->vma);
1148*4882a593Smuzhiyun if (err) {
1149*4882a593Smuzhiyun pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
1150*4882a593Smuzhiyun return err;
1151*4882a593Smuzhiyun }
1152*4882a593Smuzhiyun
1153*4882a593Smuzhiyun i915_vma_unpin_fence(arg->vma);
1154*4882a593Smuzhiyun
1155*4882a593Smuzhiyun return 0;
1156*4882a593Smuzhiyun }
1157*4882a593Smuzhiyun
__igt_reset_evict_vma(struct intel_gt * gt,struct i915_address_space * vm,int (* fn)(void *),unsigned int flags)1158*4882a593Smuzhiyun static int __igt_reset_evict_vma(struct intel_gt *gt,
1159*4882a593Smuzhiyun struct i915_address_space *vm,
1160*4882a593Smuzhiyun int (*fn)(void *),
1161*4882a593Smuzhiyun unsigned int flags)
1162*4882a593Smuzhiyun {
1163*4882a593Smuzhiyun struct intel_engine_cs *engine = gt->engine[RCS0];
1164*4882a593Smuzhiyun struct drm_i915_gem_object *obj;
1165*4882a593Smuzhiyun struct task_struct *tsk = NULL;
1166*4882a593Smuzhiyun struct i915_request *rq;
1167*4882a593Smuzhiyun struct evict_vma arg;
1168*4882a593Smuzhiyun struct hang h;
1169*4882a593Smuzhiyun unsigned int pin_flags;
1170*4882a593Smuzhiyun int err;
1171*4882a593Smuzhiyun
1172*4882a593Smuzhiyun if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
1173*4882a593Smuzhiyun return 0;
1174*4882a593Smuzhiyun
1175*4882a593Smuzhiyun if (!engine || !intel_engine_can_store_dword(engine))
1176*4882a593Smuzhiyun return 0;
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun /* Check that we can recover an unbind stuck on a hanging request */
1179*4882a593Smuzhiyun
1180*4882a593Smuzhiyun err = hang_init(&h, gt);
1181*4882a593Smuzhiyun if (err)
1182*4882a593Smuzhiyun return err;
1183*4882a593Smuzhiyun
1184*4882a593Smuzhiyun obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
1185*4882a593Smuzhiyun if (IS_ERR(obj)) {
1186*4882a593Smuzhiyun err = PTR_ERR(obj);
1187*4882a593Smuzhiyun goto fini;
1188*4882a593Smuzhiyun }
1189*4882a593Smuzhiyun
1190*4882a593Smuzhiyun if (flags & EXEC_OBJECT_NEEDS_FENCE) {
1191*4882a593Smuzhiyun err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
1192*4882a593Smuzhiyun if (err) {
1193*4882a593Smuzhiyun pr_err("Invalid X-tiling settings; err:%d\n", err);
1194*4882a593Smuzhiyun goto out_obj;
1195*4882a593Smuzhiyun }
1196*4882a593Smuzhiyun }
1197*4882a593Smuzhiyun
1198*4882a593Smuzhiyun arg.vma = i915_vma_instance(obj, vm, NULL);
1199*4882a593Smuzhiyun if (IS_ERR(arg.vma)) {
1200*4882a593Smuzhiyun err = PTR_ERR(arg.vma);
1201*4882a593Smuzhiyun goto out_obj;
1202*4882a593Smuzhiyun }
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun rq = hang_create_request(&h, engine);
1205*4882a593Smuzhiyun if (IS_ERR(rq)) {
1206*4882a593Smuzhiyun err = PTR_ERR(rq);
1207*4882a593Smuzhiyun goto out_obj;
1208*4882a593Smuzhiyun }
1209*4882a593Smuzhiyun
1210*4882a593Smuzhiyun pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;
1211*4882a593Smuzhiyun
1212*4882a593Smuzhiyun if (flags & EXEC_OBJECT_NEEDS_FENCE)
1213*4882a593Smuzhiyun pin_flags |= PIN_MAPPABLE;
1214*4882a593Smuzhiyun
1215*4882a593Smuzhiyun err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
1216*4882a593Smuzhiyun if (err) {
1217*4882a593Smuzhiyun i915_request_add(rq);
1218*4882a593Smuzhiyun goto out_obj;
1219*4882a593Smuzhiyun }
1220*4882a593Smuzhiyun
1221*4882a593Smuzhiyun if (flags & EXEC_OBJECT_NEEDS_FENCE) {
1222*4882a593Smuzhiyun err = i915_vma_pin_fence(arg.vma);
1223*4882a593Smuzhiyun if (err) {
1224*4882a593Smuzhiyun pr_err("Unable to pin X-tiled fence; err:%d\n", err);
1225*4882a593Smuzhiyun i915_vma_unpin(arg.vma);
1226*4882a593Smuzhiyun i915_request_add(rq);
1227*4882a593Smuzhiyun goto out_obj;
1228*4882a593Smuzhiyun }
1229*4882a593Smuzhiyun }
1230*4882a593Smuzhiyun
1231*4882a593Smuzhiyun i915_vma_lock(arg.vma);
1232*4882a593Smuzhiyun err = i915_request_await_object(rq, arg.vma->obj,
1233*4882a593Smuzhiyun flags & EXEC_OBJECT_WRITE);
1234*4882a593Smuzhiyun if (err == 0)
1235*4882a593Smuzhiyun err = i915_vma_move_to_active(arg.vma, rq, flags);
1236*4882a593Smuzhiyun i915_vma_unlock(arg.vma);
1237*4882a593Smuzhiyun
1238*4882a593Smuzhiyun if (flags & EXEC_OBJECT_NEEDS_FENCE)
1239*4882a593Smuzhiyun i915_vma_unpin_fence(arg.vma);
1240*4882a593Smuzhiyun i915_vma_unpin(arg.vma);
1241*4882a593Smuzhiyun
1242*4882a593Smuzhiyun i915_request_get(rq);
1243*4882a593Smuzhiyun i915_request_add(rq);
1244*4882a593Smuzhiyun if (err)
1245*4882a593Smuzhiyun goto out_rq;
1246*4882a593Smuzhiyun
1247*4882a593Smuzhiyun if (!wait_until_running(&h, rq)) {
1248*4882a593Smuzhiyun struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1249*4882a593Smuzhiyun
1250*4882a593Smuzhiyun pr_err("%s: Failed to start request %llx, at %x\n",
1251*4882a593Smuzhiyun __func__, rq->fence.seqno, hws_seqno(&h, rq));
1252*4882a593Smuzhiyun intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
1253*4882a593Smuzhiyun
1254*4882a593Smuzhiyun intel_gt_set_wedged(gt);
1255*4882a593Smuzhiyun goto out_reset;
1256*4882a593Smuzhiyun }
1257*4882a593Smuzhiyun
1258*4882a593Smuzhiyun init_completion(&arg.completion);
1259*4882a593Smuzhiyun
1260*4882a593Smuzhiyun tsk = kthread_run(fn, &arg, "igt/evict_vma");
1261*4882a593Smuzhiyun if (IS_ERR(tsk)) {
1262*4882a593Smuzhiyun err = PTR_ERR(tsk);
1263*4882a593Smuzhiyun tsk = NULL;
1264*4882a593Smuzhiyun goto out_reset;
1265*4882a593Smuzhiyun }
1266*4882a593Smuzhiyun get_task_struct(tsk);
1267*4882a593Smuzhiyun
1268*4882a593Smuzhiyun wait_for_completion(&arg.completion);
1269*4882a593Smuzhiyun
1270*4882a593Smuzhiyun if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
1271*4882a593Smuzhiyun struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1272*4882a593Smuzhiyun
1273*4882a593Smuzhiyun pr_err("igt/evict_vma kthread did not wait\n");
1274*4882a593Smuzhiyun intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
1275*4882a593Smuzhiyun
1276*4882a593Smuzhiyun intel_gt_set_wedged(gt);
1277*4882a593Smuzhiyun goto out_reset;
1278*4882a593Smuzhiyun }
1279*4882a593Smuzhiyun
1280*4882a593Smuzhiyun out_reset:
1281*4882a593Smuzhiyun igt_global_reset_lock(gt);
1282*4882a593Smuzhiyun fake_hangcheck(gt, rq->engine->mask);
1283*4882a593Smuzhiyun igt_global_reset_unlock(gt);
1284*4882a593Smuzhiyun
1285*4882a593Smuzhiyun if (tsk) {
1286*4882a593Smuzhiyun struct intel_wedge_me w;
1287*4882a593Smuzhiyun
1288*4882a593Smuzhiyun /* The reset, even indirectly, should take less than 10ms. */
1289*4882a593Smuzhiyun intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
			err = kthread_stop(tsk);

		put_task_struct(tsk);
	}

out_rq:
	i915_request_put(rq);
out_obj:
	i915_gem_object_put(obj);
fini:
	hang_fini(&h);
	if (intel_gt_is_wedged(gt))
		return -EIO;

	return err;
}

static int igt_reset_evict_ggtt(void *arg)
{
	struct intel_gt *gt = arg;

	return __igt_reset_evict_vma(gt, &gt->ggtt->vm,
				     evict_vma, EXEC_OBJECT_WRITE);
}

static int igt_reset_evict_ppgtt(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_ppgtt *ppgtt;
	int err;

	/* aliasing == global gtt locking, covered above */
	if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
		return 0;

	ppgtt = i915_ppgtt_create(gt);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	err = __igt_reset_evict_vma(gt, &ppgtt->vm,
				    evict_vma, EXEC_OBJECT_WRITE);
	i915_vm_put(&ppgtt->vm);

	return err;
}

static int igt_reset_evict_fence(void *arg)
{
	struct intel_gt *gt = arg;

	return __igt_reset_evict_vma(gt, &gt->ggtt->vm,
				     evict_fence, EXEC_OBJECT_NEEDS_FENCE);
}

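/*
 * Helper for igt_reset_queue(): wait for every engine other than @exclude
 * to idle, so that back-to-back device resets do not keep interrupting
 * unrelated work (see the XXX note in igt_reset_queue()).
 */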
static int wait_for_others(struct intel_gt *gt,
			   struct intel_engine_cs *exclude)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id) {
		if (engine == exclude)
			continue;

		if (!wait_for_idle(engine))
			return -EIO;
	}

	return 0;
}

static int igt_reset_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	igt_global_reset_lock(gt);

	err = hang_init(&h, gt);
	if (err)
		goto unlock;

	for_each_engine(engine, gt, id) {
		struct i915_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_request_get(prev);
		i915_request_add(prev);

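		/*
		 * Repeatedly queue a second spinner behind prev, declare
		 * prev hung and reset: the guilty request should be marked
		 * -EIO while the queued one is replayed without error.
		 */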
		count = 0;
		do {
			struct i915_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h, engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			/*
			 * XXX We don't handle resetting the kernel context
			 * very well. If we trigger a device reset twice in
			 * quick succession while the kernel context is
			 * executing, we may end up skipping the breadcrumb.
			 * This is really only a problem for the selftest as
			 * normally there is a large interlude between resets
			 * (hangcheck), or we focus on resetting just one
			 * engine and so avoid repeatedly resetting innocents.
			 */
			err = wait_for_others(gt, engine);
			if (err) {
				pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
				       __func__, engine->name);
				i915_request_put(rq);
				i915_request_put(prev);

				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				goto fini;
			}

			if (!wait_until_running(&h, prev)) {
				struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

				pr_err("%s(%s): Failed to start request %llx, at %x\n",
				       __func__, engine->name,
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				i915_request_put(prev);

				intel_gt_set_wedged(gt);

				err = -EIO;
				goto fini;
			}

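			/*
			 * Declare prev hung and reset only this engine,
			 * recording the global reset count so we can
			 * verify a reset was actually performed.
			 */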
			reset_count = fake_hangcheck(gt, BIT(id));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(global) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

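		/*
		 * Terminate the spinner so the last replayed request can
		 * retire before we move on to the next engine.
		 */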
		*h.batch = MI_BATCH_BUFFER_END;
		intel_gt_chipset_flush(engine->gt);

		i915_request_put(prev);

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

fini:
	hang_fini(&h);
unlock:
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine = gt->engine[RCS0];
	struct hang h;
	struct i915_request *rq;
	struct i915_gpu_coredump *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (!engine || !intel_engine_can_store_dword(engine))
		return 0;

	err = hang_init(&h, gt);
	if (err)
		return err;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		intel_gt_set_wedged(gt);

		err = -EIO;
		goto err_request;
	}

	/* Temporarily disable error capture */
	error = xchg(&global->first_error, (void *)-1);

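	/*
	 * With a sentinel parked in first_error, the handler below should
	 * not record a fresh coredump for this deliberate hang; exercise
	 * it against just the guilty engine.
	 */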
	intel_gt_handle_error(gt, engine->mask, 0, NULL);

	xchg(&global->first_error, error);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_request_put(rq);
err_fini:
	hang_fini(&h);
	return err;
}

static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
				     const struct igt_atomic_section *p,
				     const char *mode)
{
	struct tasklet_struct * const t = &engine->execlists.tasklet;
	int err;

	GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
		  engine->name, mode, p->name);

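	/*
	 * Park the submission tasklet and enter the atomic section
	 * (e.g. irqs/softirqs/preemption disabled, as provided by
	 * igt_atomic_phases) to check the engine reset never sleeps.
	 */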
	tasklet_disable(t);
	p->critical_section_begin();

	err = intel_engine_reset(engine, NULL);

	p->critical_section_end();
	tasklet_enable(t);

	if (err)
		pr_err("i915_reset_engine(%s:%s) failed under %s\n",
		       engine->name, mode, p->name);

	return err;
}

static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
				   const struct igt_atomic_section *p)
{
	struct i915_request *rq;
	struct hang h;
	int err;

	err = __igt_atomic_reset_engine(engine, p, "idle");
	if (err)
		return err;

	err = hang_init(&h, engine->gt);
	if (err)
		return err;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (wait_until_running(&h, rq)) {
		err = __igt_atomic_reset_engine(engine, p, "active");
	} else {
		pr_err("%s(%s): Failed to start request %llx, at %x\n",
		       __func__, engine->name,
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_gt_set_wedged(engine->gt);
		err = -EIO;
	}

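	/*
	 * If the reset succeeded, the hung request should be terminated
	 * promptly; give it 50ms before declaring the GT wedged.
	 */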
	if (err == 0) {
		struct intel_wedge_me w;

		intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */)
			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (intel_gt_is_wedged(engine->gt))
			err = -EIO;
	}

	i915_request_put(rq);
out:
	hang_fini(&h);
	return err;
}

static int igt_reset_engines_atomic(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

	/* Check that engine resets are usable from atomic context */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto unlock;

	for (p = igt_atomic_phases; p->name; p++) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine(engine, gt, id) {
			err = igt_atomic_reset_engine(engine, p);
			if (err)
				goto out;
		}
	}

out:
	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);
unlock:
	igt_global_reset_unlock(gt);

	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_nop),
		SUBTEST(igt_reset_nop_engine),
		SUBTEST(igt_reset_idle_engine),
		SUBTEST(igt_reset_active_engine),
		SUBTEST(igt_reset_engines),
		SUBTEST(igt_reset_engines_atomic),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_reset_wait),
		SUBTEST(igt_reset_evict_ggtt),
		SUBTEST(igt_reset_evict_ppgtt),
		SUBTEST(igt_reset_evict_fence),
		SUBTEST(igt_handle_error),
	};
	struct intel_gt *gt = &i915->gt;
	intel_wakeref_t wakeref;
	int err;

	if (!intel_has_gpu_reset(gt))
		return 0;

	if (intel_gt_is_wedged(gt))
		return -EIO; /* we're long past hope of a successful reset */

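	/* Keep the device awake for the duration; resets need the GT powered. */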
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	err = intel_gt_live_subtests(tests, gt);

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);

	return err;
}