/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */

#include "i915_drv.h"
#include "intel_renderstate.h"
#include "gt/intel_context.h"
#include "intel_ring.h"

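/*
 * Select the pre-generated "null" render state for this engine: a minimal,
 * valid 3D pipeline setup emitted once so the render engine does not sample
 * uninitialised state. Only the render engine on gen6-gen9 has such a table;
 * everything else returns NULL and skips render state emission.
 */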
static const struct intel_renderstate_rodata *
render_state_get_rodata(const struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return NULL;

	switch (INTEL_GEN(engine->i915)) {
	case 6:
		return &gen6_null_state;
	case 7:
		return &gen7_null_state;
	case 8:
		return &gen8_null_state;
	case 9:
		return &gen9_null_state;
	}

	return NULL;
}

/*
 * Macro to add commands to the auxiliary batch.
 * This macro only checks for page overflow before inserting the commands.
 * That is sufficient because the null state generator builds the final batch
 * in two passes, generating commands and state separately. At that point the
 * sizes of both are known, and the generator compacts them by relocating the
 * state right after the commands while taking care of alignment, so we
 * should have sufficient space below them for adding new commands.
 */
#define OUT_BATCH(batch, i, val)				\
	do {							\
		if ((i) >= PAGE_SIZE / sizeof(u32))		\
			goto out;				\
		(batch)[(i)++] = (val);				\
	} while (0)

static int render_state_setup(struct intel_renderstate *so,
			      struct drm_i915_private *i915)
{
	const struct intel_renderstate_rodata *rodata = so->rodata;
	unsigned int i = 0, reloc_index = 0;
	int ret = -EINVAL;
	u32 *d;

	d = i915_gem_object_pin_map(so->vma->obj, I915_MAP_WB);
	if (IS_ERR(d))
		return PTR_ERR(d);

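	/*
	 * Copy the batch into the pinned map, patching each relocation as we
	 * go: rodata->reloc[] lists the byte offsets of dwords that need the
	 * vma's GGTT address added to them. On platforms with 64-bit
	 * relocations the following dword carries the upper 32 bits and must
	 * be zero in the source table.
	 */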
	while (i < rodata->batch_items) {
		u32 s = rodata->batch[i];

		if (i * 4 == rodata->reloc[reloc_index]) {
			u64 r = s + so->vma->node.start;

			s = lower_32_bits(r);
			if (HAS_64BIT_RELOC(i915)) {
				if (i + 1 >= rodata->batch_items ||
				    rodata->batch[i + 1] != 0)
					goto out;

				d[i++] = s;
				s = upper_32_bits(r);
			}

			reloc_index++;
		}

		d[i++] = s;
	}

	if (rodata->reloc[reloc_index] != -1) {
		drm_err(&i915->drm, "only %d relocs resolved\n", reloc_index);
		goto out;
	}

	so->batch_offset = i915_ggtt_offset(so->vma);
	so->batch_size = rodata->batch_items * sizeof(u32);

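	/*
	 * Pad with MI_NOOPs up to the next cacheline boundary so the
	 * auxiliary batch that follows starts cacheline-aligned.
	 */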
	while (i % CACHELINE_DWORDS)
		OUT_BATCH(d, i, MI_NOOP);

	so->aux_offset = i * sizeof(u32);

	if (HAS_POOLED_EU(i915)) {
		/*
		 * We always program the 3x6 pool config, but depending on
		 * which subslice is disabled the HW drops down to the
		 * appropriate config shown below.
		 *
		 * In the below table the 2x6 config always refers to the
		 * fused-down version; native 2x6 is not available and can
		 * be ignored.
		 *
		 * SNo  subslices config              eu pool configuration
		 * -----------------------------------------------------------
		 * 1    3 subslices enabled (3x6) -   0x00777000  (9+9)
		 * 2    ss0 disabled (2x6) -          0x00777000  (3+9)
		 * 3    ss1 disabled (2x6) -          0x00770000  (6+6)
		 * 4    ss2 disabled (2x6) -          0x00007000  (9+3)
		 */
		u32 eu_pool_config = 0x00777000;

		OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
		OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
		OUT_BATCH(d, i, eu_pool_config);
		OUT_BATCH(d, i, 0);
		OUT_BATCH(d, i, 0);
		OUT_BATCH(d, i, 0);
	}

	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);

	so->aux_size = i * sizeof(u32) - so->aux_offset;
	so->aux_offset += so->batch_offset;
	/*
	 * Since we are sending length, we need to strictly conform to
	 * all requirements. For Gen2 this must be a multiple of 8.
	 */
	so->aux_size = ALIGN(so->aux_size, 8);

	ret = 0;
out:
	__i915_gem_object_flush_map(so->vma->obj, 0, i * sizeof(u32));
	__i915_gem_object_release_map(so->vma->obj);
	return ret;
}

#undef OUT_BATCH

int intel_renderstate_init(struct intel_renderstate *so,
			   struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct drm_i915_gem_object *obj = NULL;
	int err;

	memset(so, 0, sizeof(*so));

	so->rodata = render_state_get_rodata(engine);
	if (so->rodata) {
		if (so->rodata->batch_items * 4 > PAGE_SIZE)
			return -EINVAL;

		obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
		if (IS_ERR(so->vma)) {
			err = PTR_ERR(so->vma);
			goto err_obj;
		}
	}

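	/*
	 * Pin the context and the batch vma under a ww transaction: any
	 * -EDEADLK from the ww mutex class unwinds to err_fini, backs off
	 * and retries the whole sequence with the contended lock held.
	 */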
	i915_gem_ww_ctx_init(&so->ww, true);
retry:
	err = intel_context_pin_ww(ce, &so->ww);
	if (err)
		goto err_fini;

	/* return early if there's nothing to setup */
	if (!err && !so->rodata)
		return 0;

	err = i915_gem_object_lock(so->vma->obj, &so->ww);
	if (err)
		goto err_context;

	err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto err_context;

	err = render_state_setup(so, engine->i915);
	if (err)
		goto err_unpin;

	return 0;

err_unpin:
	i915_vma_unpin(so->vma);
err_context:
	intel_context_unpin(ce);
err_fini:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&so->ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&so->ww);
err_obj:
	if (obj)
		i915_gem_object_put(obj);
	so->vma = NULL;
	return err;
}

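/*
 * Emit the null state into the request: first the main command batch, then,
 * if one was built, the auxiliary batch. An empty aux batch is just the
 * 8-byte-aligned MI_BATCH_BUFFER_END, hence the aux_size > 8 check.
 */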
int intel_renderstate_emit(struct intel_renderstate *so,
			   struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	int err;

	if (!so->vma)
		return 0;

	err = i915_request_await_object(rq, so->vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(so->vma, rq, 0);
	if (err)
		return err;

	err = engine->emit_bb_start(rq,
				    so->batch_offset, so->batch_size,
				    I915_DISPATCH_SECURE);
	if (err)
		return err;

	if (so->aux_size > 8) {
		err = engine->emit_bb_start(rq,
					    so->aux_offset, so->aux_size,
					    I915_DISPATCH_SECURE);
		if (err)
			return err;
	}

	return 0;
}

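/*
 * Release everything intel_renderstate_init() acquired: the vma pin and
 * closure, the context pin, the ww transaction, and the final reference
 * on the batch object.
 */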
void intel_renderstate_fini(struct intel_renderstate *so,
			    struct intel_context *ce)
{
	if (so->vma) {
		i915_vma_unpin(so->vma);
		i915_vma_close(so->vma);
	}

	intel_context_unpin(ce);
	i915_gem_ww_ctx_fini(&so->ww);

	if (so->vma)
		i915_gem_object_put(so->vma->obj);
}