// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "intel_gpu_commands.h"

#define GT3_INLINE_DATA_DELAYS 0x1E00
#define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))

struct cb_kernel {
	const void *data;
	u32 size;
};

#define CB_KERNEL(name) { .data = (name), .size = sizeof(name) }

#include "ivb_clear_kernel.c"
static const struct cb_kernel cb_kernel_ivb = CB_KERNEL(ivb_clear_kernel);

#include "hsw_clear_kernel.c"
static const struct cb_kernel cb_kernel_hsw = CB_KERNEL(hsw_clear_kernel);

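/*
 * A batch_chunk is a simple write cursor over one region of the batch
 * buffer: the commands are written into a chunk at the front of the
 * buffer, while the kernel binary and surface state go into a separate
 * chunk further in (see emit_batch()).
 */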
struct batch_chunk {
	struct i915_vma *vma;
	u32 offset;
	u32 *start;
	u32 *end;
	u32 max_items;
};

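/*
 * batch_vals holds the per-platform parameters used to size the batch:
 * how many HW threads to dispatch and the resulting layout (state and
 * surface offsets, total size) of the buffer built by
 * gen7_setup_clear_gpr_bb().
 */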
struct batch_vals {
	u32 max_threads;
	u32 state_start;
	u32 surface_start;
	u32 surface_height;
	u32 surface_width;
	u32 size;
};

static inline int num_primitives(const struct batch_vals *bv)
{
	/*
	 * We need to saturate the GPU with work in order to dispatch
	 * a shader on every HW thread, and clear the thread-local registers.
	 * In short, we have to dispatch work faster than the shaders can
	 * run in order to fill the EU and occupy each HW thread.
	 */
	return bv->max_threads;
}

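/*
 * Pick the thread count and scratch-surface dimensions for this SKU,
 * then derive the layout of the buffer we will fill:
 *
 *	[ commands | state (4K aligned, at most 4K) | scratch surface ]
 *	0           state_start                      surface_start .. size
 */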
static void
batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
{
	if (IS_HASWELL(i915)) {
		switch (INTEL_INFO(i915)->gt) {
		default:
		case 1:
			bv->max_threads = 70;
			break;
		case 2:
			bv->max_threads = 140;
			break;
		case 3:
			bv->max_threads = 280;
			break;
		}
		bv->surface_height = 16 * 16;
		bv->surface_width = 32 * 2 * 16;
	} else {
		switch (INTEL_INFO(i915)->gt) {
		default:
		case 1: /* including vlv */
			bv->max_threads = 36;
			break;
		case 2:
			bv->max_threads = 128;
			break;
		}
		bv->surface_height = 16 * 8;
		bv->surface_width = 32 * 16;
	}
	bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K);
	bv->surface_start = bv->state_start + SZ_4K;
	bv->size = bv->surface_start + bv->surface_height * bv->surface_width;
}

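/*
 * Minimal helpers for filling a batch_chunk: batch_add() emits a single
 * dword, batch_alloc_items()/batch_alloc_bytes() reserve (optionally
 * aligned, zero-padded) space within the chunk, and batch_offset()/
 * batch_addr() translate a write pointer back into a buffer offset or
 * GTT address for the hardware.
 */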
static void batch_init(struct batch_chunk *bc,
		       struct i915_vma *vma,
		       u32 *start, u32 offset, u32 max_bytes)
{
	bc->vma = vma;
	bc->offset = offset;
	bc->start = start + bc->offset / sizeof(*bc->start);
	bc->end = bc->start;
	bc->max_items = max_bytes / sizeof(*bc->start);
}

static u32 batch_offset(const struct batch_chunk *bc, u32 *cs)
{
	return (cs - bc->start) * sizeof(*bc->start) + bc->offset;
}

static u32 batch_addr(const struct batch_chunk *bc)
{
	return bc->vma->node.start;
}

static void batch_add(struct batch_chunk *bc, const u32 d)
{
	GEM_BUG_ON((bc->end - bc->start) >= bc->max_items);
	*bc->end++ = d;
}

static u32 *batch_alloc_items(struct batch_chunk *bc, u32 align, u32 items)
{
	u32 *map;

	if (align) {
		u32 *end = PTR_ALIGN(bc->end, align);

		memset32(bc->end, 0, end - bc->end);
		bc->end = end;
	}

	map = bc->end;
	bc->end += items;

	return map;
}

static u32 *batch_alloc_bytes(struct batch_chunk *bc, u32 align, u32 bytes)
{
	GEM_BUG_ON(!IS_ALIGNED(bytes, sizeof(*bc->start)));
	return batch_alloc_items(bc, align, bytes / sizeof(*bc->start));
}

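/*
 * Emit a SURFACE_STATE describing the scratch region as a 2D
 * B8G8R8A8_UNORM render target, returning its offset within the batch so
 * it can be referenced from the binding table.
 */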
static u32
gen7_fill_surface_state(struct batch_chunk *state,
			const u32 dst_offset,
			const struct batch_vals *bv)
{
	u32 surface_h = bv->surface_height;
	u32 surface_w = bv->surface_width;
	u32 *cs = batch_alloc_items(state, 32, 8);
	u32 offset = batch_offset(state, cs);

#define SURFACE_2D 1
#define SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define RENDER_CACHE_READ_WRITE 1

	*cs++ = SURFACE_2D << 29 |
		(SURFACEFORMAT_B8G8R8A8_UNORM << 18) |
		(RENDER_CACHE_READ_WRITE << 8);

	*cs++ = batch_addr(state) + dst_offset;

	*cs++ = ((surface_h / 4 - 1) << 16) | (surface_w / 4 - 1);
	*cs++ = surface_w;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
#define SHADER_CHANNELS(r, g, b, a) \
	(((r) << 25) | ((g) << 22) | ((b) << 19) | ((a) << 16))
	*cs++ = SHADER_CHANNELS(4, 5, 6, 7);
	batch_advance(state, cs);

	return offset;
}

static u32
gen7_fill_binding_table(struct batch_chunk *state,
			const struct batch_vals *bv)
{
	u32 surface_start =
		gen7_fill_surface_state(state, bv->surface_start, bv);
	u32 *cs = batch_alloc_items(state, 32, 8);
	u32 offset = batch_offset(state, cs);

	*cs++ = surface_start - state->offset;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(state, cs);

	return offset;
}

static u32
gen7_fill_kernel_data(struct batch_chunk *state,
		      const u32 *data,
		      const u32 size)
{
	return batch_offset(state,
			    memcpy(batch_alloc_bytes(state, 64, size),
				   data, size));
}

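/*
 * Build the interface descriptor for the clear kernel: copy the kernel
 * binary into the state chunk, emit its binding table, then write the
 * descriptor itself followed by 'count - 1' zeroed dummy descriptors.
 */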
static u32
gen7_fill_interface_descriptor(struct batch_chunk *state,
			       const struct batch_vals *bv,
			       const struct cb_kernel *kernel,
			       unsigned int count)
{
	u32 kernel_offset =
		gen7_fill_kernel_data(state, kernel->data, kernel->size);
	u32 binding_table = gen7_fill_binding_table(state, bv);
	u32 *cs = batch_alloc_items(state, 32, 8 * count);
	u32 offset = batch_offset(state, cs);

	*cs++ = kernel_offset;
	*cs++ = (1 << 7) | (1 << 13);
	*cs++ = 0;
	*cs++ = (binding_table - state->offset) | 1;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	/* 1 - 63 dummy idds */
	memset32(cs, 0x00, (count - 1) * 8);
	batch_advance(state, cs + (count - 1) * 8);

	return offset;
}

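/*
 * Point all of the state base addresses at the start of our batch VMA,
 * with the surface state base offset to where the descriptors live.
 */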
static void
gen7_emit_state_base_address(struct batch_chunk *batch,
			     u32 surface_state_base)
{
	u32 *cs = batch_alloc_items(batch, 0, 10);

	*cs++ = STATE_BASE_ADDRESS | (10 - 2);
	/* general */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* surface */
	*cs++ = (batch_addr(batch) + surface_state_base) | BASE_ADDRESS_MODIFY;
	/* dynamic */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* indirect */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* instruction */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;

	/* general/dynamic/indirect/instruction access bounds */
	*cs++ = 0;
	*cs++ = BASE_ADDRESS_MODIFY;
	*cs++ = 0;
	*cs++ = BASE_ADDRESS_MODIFY;
	batch_advance(batch, cs);
}

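/*
 * Program MEDIA_VFE_STATE: the maximum number of HW threads to use, a
 * single URB entry and the URB/CURBE allocation sizes (in 256-bit units).
 */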
static void
gen7_emit_vfe_state(struct batch_chunk *batch,
		    const struct batch_vals *bv,
		    u32 urb_size, u32 curbe_size,
		    u32 mode)
{
	u32 threads = bv->max_threads - 1;
	u32 *cs = batch_alloc_items(batch, 32, 8);

	*cs++ = MEDIA_VFE_STATE | (8 - 2);

	/* scratch buffer */
	*cs++ = 0;

	/* number of threads & urb entries for GPGPU vs Media Mode */
	*cs++ = threads << 16 | 1 << 8 | mode << 2;

	*cs++ = 0;

	/* urb entry size & curbe size in 256-bit units */
	*cs++ = urb_size << 16 | curbe_size;

	/* scoreboard */
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;
	batch_advance(batch, cs);
}

static void
gen7_emit_interface_descriptor_load(struct batch_chunk *batch,
				    const u32 interface_descriptor,
				    unsigned int count)
{
	u32 *cs = batch_alloc_items(batch, 8, 4);

	*cs++ = MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2);
	*cs++ = 0;
	*cs++ = count * 8 * sizeof(*cs);

	/*
	 * interface descriptor address - it is relative to the dynamic
	 * state base address
	 */
	*cs++ = interface_descriptor;
	batch_advance(batch, cs);
}

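/*
 * Dispatch one thread of the clear kernel via MEDIA_OBJECT. The inline
 * data carries the (x, y) tile offset this thread writes to in the
 * scratch surface, plus the GT3_INLINE_DATA_DELAYS value.
 */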
static void
gen7_emit_media_object(struct batch_chunk *batch,
		       unsigned int media_object_index)
{
	unsigned int x_offset = (media_object_index % 16) * 64;
	unsigned int y_offset = (media_object_index / 16) * 16;
	unsigned int pkt = 6 + 3;
	u32 *cs;

	cs = batch_alloc_items(batch, 8, pkt);

	*cs++ = MEDIA_OBJECT | (pkt - 2);

	/* interface descriptor offset */
	*cs++ = 0;

	/* without indirect data */
	*cs++ = 0;
	*cs++ = 0;

	/* scoreboard */
	*cs++ = 0;
	*cs++ = 0;

	/* inline */
	*cs++ = y_offset << 16 | x_offset;
	*cs++ = 0;
	*cs++ = GT3_INLINE_DATA_DELAYS;

	batch_advance(batch, cs);
}

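/*
 * PIPE_CONTROL helpers used to serialise the batch: a full flush of the
 * render/depth/DC caches with a CS stall, and a stalled state-cache
 * invalidate (split into two PIPE_CONTROLs, stalling first, on ivb).
 */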
static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
{
	u32 *cs = batch_alloc_items(batch, 0, 4);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		PIPE_CONTROL_DC_FLUSH_ENABLE |
		PIPE_CONTROL_CS_STALL;
	*cs++ = 0;
	*cs++ = 0;

	batch_advance(batch, cs);
}

static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch)
{
	u32 *cs = batch_alloc_items(batch, 0, 10);

	/* ivb: Stall before STATE_CACHE_INVALIDATE */
	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD |
		PIPE_CONTROL_CS_STALL;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	batch_advance(batch, cs);
}

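/*
 * Assemble the full batch: reset the inherited CACHE_MODE registers,
 * switch to the media pipeline, program the base addresses and the
 * VFE/interface-descriptor state, then issue one MEDIA_OBJECT per HW
 * thread so the clear kernel runs on every thread.
 */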
static void emit_batch(struct i915_vma * const vma,
		       u32 *start,
		       const struct batch_vals *bv)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	const unsigned int desc_count = 1;
	const unsigned int urb_size = 1;
	struct batch_chunk cmds, state;
	u32 descriptors;
	unsigned int i;

	batch_init(&cmds, vma, start, 0, bv->state_start);
	batch_init(&state, vma, start, bv->state_start, SZ_4K);

	descriptors = gen7_fill_interface_descriptor(&state, bv,
						     IS_HASWELL(i915) ?
						     &cb_kernel_hsw :
						     &cb_kernel_ivb,
						     desc_count);

	/* Reset inherited context registers */
	gen7_emit_pipeline_flush(&cmds);
	gen7_emit_pipeline_invalidate(&cmds);
	batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
	batch_add(&cmds, 0xffff0000 |
			((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
			 HIZ_RAW_STALL_OPT_DISABLE :
			 0));
	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
	batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
	gen7_emit_pipeline_invalidate(&cmds);
	gen7_emit_pipeline_flush(&cmds);

	/* Switch to the media pipeline and our base address */
	gen7_emit_pipeline_invalidate(&cmds);
	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
	batch_add(&cmds, MI_NOOP);
	gen7_emit_pipeline_invalidate(&cmds);

	gen7_emit_pipeline_flush(&cmds);
	gen7_emit_state_base_address(&cmds, descriptors);
	gen7_emit_pipeline_invalidate(&cmds);

	/* Set the clear-residual kernel state */
	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
	gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count);

	/* Execute the kernel on all HW threads */
	for (i = 0; i < num_primitives(bv); i++)
		gen7_emit_media_object(&cmds, i);

	batch_add(&cmds, MI_BATCH_BUFFER_END);
}

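/*
 * gen7_setup_clear_gpr_bb - build the residual-clear batch for an engine
 * @engine: the render engine the batch will run on
 * @vma: the buffer to fill with the batch, or NULL to query the size
 *
 * When @vma is NULL, only the required buffer size (in bytes) is
 * returned so the caller can allocate a large enough object; on a second
 * call with the pinned @vma, the batch described above is written into it.
 *
 * A caller-side sketch (the allocation helper name is illustrative, not
 * taken from this file):
 *
 *	size = gen7_setup_clear_gpr_bb(engine, NULL);
 *	vma = create_and_pin_batch(engine, size);	// hypothetical helper
 *	err = gen7_setup_clear_gpr_bb(engine, vma);
 *
 * Returns the batch size when @vma is NULL, 0 on success, or a negative
 * error code if the object could not be mapped.
 */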
int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
			    struct i915_vma * const vma)
{
	struct batch_vals bv;
	u32 *batch;

	batch_get_defaults(engine->i915, &bv);
	if (!vma)
		return bv.size;

	GEM_BUG_ON(vma->obj->base.size < bv.size);

	batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	emit_batch(vma, memset(batch, 0, bv.size), &bv);

	i915_gem_object_flush_map(vma->obj);
	__i915_gem_object_release_map(vma->obj);

	return 0;
}