/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Command list validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute command lists could escalate privilege by
 * overwriting system memory (drawing to it as a framebuffer) or
 * reading system memory it shouldn't (reading it as a vertex buffer
 * or index buffer).
 *
 * We validate binner command lists to ensure that all accesses are
 * within the bounds of the GEM objects referenced by the submitted
 * job.  The validator explicitly whitelists packets, and looks at the
 * offsets in any address fields to make sure they're contained within
 * the BOs they reference.
 *
 * Note that because CL validation is already reading the
 * user-submitted CL and writing the validated copy out to the memory
 * that the GPU will actually read, this is also where GEM relocation
 * processing (turning BO references into actual addresses for the GPU
 * to use) happens.
 */

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_packet.h"

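/* Every packet validator below shares one signature, declared through
 * VALIDATE_ARGS: "exec" is the job being validated, "validated" points
 * into the kernel's private copy of the CL just past the packet byte
 * (which is where relocations get written), and "untrusted" points
 * just past the packet byte in the user-supplied copy.  For example,
 * validate_flush(VALIDATE_ARGS) expands to:
 *
 *	validate_flush(struct vc4_exec_info *exec,
 *		       void *validated,
 *		       void *untrusted)
 */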
#define VALIDATE_ARGS \
	struct vc4_exec_info *exec,			\
	void *validated,				\
	void *untrusted

/** Return the width in pixels of a 64-byte microtile. */
static uint32_t
utile_width(int cpp)
{
	switch (cpp) {
	case 1:
	case 2:
		return 8;
	case 4:
		return 4;
	case 8:
		return 2;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}

/** Return the height in pixels of a 64-byte microtile. */
static uint32_t
utile_height(int cpp)
{
	switch (cpp) {
	case 1:
		return 8;
	case 2:
	case 4:
	case 8:
		return 4;
	default:
		DRM_ERROR("unknown cpp: %d\n", cpp);
		return 1;
	}
}

/**
 * size_is_lt() - Returns whether a miplevel of the given size will
 * use the lineartile (LT) tiling layout rather than the normal T
 * tiling layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	return (width <= 4 * utile_width(cpp) ||
		height <= 4 * utile_height(cpp));
}

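/**
 * vc4_use_bo() - Returns the BO at the given index in the job's BO
 * list, for use in a relocation.
 * @exec: Job being validated
 * @hindex: Index into the job's BO lookup table
 *
 * Refuses BOs containing validated shaders: those may only be
 * referenced from shader records, where the shader validator has
 * vetted their contents.
 */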
struct drm_gem_cma_object *
vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
	struct drm_gem_cma_object *obj;
	struct vc4_bo *bo;

	if (hindex >= exec->bo_count) {
		DRM_DEBUG("BO index %d greater than BO count %d\n",
			  hindex, exec->bo_count);
		return NULL;
	}
	obj = exec->bo[hindex];
	bo = to_vc4_bo(&obj->base);

	if (bo->validated_shader) {
		DRM_DEBUG("Trying to use shader BO as something other than "
			  "a shader\n");
		return NULL;
	}

	return obj;
}

static struct drm_gem_cma_object *
vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
{
	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}

static bool
validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
	/* Note that the untrusted pointer passed to these functions is
	 * incremented past the packet byte.
	 */
	return (untrusted - 1 == exec->bin_u + pos);
}

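/* Returns the size in bytes of a GL shader record, as encoded in the
 * low nibble of the shader record pointer: bits 0-2 are the attribute
 * count (where 0 means 8), and bit 3 selects the extended record
 * layout.  For example, pointer_bits == 0x3 describes a non-extended
 * record with 3 attributes: 36 + 3 * 8 = 60 bytes.
 */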
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t attribute_count = pointer_bits & 7;
	bool extended = pointer_bits & 8;

	if (attribute_count == 0)
		attribute_count = 8;

	if (extended)
		return 100 + attribute_count * 4;
	else
		return 36 + attribute_count * 8;
}

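/**
 * vc4_check_tex_size() - Returns whether a surface of the given
 * dimensions and tiling layout, starting at @offset, fits within its
 * backing BO.
 * @exec: Job being validated
 * @fbo: BO backing the surface
 * @offset: Byte offset of the surface within @fbo
 * @tiling_format: One of VC4_TILING_FORMAT_*
 * @width: Width of the surface in pixels
 * @height: Height of the surface in pixels
 * @cpp: Bytes per pixel
 */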
bool
vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
		   uint32_t offset, uint8_t tiling_format,
		   uint32_t width, uint32_t height, uint8_t cpp)
{
	uint32_t aligned_width, aligned_height, stride, size;
	uint32_t utile_w = utile_width(cpp);
	uint32_t utile_h = utile_height(cpp);

	/* The shaded vertex format stores signed 12.4 fixed point
	 * (-2048,2047) offsets from the viewport center, so we should
	 * never have a render target larger than 4096.  The texture
	 * unit can only sample from 2048x2048, so it's even more
	 * restricted.  This lets us avoid worrying about overflow in
	 * our math.
	 */
	if (width > 4096 || height > 4096) {
		DRM_DEBUG("Surface dimensions (%d,%d) too large",
			  width, height);
		return false;
	}

	switch (tiling_format) {
	case VC4_TILING_FORMAT_LINEAR:
		aligned_width = round_up(width, utile_w);
		aligned_height = height;
		break;
	case VC4_TILING_FORMAT_T:
		aligned_width = round_up(width, utile_w * 8);
		aligned_height = round_up(height, utile_h * 8);
		break;
	case VC4_TILING_FORMAT_LT:
		aligned_width = round_up(width, utile_w);
		aligned_height = round_up(height, utile_h);
		break;
	default:
		DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
		return false;
	}

	stride = aligned_width * cpp;
	size = stride * aligned_height;

	if (size + offset < size ||
	    size + offset > fbo->base.size) {
		DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
			  width, height,
			  aligned_width, aligned_height,
			  size, offset, fbo->base.size);
		return false;
	}

	return true;
}

static int
validate_flush(VALIDATE_ARGS)
{
	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
		DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
		return -EINVAL;
	}
	exec->found_flush = true;

	return 0;
}

static int
validate_start_tile_binning(VALIDATE_ARGS)
{
	if (exec->found_start_tile_binning_packet) {
		DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
		return -EINVAL;
	}
	exec->found_start_tile_binning_packet = true;

	if (!exec->found_tile_binning_mode_config_packet) {
		DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
		return -EINVAL;
	}

	return 0;
}

static int
validate_increment_semaphore(VALIDATE_ARGS)
{
	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
		DRM_DEBUG("Bin CL must end with "
			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
		return -EINVAL;
	}
	exec->found_increment_semaphore_packet = true;

	return 0;
}

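/* Packet layout, relative to the untrusted pointer (just past the
 * packet byte): byte 0's high nibble selects the index size (nonzero
 * means 16-bit indices, zero means 8-bit), bytes 1-4 hold the index
 * count, bytes 5-8 the byte offset of the indices within the IB, and
 * bytes 9-12 the maximum index the primitives may fetch.
 */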
static int
validate_indexed_prim_list(VALIDATE_ARGS)
{
	struct drm_gem_cma_object *ib;
	uint32_t length = *(uint32_t *)(untrusted + 1);
	uint32_t offset = *(uint32_t *)(untrusted + 5);
	uint32_t max_index = *(uint32_t *)(untrusted + 9);
	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
	struct vc4_shader_state *shader_state;

	/* Check overflow condition */
	if (exec->shader_state_count == 0) {
		DRM_DEBUG("shader state must precede primitives\n");
		return -EINVAL;
	}
	shader_state = &exec->shader_state[exec->shader_state_count - 1];

	if (max_index > shader_state->max_index)
		shader_state->max_index = max_index;

	ib = vc4_use_handle(exec, 0);
	if (!ib)
		return -EINVAL;

	exec->bin_dep_seqno = max(exec->bin_dep_seqno,
				  to_vc4_bo(&ib->base)->write_seqno);

	if (offset > ib->base.size ||
	    (ib->base.size - offset) / index_size < length) {
		DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
			  offset, length, index_size, ib->base.size);
		return -EINVAL;
	}

	*(uint32_t *)(validated + 5) = ib->paddr + offset;

	return 0;
}

static int
validate_gl_array_primitive(VALIDATE_ARGS)
{
	uint32_t length = *(uint32_t *)(untrusted + 1);
	uint32_t base_index = *(uint32_t *)(untrusted + 5);
	uint32_t max_index;
	struct vc4_shader_state *shader_state;

	/* Check overflow condition */
	if (exec->shader_state_count == 0) {
		DRM_DEBUG("shader state must precede primitives\n");
		return -EINVAL;
	}
	shader_state = &exec->shader_state[exec->shader_state_count - 1];

	if (length + base_index < length) {
		DRM_DEBUG("primitive vertex count overflow\n");
		return -EINVAL;
	}
	max_index = length + base_index - 1;

	if (max_index > shader_state->max_index)
		shader_state->max_index = max_index;

	return 0;
}

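/* The shader state packet's payload is a single word that may only
 * carry the attribute count/extended flag in its low four bits;
 * shader records are consumed consecutively from the shader rec BO.
 * Validation writes back the physical address of the next validated
 * record (preserving those low bits) and advances shader_rec_p past
 * the record, rounded up to 16 bytes.
 */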
static int
validate_gl_shader_state(VALIDATE_ARGS)
{
	uint32_t i = exec->shader_state_count++;

	if (i >= exec->shader_state_size) {
		DRM_DEBUG("More requests for shader states than declared\n");
		return -EINVAL;
	}

	exec->shader_state[i].addr = *(uint32_t *)untrusted;
	exec->shader_state[i].max_index = 0;

	if (exec->shader_state[i].addr & ~0xf) {
		DRM_DEBUG("high bits set in GL shader rec reference\n");
		return -EINVAL;
	}

	*(uint32_t *)validated = (exec->shader_rec_p +
				  exec->shader_state[i].addr);

	exec->shader_rec_p +=
		roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);

	return 0;
}

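/* Validates the binning mode config packet: checks the tile counts
 * and flags supplied by userspace, allocates a slot in the binner BO
 * for this job, and patches the tile allocation and tile state
 * addresses in the validated packet to point into that slot.
 */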
static int
validate_tile_binning_config(VALIDATE_ARGS)
{
	struct drm_device *dev = exec->exec_bo->base.dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint8_t flags;
	uint32_t tile_state_size;
	uint32_t tile_count, bin_addr;
	int bin_slot;

	if (exec->found_tile_binning_mode_config_packet) {
		DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
		return -EINVAL;
	}
	exec->found_tile_binning_mode_config_packet = true;

	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
	flags = *(uint8_t *)(untrusted + 14);

	if (exec->bin_tiles_x == 0 ||
	    exec->bin_tiles_y == 0) {
		DRM_DEBUG("Tile binning config of %dx%d too small\n",
			  exec->bin_tiles_x, exec->bin_tiles_y);
		return -EINVAL;
	}

	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
		DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
		return -EINVAL;
	}

	bin_slot = vc4_v3d_get_bin_slot(vc4);
	if (bin_slot < 0) {
		if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
			DRM_ERROR("Failed to allocate binner memory: %d\n",
				  bin_slot);
		}
		return bin_slot;
	}

	/* The slot we allocated will only be used by this job, and is
	 * free when the job completes rendering.
	 */
	exec->bin_slots |= BIT(bin_slot);
	bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size;

	/* The tile state data array is 48 bytes per tile, and we put it at
	 * the start of a BO containing both it and the tile alloc.
	 */
	tile_state_size = 48 * tile_count;

	/* Since the tile alloc array will follow us, align. */
	exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);

	*(uint8_t *)(validated + 14) =
		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));

	/* tile alloc address. */
	*(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
	/* tile alloc size. */
	*(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
					exec->tile_alloc_offset);
	/* tile state address. */
	*(uint32_t *)(validated + 8) = bin_addr;

	return 0;
}

static int
validate_gem_handles(VALIDATE_ARGS)
{
	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
	return 0;
}

#define VC4_DEFINE_PACKET(packet, func) \
	[packet] = { packet ## _SIZE, #packet, func }

static const struct cmd_info {
	uint16_t len;
	const char *name;
	int (*func)(struct vc4_exec_info *exec, void *validated,
		    void *untrusted);
} cmd_info[] = {
	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
			  validate_start_tile_binning),
	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
			  validate_increment_semaphore),

	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
			  validate_indexed_prim_list),
	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
			  validate_gl_array_primitive),

	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),

	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),

	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
	/* Note: The docs say this was also 105, but it was 106 in the
	 * initial userland code drop.
	 */
	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),

	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
			  validate_tile_binning_config),

	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
};

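/**
 * vc4_validate_bin_cl() - Validates an untrusted binner command list.
 * @dev: DRM device
 * @validated: Kernel copy of the CL that the hardware will execute
 * @unvalidated: Copy of the user-submitted CL
 * @exec: Job being validated
 *
 * Walks the unvalidated CL one packet at a time, rejecting packets
 * not whitelisted in cmd_info, copying each accepted packet into the
 * validated CL, and running the packet's validate/relocate callback
 * on the copy.  Also enforces that the CL configures and starts tile
 * binning and ends with INCREMENT_SEMAPHORE followed by FLUSH.
 */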
int
vc4_validate_bin_cl(struct drm_device *dev,
		    void *validated,
		    void *unvalidated,
		    struct vc4_exec_info *exec)
{
	uint32_t len = exec->args->bin_cl_size;
	uint32_t dst_offset = 0;
	uint32_t src_offset = 0;

	while (src_offset < len) {
		void *dst_pkt = validated + dst_offset;
		void *src_pkt = unvalidated + src_offset;
		u8 cmd = *(uint8_t *)src_pkt;
		const struct cmd_info *info;

		if (cmd >= ARRAY_SIZE(cmd_info)) {
			DRM_DEBUG("0x%08x: packet %d out of bounds\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		info = &cmd_info[cmd];
		if (!info->name) {
			DRM_DEBUG("0x%08x: packet %d invalid\n",
				  src_offset, cmd);
			return -EINVAL;
		}

		if (src_offset + info->len > len) {
			DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
				  "exceeds bounds (0x%08x)\n",
				  src_offset, cmd, info->name, info->len,
				  src_offset + len);
			return -EINVAL;
		}

		if (cmd != VC4_PACKET_GEM_HANDLES)
			memcpy(dst_pkt, src_pkt, info->len);

		if (info->func && info->func(exec,
					     dst_pkt + 1,
					     src_pkt + 1)) {
			DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
				  src_offset, cmd, info->name);
			return -EINVAL;
		}

		src_offset += info->len;
		/* GEM handle loading doesn't produce HW packets. */
		if (cmd != VC4_PACKET_GEM_HANDLES)
			dst_offset += info->len;

		/* When the CL hits halt, it'll stop reading anything else. */
		if (cmd == VC4_PACKET_HALT)
			break;
	}

	exec->ct0ea = exec->ct0ca + dst_offset;

	if (!exec->found_start_tile_binning_packet) {
		DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
		return -EINVAL;
	}

	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
	 * semaphore is used to trigger the render CL to start up, and the
	 * FLUSH is what caps the bin lists with
	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
	 * render CL when they get called to) and actually triggers the queued
	 * semaphore increment.
	 */
	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
		DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
			  "VC4_PACKET_FLUSH\n");
		return -EINVAL;
	}

	return 0;
}

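/* Validates and relocates one texture sample's uniform parameters.
 * p0-p3 are the texture config words found in the uniform stream;
 * reloc_tex() bounds-checks the texture offset, the miplevel chain,
 * and the cube map stride against the texture BO, then writes the
 * relocated physical address for p0 into the validated uniform
 * stream.
 */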
static bool
reloc_tex(struct vc4_exec_info *exec,
	  void *uniform_data_u,
	  struct vc4_texture_sample_info *sample,
	  uint32_t texture_handle_index, bool is_cs)
{
	struct drm_gem_cma_object *tex;
	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
	uint32_t p2 = (sample->p_offset[2] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
	uint32_t p3 = (sample->p_offset[3] != ~0 ?
		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
	uint32_t cpp, tiling_format, utile_w, utile_h;
	uint32_t i;
	uint32_t cube_map_stride = 0;
	enum vc4_texture_data_type type;

	tex = vc4_use_bo(exec, texture_handle_index);
	if (!tex)
		return false;

	if (sample->is_direct) {
		uint32_t remaining_size = tex->base.size - p0;

		if (p0 > tex->base.size - 4) {
			DRM_DEBUG("UBO offset greater than UBO size\n");
			goto fail;
		}
		if (p1 > remaining_size - 4) {
			DRM_DEBUG("UBO clamp would allow reads "
				  "outside of UBO\n");
			goto fail;
		}
		*validated_p0 = tex->paddr + p0;
		return true;
	}

	if (width == 0)
		width = 2048;
	if (height == 0)
		height = 2048;

	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
			if (cube_map_stride) {
				DRM_DEBUG("Cube map stride set twice\n");
				goto fail;
			}

			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
		}
		if (!cube_map_stride) {
			DRM_DEBUG("Cube map stride not set\n");
			goto fail;
		}
	}

	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));

	switch (type) {
	case VC4_TEXTURE_TYPE_RGBA8888:
	case VC4_TEXTURE_TYPE_RGBX8888:
	case VC4_TEXTURE_TYPE_RGBA32R:
		cpp = 4;
		break;
	case VC4_TEXTURE_TYPE_RGBA4444:
	case VC4_TEXTURE_TYPE_RGBA5551:
	case VC4_TEXTURE_TYPE_RGB565:
	case VC4_TEXTURE_TYPE_LUMALPHA:
	case VC4_TEXTURE_TYPE_S16F:
	case VC4_TEXTURE_TYPE_S16:
		cpp = 2;
		break;
	case VC4_TEXTURE_TYPE_LUMINANCE:
	case VC4_TEXTURE_TYPE_ALPHA:
	case VC4_TEXTURE_TYPE_S8:
		cpp = 1;
		break;
	case VC4_TEXTURE_TYPE_ETC1:
		/* ETC1 is arranged as 64-bit blocks, where each block is 4x4
		 * pixels.
		 */
		cpp = 8;
		width = (width + 3) >> 2;
		height = (height + 3) >> 2;
		break;
	case VC4_TEXTURE_TYPE_BW1:
	case VC4_TEXTURE_TYPE_A4:
	case VC4_TEXTURE_TYPE_A1:
	case VC4_TEXTURE_TYPE_RGBA64:
	case VC4_TEXTURE_TYPE_YUV422R:
	default:
		DRM_DEBUG("Texture format %d unsupported\n", type);
		goto fail;
	}
	utile_w = utile_width(cpp);
	utile_h = utile_height(cpp);

	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
		tiling_format = VC4_TILING_FORMAT_LINEAR;
	} else {
		if (size_is_lt(width, height, cpp))
			tiling_format = VC4_TILING_FORMAT_LT;
		else
			tiling_format = VC4_TILING_FORMAT_T;
	}

	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
				tiling_format, width, height, cpp)) {
		goto fail;
	}

	/* The mipmap levels are stored before the base of the texture.  Make
	 * sure there is actually space in the BO.
	 */
	for (i = 1; i <= miplevels; i++) {
		uint32_t level_width = max(width >> i, 1u);
		uint32_t level_height = max(height >> i, 1u);
		uint32_t aligned_width, aligned_height;
		uint32_t level_size;

		/* Once the levels get small enough, they drop from T to LT. */
		if (tiling_format == VC4_TILING_FORMAT_T &&
		    size_is_lt(level_width, level_height, cpp)) {
			tiling_format = VC4_TILING_FORMAT_LT;
		}

		switch (tiling_format) {
		case VC4_TILING_FORMAT_T:
			aligned_width = round_up(level_width, utile_w * 8);
			aligned_height = round_up(level_height, utile_h * 8);
			break;
		case VC4_TILING_FORMAT_LT:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = round_up(level_height, utile_h);
			break;
		default:
			aligned_width = round_up(level_width, utile_w);
			aligned_height = level_height;
			break;
		}

		level_size = aligned_width * cpp * aligned_height;

		if (offset < level_size) {
			DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
				  "overflowed buffer bounds (offset %d)\n",
				  i, level_width, level_height,
				  aligned_width, aligned_height,
				  level_size, offset);
			goto fail;
		}

		offset -= level_size;
	}

	*validated_p0 = tex->paddr + p0;

	if (is_cs) {
		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
					  to_vc4_bo(&tex->base)->write_seqno);
	}

	return true;
 fail:
	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
	return false;
}

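/* Validates one GL shader record: relocates the fs/vs/cs shader code
 * addresses and uniform streams (including any texture samples they
 * reference), then relocates the vertex attribute addresses,
 * bounds-checking each attribute against its VBO using the max index
 * recorded from the primitive packets.
 */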
static int
validate_gl_shader_rec(struct drm_device *dev,
		       struct vc4_exec_info *exec,
		       struct vc4_shader_state *state)
{
	uint32_t *src_handles;
	void *pkt_u, *pkt_v;
	static const uint32_t shader_reloc_offsets[] = {
		4, /* fs */
		16, /* vs */
		28, /* cs */
	};
	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
	struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
	uint32_t nr_attributes, nr_relocs, packet_size;
	int i;

	nr_attributes = state->addr & 0x7;
	if (nr_attributes == 0)
		nr_attributes = 8;
	packet_size = gl_shader_rec_size(state->addr);

	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
	if (nr_relocs * 4 > exec->shader_rec_size) {
		DRM_DEBUG("overflowed shader recs reading %d handles "
			  "from %d bytes left\n",
			  nr_relocs, exec->shader_rec_size);
		return -EINVAL;
	}
	src_handles = exec->shader_rec_u;
	exec->shader_rec_u += nr_relocs * 4;
	exec->shader_rec_size -= nr_relocs * 4;

	if (packet_size > exec->shader_rec_size) {
		DRM_DEBUG("overflowed shader recs copying %db packet "
			  "from %d bytes left\n",
			  packet_size, exec->shader_rec_size);
		return -EINVAL;
	}
	pkt_u = exec->shader_rec_u;
	pkt_v = exec->shader_rec_v;
	memcpy(pkt_v, pkt_u, packet_size);
	exec->shader_rec_u += packet_size;
	/* Shader recs have to be aligned to 16 bytes (due to the attribute
	 * flags being in the low bytes), so round the next validated shader
	 * rec address up.  This should be safe, since we've got so many
	 * relocations in a shader rec packet.
	 */
	BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
	exec->shader_rec_v += roundup(packet_size, 16);
	exec->shader_rec_size -= packet_size;

	for (i = 0; i < shader_reloc_count; i++) {
		if (src_handles[i] >= exec->bo_count) {
			DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
			return -EINVAL;
		}

		bo[i] = exec->bo[src_handles[i]];
		if (!bo[i])
			return -EINVAL;
	}
	for (i = shader_reloc_count; i < nr_relocs; i++) {
		bo[i] = vc4_use_bo(exec, src_handles[i]);
		if (!bo[i])
			return -EINVAL;
	}

	if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
	    to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
		DRM_DEBUG("Thread mode of CL and FS do not match\n");
		return -EINVAL;
	}

	if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
	    to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
		DRM_DEBUG("cs and vs cannot be threaded\n");
		return -EINVAL;
	}

	for (i = 0; i < shader_reloc_count; i++) {
		struct vc4_validated_shader_info *validated_shader;
		uint32_t o = shader_reloc_offsets[i];
		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
		uint32_t *texture_handles_u;
		void *uniform_data_u;
		uint32_t tex, uni;

		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;

		if (src_offset != 0) {
			DRM_DEBUG("Shaders must be at offset 0 of "
				  "the BO.\n");
			return -EINVAL;
		}

		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
		if (!validated_shader)
			return -EINVAL;

		if (validated_shader->uniforms_src_size >
		    exec->uniforms_size) {
			DRM_DEBUG("Uniforms src buffer overflow\n");
			return -EINVAL;
		}

		texture_handles_u = exec->uniforms_u;
		uniform_data_u = (texture_handles_u +
				  validated_shader->num_texture_samples);

		memcpy(exec->uniforms_v, uniform_data_u,
		       validated_shader->uniforms_size);

		for (tex = 0;
		     tex < validated_shader->num_texture_samples;
		     tex++) {
			if (!reloc_tex(exec,
				       uniform_data_u,
				       &validated_shader->texture_samples[tex],
				       texture_handles_u[tex],
				       i == 2)) {
				return -EINVAL;
			}
		}

		/* Fill in the uniform slots that need this shader's
		 * start-of-uniforms address (used for resetting the uniform
		 * stream in the presence of control flow).
		 */
		for (uni = 0;
		     uni < validated_shader->num_uniform_addr_offsets;
		     uni++) {
			uint32_t o = validated_shader->uniform_addr_offsets[uni];
			((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
		}

		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;

		exec->uniforms_u += validated_shader->uniforms_src_size;
		exec->uniforms_v += validated_shader->uniforms_size;
		exec->uniforms_p += validated_shader->uniforms_size;
	}

	for (i = 0; i < nr_attributes; i++) {
		struct drm_gem_cma_object *vbo =
			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
		uint32_t o = 36 + i * 8;
		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
		uint32_t max_index;

		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
					  to_vc4_bo(&vbo->base)->write_seqno);

		if (state->addr & 0x8)
			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;

		if (vbo->base.size < offset ||
		    vbo->base.size - offset < attr_size) {
			DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
				  offset, attr_size, vbo->base.size);
			return -EINVAL;
		}

		if (stride != 0) {
			max_index = ((vbo->base.size - offset - attr_size) /
				     stride);
			if (state->max_index > max_index) {
				DRM_DEBUG("primitives use index %d out of "
					  "supplied %d\n",
					  state->max_index, max_index);
				return -EINVAL;
			}
		}

		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
	}

	return 0;
}

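/**
 * vc4_validate_shader_recs() - Validates the GL shader records queued
 * up by the bin CL's VC4_PACKET_GL_SHADER_STATE packets.
 * @dev: DRM device
 * @exec: Job being validated
 */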
int
vc4_validate_shader_recs(struct drm_device *dev,
			 struct vc4_exec_info *exec)
{
	uint32_t i;
	int ret = 0;

	for (i = 0; i < exec->shader_state_count; i++) {
		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
		if (ret)
			return ret;
	}

	return ret;
}