xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/etnaviv/etnaviv_buffer.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014-2018 Etnaviv Project
 */

#include <drm/drm_drv.h>

#include "etnaviv_cmdbuf.h"
#include "etnaviv_gpu.h"
#include "etnaviv_gem.h"
#include "etnaviv_mmu.h"

#include "common.xml.h"
#include "state.xml.h"
#include "state_blt.xml.h"
#include "state_hi.xml.h"
#include "state_3d.xml.h"
#include "cmdstream.xml.h"

/*
 * Command Buffer helper:
 */

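/*
 * Commands are emitted into the kernel ring buffer by plain 32-bit stores
 * through OUT(); buffer->user_size acts as the write pointer (in bytes).
 * The BUG_ON below only catches callers that failed to reserve enough
 * space beforehand; it is not a wrap mechanism, wrapping is handled in
 * etnaviv_buffer_reserve().
 */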
static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
{
	u32 *vaddr = (u32 *)buffer->vaddr;

	BUG_ON(buffer->user_size >= buffer->size);

	vaddr[buffer->user_size / 4] = data;
	buffer->user_size += 4;
}

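/*
 * A LOAD_STATE packet is a 32-bit header (opcode, count, register offset)
 * followed by the value(s) to write.  The offset field is in units of
 * 32-bit registers, hence the shift by VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR
 * (2 in the rnndb-generated headers); e.g. loading VIVS_GL_PIPE_SELECT
 * (0x03800) encodes an offset index of 0x03800 >> 2 = 0x0e00.
 */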
static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
	u32 reg, u32 value)
{
	u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;

	buffer->user_size = ALIGN(buffer->user_size, 8);

	/* write a register via cmd stream */
	OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
		    VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
		    VIV_FE_LOAD_STATE_HEADER_OFFSET(index));
	OUT(buffer, value);
}

static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_END_HEADER_OP_END);
}

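/*
 * WAIT makes the FE idle for a number of clock cycles before fetching the
 * next command; the literal 200 below is that cycle count.  Paired with a
 * LINK back to its own address it forms the low-power busy loop the FE
 * spins in while the ring has no new work.
 */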
static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
}

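/*
 * LINK redirects the FE to 'address'.  The prefetch argument is the number
 * of 64-bit words the FE should fetch from the new location before it
 * starts parsing, which is why sizes in this file are consistently counted
 * in units of 8 bytes.
 */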
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
	u16 prefetch, u32 address)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK |
		    VIV_FE_LINK_HEADER_PREFETCH(prefetch));
	OUT(buffer, address);
}

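/*
 * CMD_SEM and CMD_STALL are used as a pair to implement a front-end stall:
 * CMD_SEM loads a semaphore token naming the source and destination
 * engines, and CMD_STALL then parks the FE until the destination engine
 * (usually the PE) has seen the token, i.e. has drained all preceding work.
 */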
static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
	u32 from, u32 to)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}

static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
{
	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
		       VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
		       VIVS_GL_SEMAPHORE_TOKEN_TO(to));
}

static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, u8 pipe)
{
	u32 flush = 0;

	lockdep_assert_held(&gpu->lock);

	/*
	 * This assumes that if we're switching to 2D, we're switching
	 * away from 3D, and vice versa.  Hence, if we're switching to
	 * the 2D core, we need to flush the 3D depth and color caches,
	 * otherwise we need to flush the 2D pixel engine cache.
	 */
	if (gpu->exec_state == ETNA_PIPE_2D)
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
	else if (gpu->exec_state == ETNA_PIPE_3D)
		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;

	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
}

static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
{
	u32 size = buf->size;
	u32 *ptr = buf->vaddr + off;

	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
			ptr, etnaviv_cmdbuf_get_va(buf,
			&gpu->mmu_context->cmdbuf_mapping) +
			off, size - len * 4 - off);

	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
			ptr, len * 4, 0);
}

/*
 * Safely replace the WAIT of a waitlink with a new command and argument.
 * The GPU may be executing this WAIT while we're modifying it, so we have
 * to write it in a specific order to avoid the GPU branching to somewhere
 * else: the argument is written first and the two stores are separated by
 * barriers, so the FE can never fetch the new command word together with
 * a stale argument.  'wl_offset' is the offset to the first byte of the
 * WAIT command.
 */
static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
	unsigned int wl_offset, u32 cmd, u32 arg)
{
	u32 *lw = buffer->vaddr + wl_offset;

	lw[1] = arg;
	mb();
	lw[0] = cmd;
	mb();
}

/*
 * Ensure that there is space in the command buffer to contiguously write
 * 'cmd_dwords' 64-bit words into the buffer, wrapping back to the start
 * of the buffer if necessary.
 */
static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
{
	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
		buffer->user_size = 0;

	return etnaviv_cmdbuf_get_va(buffer,
				     &gpu->mmu_context->cmdbuf_mapping) +
	       buffer->user_size;
}

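/*
 * Set up the initial WAIT/LINK self-loop at the start of the ring.  The
 * returned value is the prefetch count (in 64-bit words) used to start the
 * FE at the beginning of the buffer; WAIT and LINK are 8 bytes each, which
 * is also why the waitlink patch points below use 'user_size - 16'.
 */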
u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	/* initialize buffer */
	buffer->user_size = 0;

	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	return buffer->user_size / 8;
}

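/*
 * Build a one-shot buffer that programs the MMUv2 master TLB and safe
 * (fault fallback) address.  The configuration is loaded once with each
 * available pipe selected, and the buffer ends with END so the FE stops
 * once programming is done.
 */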
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	buffer->user_size = 0;

	if (gpu->identity.features & chipFeatures_PIPE_3D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	}

	if (gpu->identity.features & chipFeatures_PIPE_2D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	}

	CMD_END(buffer);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	return buffer->user_size / 8;
}

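/*
 * In the kernel secure mode (ETNA_SEC_KERNEL) the MMU references its page
 * tables indirectly through a page table array (PTA); this one-shot buffer
 * merely selects which PTA slot the MMU should use.
 */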
u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	buffer->user_size = 0;

	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));

	CMD_END(buffer);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	return buffer->user_size / 8;
}

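/*
 * Stop the FE once all queued work has retired.  If the current pipe state
 * needs flushing, a flush + stall epilogue is appended and the active WAIT
 * is patched into a LINK to it; otherwise the WAIT is patched directly
 * into an END.  'user_size - 16' addresses the WAIT of the trailing
 * WAIT/LINK pair.
 */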
void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 link_target, flush = 0;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	if (gpu->exec_state == ETNA_PIPE_2D)
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
	else if (gpu->exec_state == ETNA_PIPE_3D)
		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
			VIVS_GL_FLUSH_CACHE_COLOR |
			VIVS_GL_FLUSH_CACHE_TEXTURE |
			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
			VIVS_GL_FLUSH_CACHE_SHADER_L2;

	if (flush) {
		unsigned int dwords = 7;

		if (has_blt)
			dwords += 10;

		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);

		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
		if (gpu->exec_state == ETNA_PIPE_3D) {
			if (has_blt) {
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
			} else {
				CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
					       VIVS_TS_FLUSH_CACHE_FLUSH);
			}
		}
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_END(buffer);

		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_LINK_HEADER_OP_LINK |
					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
					    link_target);
	} else {
		/* Replace the last link-wait with an "END" command */
		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_END_HEADER_OP_END, 0);
	}
}

/*
 * Append a 'sync point' to the ring buffer: the GPU signals the event and
 * then parks the FE on an END command, until the kernel, after handling
 * the sync point, restarts the FE at the WAIT/LINK pair appended below.
 */
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 dwords, target;

	lockdep_assert_held(&gpu->lock);

	/*
	 * We need at most 4 dwords in the return target:
	 * 1 event + 1 end + 1 wait + 1 link.
	 */
	dwords = 4;
	target = etnaviv_buffer_reserve(gpu, buffer, dwords);

	/* Signal sync point event */
	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);

	/* Stop the FE to 'pause' the GPU */
	CMD_END(buffer);

	/* Append waitlink */
	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	/*
	 * Kick off the 'sync point' command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
				    target);
}

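/*
 * Overview of the submit path below: optionally emit a prologue in the
 * ring (MMU flush, PTA switch and/or pipe switch) that links into the user
 * command buffer, append a return epilogue (cache flush, stall, completion
 * event and a fresh WAIT/LINK) to the ring, then patch the previous WAIT
 * into a LINK so the spinning FE branches into the new work.
 */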
/* Append a command buffer to the ring buffer. */
void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
	struct etnaviv_iommu_context *mmu_context, unsigned int event,
	struct etnaviv_cmdbuf *cmdbuf)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 return_target, return_dwords;
	u32 link_target, link_dwords;
	bool switch_context = gpu->exec_state != exec_state;
	bool switch_mmu_context = gpu->mmu_context != mmu_context;
	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);

	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					    &gpu->mmu_context->cmdbuf_mapping);
	link_dwords = cmdbuf->size / 8;

	/*
	 * If we need maintenance prior to submitting this buffer, we will
	 * need to append a mmu flush and/or pipe switch load state, followed
	 * by a new link to this buffer; the exact number of extra words is
	 * computed below.
	 */
	if (need_flush || switch_context) {
		u32 target, extra_dwords;

		/* link command */
		extra_dwords = 1;

		/* flush command */
		if (need_flush) {
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
				extra_dwords += 1;
			else
				extra_dwords += 3;
		}

		/* pipe switch commands */
		if (switch_context)
			extra_dwords += 4;

		/* PTA load command */
		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
			extra_dwords += 1;

		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
		/*
		 * Switch MMU context if necessary. Must be done after the
		 * link target has been calculated, as the jump forward in the
		 * kernel ring still uses the last active MMU context before
		 * the switch.
		 */
		if (switch_mmu_context) {
			struct etnaviv_iommu_context *old_context = gpu->mmu_context;

			gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);
			etnaviv_iommu_context_put(old_context);
		}

		if (need_flush) {
			/* Add the MMU flush */
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
			} else {
				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;

				if (switch_mmu_context &&
				    gpu->sec_mode == ETNA_SEC_KERNEL) {
					unsigned short id =
						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
					CMD_LOAD_STATE(buffer,
						VIVS_MMUv2_PTA_CONFIG,
						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
				}

				if (gpu->sec_mode == ETNA_SEC_NONE)
					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);

				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
					       flush);
				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
					SYNC_RECIPIENT_PE);
				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
					SYNC_RECIPIENT_PE);
			}

			gpu->flush_seq = new_flush_seq;
		}

		if (switch_context) {
			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
			gpu->exec_state = exec_state;
		}

		/* And the link to the submitted buffer */
		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					&gpu->mmu_context->cmdbuf_mapping);
		CMD_LINK(buffer, link_dwords, link_target);

		/* Update the link target to point to above instructions */
		link_target = target;
		link_dwords = extra_dwords;
	}

	/*
	 * Append a LINK to the submitted command buffer to return to
	 * the ring buffer.  return_target is the ring target address.
	 * We need at most 7 dwords in the return target: 2 cache flush +
	 * 2 semaphore stall + 1 event + 1 wait + 1 link.
	 */
	return_dwords = 7;

	/*
	 * When the BLT engine is present we need 6 more dwords in the return
	 * target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
	 * minus the normal TS flush state that the BLT flush replaces
	 * (3 + 4 - 1).
	 */
	if (has_blt)
		return_dwords += 6;

	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
	CMD_LINK(cmdbuf, return_dwords, return_target);

	/*
	 * Append a cache flush, stall, event, wait and link pointing back to
	 * the wait command to the ring buffer.
	 */
	if (gpu->exec_state == ETNA_PIPE_2D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
				       VIVS_GL_FLUSH_CACHE_PE2D);
	} else {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
				       VIVS_GL_FLUSH_CACHE_DEPTH |
				       VIVS_GL_FLUSH_CACHE_COLOR);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		} else {
			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
					       VIVS_TS_FLUSH_CACHE_FLUSH);
		}
	}
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	if (has_blt) {
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
	}

	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);
	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	if (drm_debug_enabled(DRM_UT_DRIVER)) {
		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
			return_target,
			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
			cmdbuf->vaddr);

		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
			       cmdbuf->vaddr, cmdbuf->size, 0);

		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
		pr_info("addr: 0x%08x\n", link_target);
		pr_info("back: 0x%08x\n", return_target);
		pr_info("event: %d\n", event);
	}

	/*
	 * Kick off the submitted command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
				    link_target);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
}
538*4882a593Smuzhiyun }
539