// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014-2018 Etnaviv Project
 */

#include <drm/drm_drv.h>

#include "etnaviv_cmdbuf.h"
#include "etnaviv_gpu.h"
#include "etnaviv_gem.h"
#include "etnaviv_mmu.h"

#include "common.xml.h"
#include "state.xml.h"
#include "state_blt.xml.h"
#include "state_hi.xml.h"
#include "state_3d.xml.h"
#include "cmdstream.xml.h"

/*
 * Command Buffer helper:
 */

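/* Emit a single 32-bit word into the kernel ring buffer at the current write position. */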
static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
{
	u32 *vaddr = (u32 *)buffer->vaddr;

	BUG_ON(buffer->user_size >= buffer->size);

	vaddr[buffer->user_size / 4] = data;
	buffer->user_size += 4;
}

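/*
 * Emit a two-word LOAD_STATE command: a header selecting the target register
 * (by its offset in 32-bit units), followed by the value to write into it.
 */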
static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
	u32 reg, u32 value)
{
	u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;

	buffer->user_size = ALIGN(buffer->user_size, 8);

	/* write a register via cmd stream */
	OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
		    VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
		    VIV_FE_LOAD_STATE_HEADER_OFFSET(index));
	OUT(buffer, value);
}

static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_END_HEADER_OP_END);
}

static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
}

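/*
 * Emit a LINK command: the FE continues fetching at 'address', prefetching
 * 'prefetch' 64-bit words from there.
 */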
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
	u16 prefetch, u32 address)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK |
		    VIV_FE_LINK_HEADER_PREFETCH(prefetch));
	OUT(buffer, address);
}

static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
	u32 from, u32 to)
{
	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
}

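/*
 * Emit a semaphore token. This is always paired with a STALL using the same
 * from/to recipients, so that 'from' (usually the FE) waits until 'to' has
 * signalled the semaphore.
 */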
static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
{
	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
		       VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
		       VIVS_GL_SEMAPHORE_TOKEN_TO(to));
}

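/*
 * Switch the FE to a different pipe (2D or 3D), flushing the caches of the
 * pipe that is being left before the switch takes effect.
 */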
static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, u8 pipe)
{
	u32 flush = 0;

	lockdep_assert_held(&gpu->lock);

	/*
	 * This assumes that if we're switching to 2D, we're switching
	 * away from 3D, and vice versa. Hence, if we're switching to
	 * the 2D core, we need to flush the 3D depth and color caches,
	 * otherwise we need to flush the 2D pixel engine cache.
	 */
	if (gpu->exec_state == ETNA_PIPE_2D)
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
	else if (gpu->exec_state == ETNA_PIPE_3D)
		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;

	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
}

static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
{
	u32 size = buf->size;
	u32 *ptr = buf->vaddr + off;

	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
			ptr, etnaviv_cmdbuf_get_va(buf,
			&gpu->mmu_context->cmdbuf_mapping) +
			off, size - len * 4 - off);

	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
			ptr, len * 4, 0);
}

/*
 * Safely replace the WAIT of a waitlink with a new command and argument.
 * The GPU may be executing this WAIT while we're modifying it, so we have
 * to write it in a specific order to avoid the GPU branching to somewhere
 * else. 'wl_offset' is the offset to the first byte of the WAIT command.
 */
static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
	unsigned int wl_offset, u32 cmd, u32 arg)
{
	u32 *lw = buffer->vaddr + wl_offset;

	lw[1] = arg;
	mb();
	lw[0] = cmd;
	mb();
}

/*
 * Ensure that there is space in the command buffer to contiguously write
 * 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
 */
static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
{
	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
		buffer->user_size = 0;

	return etnaviv_cmdbuf_get_va(buffer,
				     &gpu->mmu_context->cmdbuf_mapping) +
	       buffer->user_size;
}

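/*
 * Reset the kernel ring buffer and emit the initial WAIT/LINK loop the FE
 * spins on while idle. Returns the command size in 64-bit words, the unit
 * used for FE prefetch.
 */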
u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	/* initialize buffer */
	buffer->user_size = 0;

	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	return buffer->user_size / 8;
}

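/*
 * Emit a one-shot sequence that programs the MMUv2 page table and safe
 * address on every pipe present on this GPU, terminated with an END.
 * Returns the command size in 64-bit words.
 */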
u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	buffer->user_size = 0;

	if (gpu->identity.features & chipFeatures_PIPE_3D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	}

	if (gpu->identity.features & chipFeatures_PIPE_2D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	}

	CMD_END(buffer);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	return buffer->user_size / 8;
}

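/*
 * Emit a one-shot sequence that selects the page table array (PTA) entry to
 * use, terminated with an END. Returns the command size in 64-bit words.
 */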
u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

	lockdep_assert_held(&gpu->lock);

	buffer->user_size = 0;

	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));

	CMD_END(buffer);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	return buffer->user_size / 8;
}

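/*
 * Stop the FE once the queued work has completed: replace the active
 * WAIT/LINK in the ring buffer with a cache flush sequence followed by an
 * END, or with a plain END if no flush is needed.
 */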
void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 link_target, flush = 0;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	if (gpu->exec_state == ETNA_PIPE_2D)
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
	else if (gpu->exec_state == ETNA_PIPE_3D)
		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
			VIVS_GL_FLUSH_CACHE_COLOR |
			VIVS_GL_FLUSH_CACHE_TEXTURE |
			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
			VIVS_GL_FLUSH_CACHE_SHADER_L2;

	if (flush) {
		unsigned int dwords = 7;

		if (has_blt)
			dwords += 10;

		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);

		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
		if (gpu->exec_state == ETNA_PIPE_3D) {
			if (has_blt) {
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
			} else {
				CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
					       VIVS_TS_FLUSH_CACHE_FLUSH);
			}
		}
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		}
		CMD_END(buffer);

		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_LINK_HEADER_OP_LINK |
					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
					    link_target);
	} else {
		/* Replace the last link-wait with an "END" command */
		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
					    VIV_FE_END_HEADER_OP_END, 0);
	}
}

/* Append a 'sync point' to the ring buffer. */
void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 dwords, target;

	lockdep_assert_held(&gpu->lock);

	/*
	 * We need at most 4 dwords in the return target:
	 * 1 event + 1 end + 1 wait + 1 link.
	 */
	dwords = 4;
	target = etnaviv_buffer_reserve(gpu, buffer, dwords);

	/* Signal sync point event */
	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);

	/* Stop the FE to 'pause' the GPU */
	CMD_END(buffer);

	/* Append waitlink */
	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	/*
	 * Kick off the 'sync point' command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
				    target);
}

/* Append a command buffer to the ring buffer. */
void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
	struct etnaviv_iommu_context *mmu_context, unsigned int event,
	struct etnaviv_cmdbuf *cmdbuf)
{
	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
	unsigned int waitlink_offset = buffer->user_size - 16;
	u32 return_target, return_dwords;
	u32 link_target, link_dwords;
	bool switch_context = gpu->exec_state != exec_state;
	bool switch_mmu_context = gpu->mmu_context != mmu_context;
	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
	bool has_blt = !!(gpu->identity.minor_features5 &
			  chipMinorFeatures5_BLT_ENGINE);

	lockdep_assert_held(&gpu->lock);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);

	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					    &gpu->mmu_context->cmdbuf_mapping);
	link_dwords = cmdbuf->size / 8;

	/*
	 * If we need maintenance prior to submitting this buffer, we will
	 * need to append a mmu flush load state, followed by a new
	 * link to this buffer - a total of four additional words.
	 */
	if (need_flush || switch_context) {
		u32 target, extra_dwords;

		/* link command */
		extra_dwords = 1;

		/* flush command */
		if (need_flush) {
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
				extra_dwords += 1;
			else
				extra_dwords += 3;
		}

		/* pipe switch commands */
		if (switch_context)
			extra_dwords += 4;

		/* PTA load command */
		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
			extra_dwords += 1;

		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
		/*
		 * Switch MMU context if necessary. Must be done after the
		 * link target has been calculated, as the jump forward in the
		 * kernel ring still uses the last active MMU context before
		 * the switch.
		 */
		if (switch_mmu_context) {
			struct etnaviv_iommu_context *old_context = gpu->mmu_context;

			gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);
			etnaviv_iommu_context_put(old_context);
		}

		if (need_flush) {
			/* Add the MMU flush */
			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
			} else {
				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;

				if (switch_mmu_context &&
				    gpu->sec_mode == ETNA_SEC_KERNEL) {
					unsigned short id =
						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
					CMD_LOAD_STATE(buffer,
						VIVS_MMUv2_PTA_CONFIG,
						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
				}

				if (gpu->sec_mode == ETNA_SEC_NONE)
					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);

				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
					       flush);
				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
					SYNC_RECIPIENT_PE);
				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
					  SYNC_RECIPIENT_PE);
			}

			gpu->flush_seq = new_flush_seq;
		}

		if (switch_context) {
			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
			gpu->exec_state = exec_state;
		}

		/* And the link to the submitted buffer */
		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
					&gpu->mmu_context->cmdbuf_mapping);
		CMD_LINK(buffer, link_dwords, link_target);

		/* Update the link target to point to above instructions */
		link_target = target;
		link_dwords = extra_dwords;
	}

	/*
	 * Append a LINK to the submitted command buffer to return to
	 * the ring buffer. return_target is the ring target address.
	 * We need at most 7 dwords in the return target: 2 cache flush +
	 * 2 semaphore stall + 1 event + 1 wait + 1 link.
	 */
	return_dwords = 7;

	/*
	 * When the BLT engine is present we need 6 more dwords in the return
	 * target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
	 * but we don't need the normal TS flush state.
	 */
	if (has_blt)
		return_dwords += 6;

	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
	CMD_LINK(cmdbuf, return_dwords, return_target);

	/*
	 * Append a cache flush, stall, event, wait and link pointing back to
	 * the wait command to the ring buffer.
	 */
	if (gpu->exec_state == ETNA_PIPE_2D) {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
			       VIVS_GL_FLUSH_CACHE_PE2D);
	} else {
		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
			       VIVS_GL_FLUSH_CACHE_DEPTH |
			       VIVS_GL_FLUSH_CACHE_COLOR);
		if (has_blt) {
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
		} else {
			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
				       VIVS_TS_FLUSH_CACHE_FLUSH);
		}
	}
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	if (has_blt) {
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
	}

	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
		       VIVS_GL_EVENT_FROM_PE);
	CMD_WAIT(buffer);
	CMD_LINK(buffer, 2,
		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
		 + buffer->user_size - 4);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
			return_target,
			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
			cmdbuf->vaddr);

	if (drm_debug_enabled(DRM_UT_DRIVER)) {
		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
			       cmdbuf->vaddr, cmdbuf->size, 0);

		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
		pr_info("addr: 0x%08x\n", link_target);
		pr_info("back: 0x%08x\n", return_target);
		pr_info("event: %d\n", event);
	}

	/*
	 * Kick off the submitted command by replacing the previous
	 * WAIT with a link to the address in the ring buffer.
	 */
	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
				    VIV_FE_LINK_HEADER_OP_LINK |
				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
				    link_target);

	if (drm_debug_enabled(DRM_UT_DRIVER))
		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
}