// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

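/*
 * Interrupts unmasked at hw_init time: fatal RBBM/CP error sources plus
 * CACHE_FLUSH_TS, which fires once the fence seqno written by a4xx_submit()
 * lands and drives retire processing from a4xx_irq().
 */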
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

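	/*
	 * Stash the seqno in a CP scratch register as well; a4xx_recover()
	 * dumps the scratch registers, so this is presumably a debugging aid
	 * for hangs:
	 */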
	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

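	/*
	 * Per-block clock gating setup: the CTL registers select gating
	 * behaviour for each sub-block, while HYST/DELAY tune how quickly
	 * clocks gate after idle. The magic values apparently come from the
	 * downstream kgsl driver.
	 */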
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);

	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

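	/*
	 * CP_ME_INIT: one-time microengine initialization. The first payload
	 * dword is apparently a mask of which of the following fields are
	 * valid; the remaining dwords are the usual a4xx default values.
	 */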
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

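	/*
	 * Per-SoC VBIF (GPU/memory bus interface) setup: QoS arbitration,
	 * plus read/write request limits on a420/a430.
	 */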
	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

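	/*
	 * On a4xx, GMEM is carved out of OCMEM; the register apparently takes
	 * the base address in 16KB units, hence the >> 14.
	 */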
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

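	/*
	 * Each CP_PROTECT entry below describes one protected register range:
	 * a base offset plus a power-of-two length, with the trap-on-access
	 * control bits in the top byte. The raw values presumably mirror the
	 * downstream driver.
	 */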
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

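	/*
	 * For both ucode images, word 0 holds the version (logged below),
	 * which is why the copy loops start at index 1.
	 */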
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

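	/*
	 * Pulse SW reset; the read back in between presumably ensures the
	 * assert has posted before it is deasserted:
	 */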
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

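	/*
	 * Decode protected-mode faults: bit 24 of PROTECT_STATUS apparently
	 * flags a write access, and the low bits hold the byte offset of the
	 * offending register (shifted down to a dword index for printing).
	 */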
	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

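/*
 * Register ranges captured for debugfs/devcoredump dumps: inclusive
 * {start, end} pairs, terminated by the ~0 sentinel.
 */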
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
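		/*
		 * Busy-wait until the SP/TP rail reports power-on; note that
		 * this poll has no timeout.
		 */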
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}

	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}

	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
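	/* CP_0 was pointed at the always-on countable in a4xx_hw_init() */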
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

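	/* a4xx uses a single ringbuffer (the final argument is nr_rings) */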
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}