/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 */
4*4882a593Smuzhiyun #ifndef __A5XX_GPU_H__
5*4882a593Smuzhiyun #define __A5XX_GPU_H__
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include "adreno_gpu.h"
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun /* Bringing over the hack from the previous targets */
10*4882a593Smuzhiyun #undef ROP_COPY
11*4882a593Smuzhiyun #undef ROP_XOR
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #include "a5xx.xml.h"
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun struct a5xx_gpu {
16*4882a593Smuzhiyun struct adreno_gpu base;
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun struct drm_gem_object *pm4_bo;
19*4882a593Smuzhiyun uint64_t pm4_iova;
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun struct drm_gem_object *pfp_bo;
22*4882a593Smuzhiyun uint64_t pfp_iova;
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun struct drm_gem_object *gpmu_bo;
25*4882a593Smuzhiyun uint64_t gpmu_iova;
26*4882a593Smuzhiyun uint32_t gpmu_dwords;
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun uint32_t lm_leakage;
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun struct msm_ringbuffer *cur_ring;
31*4882a593Smuzhiyun struct msm_ringbuffer *next_ring;
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS];
34*4882a593Smuzhiyun struct drm_gem_object *preempt_counters_bo[MSM_GPU_MAX_RINGS];
35*4882a593Smuzhiyun struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS];
36*4882a593Smuzhiyun uint64_t preempt_iova[MSM_GPU_MAX_RINGS];
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun atomic_t preempt_state;
39*4882a593Smuzhiyun struct timer_list preempt_timer;
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun struct drm_gem_object *shadow_bo;
42*4882a593Smuzhiyun uint64_t shadow_iova;
43*4882a593Smuzhiyun uint32_t *shadow;
44*4882a593Smuzhiyun
45*4882a593Smuzhiyun /* True if the microcode supports the WHERE_AM_I opcode */
46*4882a593Smuzhiyun bool has_whereami;
47*4882a593Smuzhiyun };
48*4882a593Smuzhiyun
/*
 * Convert a pointer to the embedded 'base' member (struct adreno_gpu) into a
 * pointer to the struct a5xx_gpu that contains it.
 */
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
50*4882a593Smuzhiyun
#ifdef CONFIG_DEBUG_FS
/* Only declared when CONFIG_DEBUG_FS is defined; not defined in this header. */
void a5xx_debugfs_init(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
54*4882a593Smuzhiyun
/*
 * In order to do lockless preemption we use a simple state machine to progress
 * through the process.
 *
 * PREEMPT_NONE - No preemption in progress. Next state: START.
 * PREEMPT_START - The trigger is evaluating if preemption is possible. Next
 * states: TRIGGERED, NONE
 * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next
 * state: NONE.
 * PREEMPT_TRIGGERED - A preemption has been executed on the hardware. Next
 * states: FAULTED, PENDING
 * PREEMPT_FAULTED - A preemption timed out (never completed). This will trigger
 * recovery. Next state: N/A
 * PREEMPT_PENDING - Preemption complete interrupt fired - the callback is
 * checking the success of the operation. Next states: FAULTED, NONE.
 */

enum preempt_state {
	PREEMPT_NONE = 0,
	PREEMPT_START,
	PREEMPT_ABORT,
	PREEMPT_TRIGGERED,
	PREEMPT_FAULTED,
	PREEMPT_PENDING,
};
80*4882a593Smuzhiyun
/*
 * struct a5xx_preempt_record is a shared buffer between the microcode and the
 * CPU to store the state for preemption. The record itself is much larger
 * (64k) but most of that is used by the CP for storage.
 *
 * There is a preemption record assigned per ringbuffer. When the CPU triggers a
 * preemption, it fills out the record with the useful information (wptr, ring
 * base, etc) and the microcode uses that information to set up the CP following
 * the preemption. When a ring is switched out, the CP will save the ringbuffer
 * state back to the record. In this way, once the records are properly set up
 * the CPU can quickly switch back and forth between ringbuffers by only
 * updating a few registers (often only the wptr).
 *
 * These are the CPU aware registers in the record:
 * @magic: Must always be 0x27C4BAFC
 * @info: Type of the record - written 0 by the CPU, updated by the CP
 * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by
 * the CP
 * @cntl: Value of RB_CNTL written by CPU, save/restored by CP
 * @rptr: Value of RB_RPTR written by CPU, save/restored by CP
 * @wptr: Value of RB_WPTR written by CPU, save/restored by CP
 * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP
 * @rbase: Value of RB_BASE written by CPU, save/restored by CP
 * @counter: GPU address of the storage area for the performance counters
 */
struct a5xx_preempt_record {
	uint32_t magic;
	uint32_t info;
	uint32_t data;
	uint32_t cntl;
	uint32_t rptr;
	uint32_t wptr;
	uint64_t rptr_addr;
	uint64_t rbase;
	uint64_t counter;
};
117*4882a593Smuzhiyun
/* Magic identifier for the preemption record (the record's 'magic' field) */
#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL
120*4882a593Smuzhiyun
/*
 * Even though the structure above is only a few bytes, we need a full 64k to
 * store the entire preemption record from the CP
 */
#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024)
126*4882a593Smuzhiyun
/*
 * The preemption counter block is a storage area for the value of the
 * preemption counters that are saved immediately before context switch. We
 * append it on to the end of the allocation for the preemption record.
 *
 * NOTE(review): struct a5xx_gpu also has a per-ring preempt_counters_bo[] -
 * confirm whether the counters are still appended to the record allocation.
 */
#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun
/* Power and GPMU microcode init entry points; not defined in this header. */
int a5xx_power_init(struct msm_gpu *gpu);
void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
137*4882a593Smuzhiyun
/*
 * spin_usecs() - busy-wait until a masked register read matches a value
 * @gpu: GPU whose register is read with gpu_read()
 * @usecs: maximum number of polling iterations; each one begins with udelay(1)
 * @reg: register passed to gpu_read()
 * @mask: bits of the register value that take part in the comparison
 * @value: expected result of (register value & @mask)
 *
 * The register is only sampled after each one-microsecond delay, so calling
 * this with @usecs == 0 returns -ETIMEDOUT without reading the register.
 *
 * Return: 0 as soon as the masked value equals @value, -ETIMEDOUT if it never
 * did within @usecs iterations.
 */
static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
		uint32_t reg, uint32_t mask, uint32_t value)
{
	while (usecs--) {
		udelay(1);
		if ((gpu_read(gpu, reg) & mask) == value)
			return 0;
		cpu_relax();
	}

	return -ETIMEDOUT;
}
150*4882a593Smuzhiyun
/*
 * Address of a ring's uint32_t slot in the shadow buffer: shadow_iova plus
 * the ring's id scaled by sizeof(uint32_t).
 */
#define shadowptr(a5xx_gpu, ring) ((a5xx_gpu)->shadow_iova + \
		((ring)->id * sizeof(uint32_t)))
153*4882a593Smuzhiyun
/* Entry points declared here and defined outside this header. */
bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);

/* Preemption entry points */
void a5xx_preempt_init(struct msm_gpu *gpu);
void a5xx_preempt_hw_init(struct msm_gpu *gpu);
void a5xx_preempt_trigger(struct msm_gpu *gpu);
void a5xx_preempt_irq(struct msm_gpu *gpu);
void a5xx_preempt_fini(struct msm_gpu *gpu);

void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, bool sync);
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun /* Return true if we are in a preempt state */
a5xx_in_preempt(struct a5xx_gpu * a5xx_gpu)166*4882a593Smuzhiyun static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu)
167*4882a593Smuzhiyun {
168*4882a593Smuzhiyun int preempt_state = atomic_read(&a5xx_gpu->preempt_state);
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun return !(preempt_state == PREEMPT_NONE ||
171*4882a593Smuzhiyun preempt_state == PREEMPT_ABORT);
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun #endif /* __A5XX_GPU_H__ */
175