/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)

/*
 * GPU GFX IP block helper functions.
 */

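/**
 * amdgpu_gfx_mec_queue_to_bit - Convert a MEC queue to a queue_bitmap bit
 *
 * @adev: amdgpu_device pointer
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Flatten the (mec, pipe, queue) triple into the linear bit index used by
 * adev->gfx.mec.queue_bitmap.
 */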
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}

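/**
 * amdgpu_queue_mask_bit_to_mec_queue - Convert a queue_bitmap bit to a MEC queue
 *
 * @adev: amdgpu_device pointer
 * @bit: linear bit index in adev->gfx.mec.queue_bitmap
 * @mec: returned MEC index
 * @pipe: returned pipe index
 * @queue: returned queue index
 *
 * Inverse of amdgpu_gfx_mec_queue_to_bit().
 */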
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
				 int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
	       / adev->gfx.mec.num_pipe_per_mec;
}

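/**
 * amdgpu_gfx_is_mec_queue_enabled - Check whether a MEC queue is owned by amdgpu
 *
 * @adev: amdgpu_device pointer
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Returns true if the corresponding bit is set in adev->gfx.mec.queue_bitmap.
 */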
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec.queue_bitmap);
}

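/**
 * amdgpu_gfx_me_queue_to_bit - Convert an ME (gfx) queue to a queue_bitmap bit
 *
 * @adev: amdgpu_device pointer
 * @me: ME index
 * @pipe: pipe index within the ME
 * @queue: queue index within the pipe
 *
 * Graphics-engine counterpart of amdgpu_gfx_mec_queue_to_bit(); flattens the
 * (me, pipe, queue) triple into the bit index used by adev->gfx.me.queue_bitmap.
 */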
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
				int *me, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.me.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
		% adev->gfx.me.num_pipe_per_me;
	*me = (bit / adev->gfx.me.num_queue_per_pipe)
		/ adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_scratch_get - Allocate a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Allocate a CP scratch register for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
	int i;

	i = ffs(adev->gfx.scratch.free_mask);
	if (i != 0 && i <= adev->gfx.scratch.num_reg) {
		i--;
		adev->gfx.scratch.free_mask &= ~(1u << i);
		*reg = adev->gfx.scratch.reg_base + i;
		return 0;
	}
	return -EINVAL;
}

/**
 * amdgpu_gfx_scratch_free - Free a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Free a CP scratch register allocated for use by the driver (all asics)
 */
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
	adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);

		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}

static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

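/**
 * amdgpu_gfx_is_high_priority_compute_queue - Check if a compute queue is high priority
 *
 * @adev: amdgpu_device pointer
 * @pipe: pipe index
 * @queue: queue index within the pipe
 *
 * With the multipipe policy the priority alternates per pipe, otherwise it
 * alternates per queue; odd indices are treated as high priority.
 */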
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
					       int pipe, int queue)
{
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int cond;

	/* Policy: alternate between normal and high priority */
	cond = multipipe_policy ? pipe : queue;

	return ((cond % 2) != 0);
}

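/**
 * amdgpu_gfx_compute_queue_acquire - Select the compute queues owned by amdgpu
 *
 * @adev: amdgpu_device pointer
 *
 * Populate adev->gfx.mec.queue_bitmap with the compute queues the driver will
 * use, either spread evenly across all pipes of MEC1 (multipipe policy) or
 * packed into the lowest-numbered queues.
 */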
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe,
				     adev->gfx.num_compute_rings);

	if (multipipe_policy) {
		/* policy: spread the queues evenly across all pipes on MEC1 only */
		for (i = 0; i < max_queues_per_mec; i++) {
			pipe = i % adev->gfx.mec.num_pipe_per_mec;
			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
				adev->gfx.mec.num_queue_per_pipe;

			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
					adev->gfx.mec.queue_bitmap);
		}
	} else {
		/* policy: amdgpu owns all queues in the given pipe */
		for (i = 0; i < max_queues_per_mec; ++i)
			set_bit(i, adev->gfx.mec.queue_bitmap);
	}

	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}

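/**
 * amdgpu_gfx_graphics_queue_acquire - Select the graphics queues owned by amdgpu
 *
 * @adev: amdgpu_device pointer
 *
 * Populate adev->gfx.me.queue_bitmap (currently the first queue of each pipe
 * on ME0) and update adev->gfx.num_gfx_rings accordingly.
 */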
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, me;

	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
		queue = i % adev->gfx.me.num_queue_per_pipe;
		me = (i / adev->gfx.me.num_queue_per_pipe)
		      / adev->gfx.me.num_pipe_per_me;

		if (me >= adev->gfx.me.num_me)
			break;
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * will extend to multiple queues per pipe later */
		if (me == 0 && queue < 1)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

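/**
 * amdgpu_gfx_kiq_acquire - Pick a compute queue for the KIQ ring
 *
 * @adev: amdgpu_device pointer
 * @ring: KIQ ring to fill in
 *
 * Walk the compute queue bits from the top down and pick the first queue not
 * already owned by amdgpu that satisfies the KIQ restrictions (queue 0 only,
 * avoiding MEC2 pipes 2/3); fill in ring->me/pipe/queue on success.
 */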
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		    * adev->gfx.mec.num_pipe_per_mec
		    * adev->gfx.mec.num_queue_per_pipe;

	while (--queue_bit >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

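/**
 * amdgpu_gfx_kiq_init_ring - Initialize the KIQ ring
 *
 * @adev: amdgpu_device pointer
 * @ring: KIQ ring to initialize
 * @irq: interrupt source used by the ring
 *
 * Set up the KIQ doorbell, acquire a free MEC queue for it and initialize the
 * underlying amdgpu ring. Returns 0 on success or a negative error code.
 */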
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.kiq;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

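/**
 * amdgpu_gfx_kiq_init - Allocate the KIQ EOP buffer
 *
 * @adev: amdgpu_device pointer
 * @hpd_size: size in bytes of the buffer backing the KIQ EOP area
 *
 * Create and clear the GTT buffer object that backs the KIQ EOP area.
 * Returns 0 on success or a negative error code.
 */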
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must, otherwise the hypervisor
		 * triggers a SAVE_VF failure after the driver is unloaded,
		 * because by then the MQD has been deallocated and gart_unbind
		 * has run. To avoid that divergence, use the VRAM domain for
		 * the KIQ MQD on both SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i])
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

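/**
 * amdgpu_gfx_disable_kcq - Unmap the kernel compute queues via the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Issue a KIQ unmap_queues packet (RESET_QUEUES) for every compute ring and
 * wait for the KIQ ring test to confirm the packets were processed.
 */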
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
					adev->gfx.num_compute_rings))
		return -ENOMEM;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	return amdgpu_ring_test_helper(kiq_ring);
}

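/**
 * amdgpu_queue_mask_bit_to_set_resource_bit - Map a queue_bitmap bit to a SET_RESOURCES bit
 *
 * @adev: amdgpu_device pointer
 * @queue_bit: bit index in adev->gfx.mec.queue_bitmap
 *
 * The SET_RESOURCES queue mask assumes a fixed 4-pipe x 8-queue layout per
 * MEC, so recompute the bit position from the decoded (mec, pipe, queue)
 * triple.
 */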
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					int queue_bit)
{
	int mec, pipe, queue;
	int set_resource_bit = 0;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

	return set_resource_bit;
}

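/**
 * amdgpu_gfx_enable_kcq - Map the kernel compute queues via the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Build the queue mask from the acquired compute queues, send a KIQ
 * set_resources packet followed by a map_queues packet for each compute ring,
 * and run the KIQ ring test. Returns 0 on success or a negative error code.
 */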
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
	}

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
							kiq_ring->queue);

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
					adev->gfx.num_compute_rings +
					kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. The gfx off feature will be enabled by the gfx ip after gfx cg/pg is enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not send a request to enable the gfx off feature before disabling it.
 */

void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
			goto unlock;

		adev->gfx.gfx_off_req_count--;

		if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state)
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
	} else {
		if (adev->gfx.gfx_off_req_count == 0) {
			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

			if (adev->gfx.gfx_off_state &&
			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
				adev->gfx.gfx_off_state = false;

				if (adev->gfx.funcs->init_spm_golden) {
					dev_dbg(adev->dev,
						"GFXOFF is disabled, re-init SPM golden settings\n");
					amdgpu_gfx_init_spm_golden(adev);
				}
			}
		}

		adev->gfx.gfx_off_req_count++;
	}

unlock:
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}

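/**
 * amdgpu_get_gfx_off_status - Query the current GFXOFF status from the SMU
 *
 * @adev: amdgpu_device pointer
 * @value: returned GFXOFF status value
 *
 * Takes the gfx_off mutex around the SMU query. Returns 0 on success or a
 * negative error code.
 */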
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int r = 0;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	r = smu_get_status_gfxoff(adev, value);

	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return r;
}

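/**
 * amdgpu_gfx_ras_late_init - Late init for GFX RAS handling
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate and register the GFX ras_if, create the "gfx_err_count" sysfs
 * entry, and enable the CP ECC error interrupt when RAS is supported; free
 * the ras_if again when it is not.
 */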
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
{
	int r;
	struct ras_fs_if fs_info = {
		.sysfs_name = "gfx_err_count",
	};
	struct ras_ih_if ih_info = {
		.cb = amdgpu_gfx_process_ras_data_cb,
	};

	if (!adev->gfx.ras_if) {
		adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
		if (!adev->gfx.ras_if)
			return -ENOMEM;
		adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
		adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
		adev->gfx.ras_if->sub_block_index = 0;
		strcpy(adev->gfx.ras_if->name, "gfx");
	}
	fs_info.head = ih_info.head = *adev->gfx.ras_if;

	r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
				 &fs_info, &ih_info);
	if (r)
		goto free;

	if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
		if (r)
			goto late_fini;
	} else {
		/* free gfx ras_if if ras is not supported */
		r = 0;
		goto free;
	}

	return 0;
late_fini:
	amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
free:
	kfree(adev->gfx.ras_if);
	adev->gfx.ras_if = NULL;
	return r;
}

void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
{
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
			adev->gfx.ras_if) {
		struct ras_common_if *ras_if = adev->gfx.ras_if;
		struct ras_ih_if ih_info = {
			.head = *ras_if,
			.cb = amdgpu_gfx_process_ras_data_cb,
		};

		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
		kfree(ras_if);
	}
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
		void *err_data,
		struct amdgpu_iv_entry *entry)
{
	/* TODO: a UE will trigger an interrupt.
	 *
	 * When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood.
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.funcs->query_ras_error_count)
			adev->gfx.funcs->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

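/**
 * amdgpu_kiq_rreg - Read a register through the KIQ
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio register offset to read
 *
 * Emit an rreg packet on the KIQ ring and poll the fence until the value
 * lands in a writeback slot. Returns the register value, or ~0 on failure.
 */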
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0, value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	if (adev->in_pci_err_recovery)
		return 0;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the gpu reset case because it may block the
	 * gpu_recover() routine forever, e.g. this virt_kiq_rreg is triggered
	 * in TTM and ttm_bo_lock_delayed_workqueue() will never return if we
	 * keep waiting in virt_kiq_rreg, which causes gpu_recover() to hang
	 * there.
	 *
	 * Also don't wait any longer in IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	mb();
	value = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}

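/**
 * amdgpu_kiq_wreg - Write a register through the KIQ
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio register offset to write
 * @v: value to write
 *
 * Emit a wreg packet on the KIQ ring and poll the fence to confirm the write
 * was processed; logs an error if the KIQ never signals completion.
 */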
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (adev->in_pci_err_recovery)
		return;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the gpu reset case because it may block the
	 * gpu_recover() routine forever, e.g. this virt_kiq_rreg is triggered
	 * in TTM and ttm_bo_lock_delayed_workqueue() will never return if we
	 * keep waiting in virt_kiq_rreg, which causes gpu_recover() to hang
	 * there.
	 *
	 * Also don't wait any longer in IRQ context.
	 */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}