/OK3568_Linux_fs/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_amdkfd.h"
#include "amd_shared.h"

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
#include <uapi/linux/kfd_ioctl.h>

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;

static bool kfd_initialized;

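/**
 * amdgpu_amdkfd_init - module-wide initialization of the amdgpu/amdkfd glue
 *
 * Records the total system memory size (later grown by each GPU's VRAM in
 * amdgpu_amdkfd_device_probe() and used to estimate the page-table memory
 * reservation), initializes the KFD core via kgd2kfd_init() and the GPUVM
 * memory limits, and remembers whether KFD came up so the per-device hooks
 * below can bail out early if it did not.
 */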
int amdgpu_amdkfd_init(void)
{
	struct sysinfo si;
	int ret;

	si_meminfo(&si);
	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
	amdgpu_amdkfd_total_mem_size *= si.mem_unit;

	ret = kgd2kfd_init();
	amdgpu_amdkfd_gpuvm_init_mem_limits();
	kfd_initialized = !ret;

	return ret;
}

void amdgpu_amdkfd_fini(void)
{
	if (kfd_initialized) {
		kgd2kfd_exit();
		kfd_initialized = false;
	}
}

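/**
 * amdgpu_amdkfd_device_probe - create the KFD device for an amdgpu device
 * @adev: amdgpu device
 *
 * Asks the KFD core to probe a kfd_dev for this adapter (passing through the
 * PCI device, ASIC type and SR-IOV VF status) and, on success, adds the
 * device's VRAM to the global memory-size estimate.
 */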
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
	bool vf = amdgpu_sriov_vf(adev);

	if (!kfd_initialized)
		return;

	adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
				      adev->pdev, adev->asic_type, vf);

	if (adev->kfd.dev)
		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
}

/**
 * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
 *                                set up amdkfd
 *
 * @adev: amdgpu_device pointer
 * @aperture_base: output returning doorbell aperture base physical address
 * @aperture_size: output returning doorbell aperture size in bytes
 * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
 *
 * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
 * takes doorbells required for its own rings and reports the setup to amdkfd.
 * amdgpu reserved doorbells are at the start of the doorbell aperture.
 */
static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 phys_addr_t *aperture_base,
					 size_t *aperture_size,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
		*start_offset = 0;
	}
}

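/**
 * amdgpu_amdkfd_device_init - hand the per-device shared resources to KFD
 * @adev: amdgpu device
 *
 * Builds the kgd2kfd_shared_resources structure that tells KFD which compute
 * VMIDs, MEC pipes/queues, doorbells and GPUVM address range it may use, and
 * then calls kgd2kfd_device_init() to bring up the KFD side of the device.
 */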
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;

	if (adev->kfd.dev) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap =
				((1 << AMDGPU_NUM_VMID) - 1) -
				((1 << adev->vm_manager.first_kfd_vmid) - 1),
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
			.gpuvm_size = min(adev->vm_manager.max_pfn
					  << AMDGPU_GPU_PAGE_SHIFT,
					  AMDGPU_GMC_HOLE_START),
			.drm_render_minor = adev_to_drm(adev)->render->index,
			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear
		 */
		bitmap_complement(gpu_resources.cp_queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not a compile time constant
		 */
		last_valid_bit = 1 /* only first MEC can have compute queues */
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.cp_queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
				&gpu_resources.doorbell_start_offset);

		/* Since SOC15, BIF starts to statically use the
		 * lower 12 bits of doorbell addresses for routing
		 * based on settings in registers like
		 * SDMA0_DOORBELL_RANGE etc.
		 * In order to route a doorbell to CP engine, the lower
		 * 12 bits of its address have to be outside the range
		 * set for SDMA, VCN, and IH blocks.
		 */
		if (adev->asic_type >= CHIP_VEGA10) {
			gpu_resources.non_cp_doorbells_start =
					adev->doorbell_index.first_non_cp;
			gpu_resources.non_cp_doorbells_end =
					adev->doorbell_index.last_non_cp;
		}

		kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
	}
}

void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
	if (adev->kfd.dev) {
		kgd2kfd_device_exit(adev->kfd.dev);
		adev->kfd.dev = NULL;
	}
}

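/*
 * The helpers below are thin pass-throughs from amdgpu device events
 * (interrupts, suspend/resume, GPU reset) to the corresponding kgd2kfd_*
 * entry points. They all no-op harmlessly when no KFD device was probed.
 */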
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		const void *ih_ring_entry)
{
	if (adev->kfd.dev)
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}

int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_pre_reset(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_post_reset(adev->kfd.dev);

	return r;
}

void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, NULL);
}

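/**
 * amdgpu_amdkfd_alloc_gtt_mem - allocate a kernel BO in GTT for KFD
 * @kgd: kgd_dev handle (really the amdgpu_device)
 * @size: allocation size in bytes
 * @mem_obj: returns an opaque handle (the amdgpu_bo) for later freeing
 * @gpu_addr: returns the GPU address of the pinned buffer
 * @cpu_ptr: returns a kernel CPU mapping of the buffer
 * @cp_mqd_gfx9: also request the AMDGPU_GEM_CREATE_CP_MQD_GFX9 creation flag
 *
 * Creates a page-aligned, write-combined GTT BO, pins it, binds it to GART
 * and kmaps it, unwinding each step on failure. Typically used by KFD for
 * objects such as MQDs that must stay resident and CPU-accessible.
 */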
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
				void **mem_obj, uint64_t *gpu_addr,
				void **cpu_ptr, bool cp_mqd_gfx9)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;
	void *cpu_ptr_tmp = NULL;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	if (cp_mqd_gfx9)
		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate BO for amdkfd (%d)\n", r);
		return r;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
		goto allocate_mem_reserve_bo_failed;
	}

	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
	if (r) {
		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
		goto allocate_mem_pin_bo_failed;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (r) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto allocate_mem_kmap_bo_failed;
	}

	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
	if (r) {
		dev_err(adev->dev,
			"(%d) failed to map bo to kernel for amdkfd\n", r);
		goto allocate_mem_kmap_bo_failed;
	}

	*mem_obj = bo;
	*gpu_addr = amdgpu_bo_gpu_offset(bo);
	*cpu_ptr = cpu_ptr_tmp;

	amdgpu_bo_unreserve(bo);

	return 0;

allocate_mem_kmap_bo_failed:
	amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
	amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
	amdgpu_bo_unref(&bo);

	return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}

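/**
 * amdgpu_amdkfd_alloc_gws - allocate a BO in the GWS (global wave sync) domain
 * @kgd: kgd_dev handle
 * @size: requested GWS allocation size
 * @mem_obj: returns the amdgpu_bo handle
 *
 * GWS has no CPU mapping, so the BO is created with NO_CPU_ACCESS and is
 * neither pinned nor kmapped here; the caller only gets the handle back.
 */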
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
				void **mem_obj)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_bo *bo = NULL;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = 1;
	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	bp.type = ttm_bo_type_device;
	bp.resv = NULL;

	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r) {
		dev_err(adev->dev,
			"failed to allocate gws BO for amdkfd (%d)\n", r);
		return r;
	}

	*mem_obj = bo;
	return 0;
}

void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&bo);
}

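/**
 * amdgpu_amdkfd_get_fw_version - report the loaded firmware version for an engine
 * @kgd: kgd_dev handle
 * @type: which engine's firmware version is being queried
 *
 * Returns the version recorded when the firmware was loaded, or 0 for an
 * unknown engine type.
 */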
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
				      enum kgd_engine_type type)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	switch (type) {
	case KGD_ENGINE_PFP:
		return adev->gfx.pfp_fw_version;

	case KGD_ENGINE_ME:
		return adev->gfx.me_fw_version;

	case KGD_ENGINE_CE:
		return adev->gfx.ce_fw_version;

	case KGD_ENGINE_MEC1:
		return adev->gfx.mec_fw_version;

	case KGD_ENGINE_MEC2:
		return adev->gfx.mec2_fw_version;

	case KGD_ENGINE_RLC:
		return adev->gfx.rlc_fw_version;

	case KGD_ENGINE_SDMA1:
		return adev->sdma.instance[0].fw_version;

	case KGD_ENGINE_SDMA2:
		return adev->sdma.instance[1].fw_version;

	default:
		return 0;
	}

	return 0;
}

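/**
 * amdgpu_amdkfd_get_local_mem_info - describe VRAM for KFD topology reporting
 * @kgd: kgd_dev handle
 * @mem_info: filled in with public/private VRAM sizes, width and memory clock
 *
 * VRAM that the CPU can reach through the aperture (i.e. the aperture lies
 * within the device's DMA mask) is reported as CPU-visible "public" memory,
 * the remainder as "private". The maximum memory clock comes from DPM (or the
 * default clock under SR-IOV), with a fallback value of 100 when DPM is
 * disabled.
 */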
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
				      struct kfd_local_mem_info *mem_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
					     ~((1ULL << 32) - 1);
	resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;

	memset(mem_info, 0, sizeof(*mem_info));
	if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
		mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size -
				adev->gmc.visible_vram_size;
	} else {
		mem_info->local_mem_size_public = 0;
		mem_info->local_mem_size_private = adev->gmc.real_vram_size;
	}
	mem_info->vram_width = adev->gmc.vram_width;

	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
			&adev->gmc.aper_base, &aper_limit,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else if (adev->pm.dpm_enabled) {
		if (amdgpu_emu_mode == 1)
			mem_info->mem_clk_max = 0;
		else
			mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
	} else
		mem_info->mem_clk_max = 100;
}

uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->gfx.funcs->get_gpu_clock_counter)
		return adev->gfx.funcs->get_gpu_clock_counter(adev);
	return 0;
}

uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* the sclk is in quanta of 10kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;
	else if (adev->pm.dpm_enabled)
		return amdgpu_dpm_get_sclk(adev, false) / 100;
	else
		return 100;
}

void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

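/**
 * amdgpu_amdkfd_get_dmabuf_info - validate and describe an imported dma-buf
 * @kgd: importing device handle
 * @dma_buf_fd: file descriptor of the dma-buf to query
 * @dma_buf_kgd: optionally returns the exporting device's kgd handle
 * @bo_size: optionally returns the buffer size
 * @metadata_buffer: optional buffer to receive the BO metadata
 * @buffer_size: size of @metadata_buffer
 * @metadata_size: optionally returns the metadata size
 * @flags: optionally returns KFD_IOC_ALLOC_MEM_FLAGS_* describing the BO
 *
 * Only amdgpu-exported buffers backed by VRAM or GTT are accepted; anything
 * else returns -EINVAL.
 */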
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
				  struct kgd_dev **dma_buf_kgd,
				  uint64_t *bo_size, void *metadata_buffer,
				  size_t buffer_size, uint32_t *metadata_size,
				  uint32_t *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct dma_buf *dma_buf;
	struct drm_gem_object *obj;
	struct amdgpu_bo *bo;
	uint64_t metadata_flags;
	int r = -EINVAL;

	dma_buf = dma_buf_get(dma_buf_fd);
	if (IS_ERR(dma_buf))
		return PTR_ERR(dma_buf);

	if (dma_buf->ops != &amdgpu_dmabuf_ops)
		/* Can't handle non-graphics buffers */
		goto out_put;

	obj = dma_buf->priv;
	if (obj->dev->driver != adev_to_drm(adev)->driver)
		/* Can't handle buffers from different drivers */
		goto out_put;

	adev = drm_to_adev(obj->dev);
	bo = gem_to_amdgpu_bo(obj);
	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
				    AMDGPU_GEM_DOMAIN_GTT)))
		/* Only VRAM and GTT BOs are supported */
		goto out_put;

	r = 0;
	if (dma_buf_kgd)
		*dma_buf_kgd = (struct kgd_dev *)adev;
	if (bo_size)
		*bo_size = amdgpu_bo_size(bo);
	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (metadata_buffer)
		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
					   metadata_size, &metadata_flags);
	if (flags) {
		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
				KFD_IOC_ALLOC_MEM_FLAGS_VRAM
				: KFD_IOC_ALLOC_MEM_FLAGS_GTT;

		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			*flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
	}

out_put:
	dma_buf_put(dma_buf);
	return r;
}

uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

	return amdgpu_vram_mgr_usage(vram_man);
}

uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.xgmi.hive_id;
}

uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->unique_id;
}

uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return (uint8_t)ret;
}

uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rmmio_remap.bus_addr;
}

uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gds.gws_size;
}

uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->rev_id;
}

int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->gmc.noretry;
}

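/**
 * amdgpu_amdkfd_submit_ib - synchronously run an indirect buffer for KFD
 * @kgd: device handle
 * @engine: which ring to use (first compute ring or one of the SDMA rings)
 * @vmid: VMID the IB should execute under (assumes a no-HWS setup, see below)
 * @gpu_addr: GPU address of the IB
 * @ib_cmd: CPU pointer to the IB contents
 * @ib_len: IB length in dwords
 *
 * Allocates a one-IB job, schedules it on the selected ring and blocks until
 * its fence signals. Returns 0 on success or a negative error code.
 */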
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
				uint32_t vmid, uint64_t gpu_addr,
				uint32_t *ib_cmd, uint32_t ib_len)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct amdgpu_ring *ring;
	struct dma_fence *f = NULL;
	int ret;

	switch (engine) {
	case KGD_ENGINE_MEC1:
		ring = &adev->gfx.compute_ring[0];
		break;
	case KGD_ENGINE_SDMA1:
		ring = &adev->sdma.instance[0].ring;
		break;
	case KGD_ENGINE_SDMA2:
		ring = &adev->sdma.instance[1].ring;
		break;
	default:
		pr_err("Invalid engine in IB submission: %d\n", engine);
		ret = -EINVAL;
		goto err;
	}

	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
	if (ret)
		goto err;

	ib = &job->ibs[0];
	memset(ib, 0, sizeof(struct amdgpu_ib));

	ib->gpu_addr = gpu_addr;
	ib->ptr = ib_cmd;
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

	if (ret) {
		DRM_ERROR("amdgpu: failed to schedule IB.\n");
		goto err_ib_sched;
	}

	ret = dma_fence_wait(f, false);

err_ib_sched:
	dma_fence_put(f);
	amdgpu_job_free(job);
err:
	return ret;
}

void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	amdgpu_dpm_switch_power_profile(adev,
					PP_SMC_POWER_PROFILE_COMPUTE,
					!idle);
}

bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
	if (adev->kfd.dev)
		return vmid >= adev->vm_manager.first_kfd_vmid;

	return false;
}

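/*
 * TLB flushing on behalf of KFD: Vega (FAMILY_AI) parts have more than one
 * VM hub, so a per-VMID flush must hit every hub, while the PASID-based
 * flush lets the GMC code flush all hubs on Vega/Raven families.
 */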
int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	if (adev->family == AMDGPU_FAMILY_AI) {
		int i;

		for (i = 0; i < adev->num_vmhubs; i++)
			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
	} else {
		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
	}

	return 0;
}

int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	const uint32_t flush_type = 0;
	bool all_hub = false;

	if (adev->family == AMDGPU_FAMILY_AI ||
	    adev->family == AMDGPU_FAMILY_RV)
		all_hub = true;

	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}

bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return adev->have_atomics_support;
}