xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_release(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = kfd_open,
	.release = kfd_release,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

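/*
 * kfd_chardev_init() - register the /dev/kfd character device.
 *
 * Allocates a dynamic major via register_chrdev(), creates the "kfd"
 * device class and a single device node (minor 0). On failure the
 * steps already taken are rolled back in reverse order.
 */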
int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
					MKDEV(kfd_char_dev_major, 0),
					NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
	kfd_device = NULL;
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}

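/*
 * kfd_open() - open handler for /dev/kfd.
 *
 * Rejects 32-bit (compat) callers, creates or looks up the kfd_process
 * for the calling task and stores it in filep->private_data. The open
 * fails with -EAGAIN while the KFD driver is locked.
 */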
static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(filep);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked()) {
		dev_dbg(kfd_device, "kfd is locked!\n"
				"process %d unreferenced", process->pasid);
		kfd_unref_process(process);
		return -EAGAIN;
	}

	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_release(struct inode *inode, struct file *filep)
{
	struct kfd_process *process = filep->private_data;

	if (process)
		kfd_unref_process(process);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

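/*
 * set_queue_properties_from_user() - validate the create-queue ioctl
 * arguments and translate them into a struct queue_properties.
 *
 * Checks percentage/priority ranges, that the ring size is zero or a
 * power of two, and that all user pointers (ring base, read/write
 * pointers, EOP buffer, context save/restore area) are accessible.
 */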
static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok((const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok((const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok((const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->is_gws = false;
	q_properties->queue_percent = args->queue_percentage;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

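/*
 * kfd_ioctl_create_queue() - handler for AMDKFD_IOC_CREATE_QUEUE.
 *
 * Validates the user arguments, binds the process to the target GPU and
 * creates the queue through the process queue manager. The returned
 * doorbell_offset encodes the mmap type and GPU id (plus the offset
 * inside the process doorbell frame on SOC15 ASICs).
 */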
static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;
	uint32_t doorbell_offset_in_process = 0;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
			&doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, include the doorbell offset within the
		 * process doorbell frame, which is 2 pages.
		 */
		args->doorbell_offset |= doorbell_offset_in_process;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address    == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address   == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid 0x%x\n",
				args->queue_id,
				p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid 0x%x\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

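/*
 * kfd_ioctl_set_cu_mask() - handler for AMDKFD_IOC_SET_CU_MASK.
 *
 * Copies the user-supplied compute-unit mask (a multiple of 32 bits,
 * capped at max_num_cus) into a kernel buffer and applies it to the
 * given queue via the process queue manager. The buffer is freed here
 * only on failure; on success the queue keeps it.
 */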
static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * make sure that we have pdd, if this is the first queue created for
	 * this process
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total buffer size and the number of watch points are limited in
 * order to prevent kernel abuse (this has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module).
 * Note also that the watch addresses themselves are not "copied from
 * user", since they are programmed into the HW as user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boottime_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

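/*
 * kfd_ioctl_get_process_apertures_new() - extended variant of the
 * get-apertures ioctl without the NUM_OF_SUPPORTED_GPUS limit.
 *
 * When called with num_of_nodes == 0 it only reports how many nodes are
 * available, so user space can size its buffer; otherwise it copies up
 * to num_of_nodes aperture entries into the user-provided array.
 */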
static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
				args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

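/*
 * kfd_ioctl_create_event() - handler for AMDKFD_IOC_CREATE_EVENT.
 *
 * For dGPUs the signal page is allocated by user space; the first call
 * that passes a non-zero event_page_offset translates the BO handle,
 * maps it into the kernel and registers it as the process signal page
 * before the event itself is created.
 */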
kfd_ioctl_create_event(struct file * filp,struct kfd_process * p,void * data)1019*4882a593Smuzhiyun static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1020*4882a593Smuzhiyun 					void *data)
1021*4882a593Smuzhiyun {
1022*4882a593Smuzhiyun 	struct kfd_ioctl_create_event_args *args = data;
1023*4882a593Smuzhiyun 	int err;
1024*4882a593Smuzhiyun 
1025*4882a593Smuzhiyun 	/* For dGPUs the event page is allocated in user mode. The
1026*4882a593Smuzhiyun 	 * handle is passed to KFD with the first call to this IOCTL
1027*4882a593Smuzhiyun 	 * through the event_page_offset field.
1028*4882a593Smuzhiyun 	 */
1029*4882a593Smuzhiyun 	if (args->event_page_offset) {
1030*4882a593Smuzhiyun 		struct kfd_dev *kfd;
1031*4882a593Smuzhiyun 		struct kfd_process_device *pdd;
1032*4882a593Smuzhiyun 		void *mem, *kern_addr;
1033*4882a593Smuzhiyun 		uint64_t size;
1034*4882a593Smuzhiyun 
1035*4882a593Smuzhiyun 		if (p->signal_page) {
1036*4882a593Smuzhiyun 			pr_err("Event page is already set\n");
1037*4882a593Smuzhiyun 			return -EINVAL;
1038*4882a593Smuzhiyun 		}
1039*4882a593Smuzhiyun 
1040*4882a593Smuzhiyun 		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1041*4882a593Smuzhiyun 		if (!kfd) {
1042*4882a593Smuzhiyun 			pr_err("Getting device by id failed in %s\n", __func__);
1043*4882a593Smuzhiyun 			return -EINVAL;
1044*4882a593Smuzhiyun 		}
1045*4882a593Smuzhiyun 
1046*4882a593Smuzhiyun 		mutex_lock(&p->mutex);
1047*4882a593Smuzhiyun 		pdd = kfd_bind_process_to_device(kfd, p);
1048*4882a593Smuzhiyun 		if (IS_ERR(pdd)) {
1049*4882a593Smuzhiyun 			err = PTR_ERR(pdd);
1050*4882a593Smuzhiyun 			goto out_unlock;
1051*4882a593Smuzhiyun 		}
1052*4882a593Smuzhiyun 
1053*4882a593Smuzhiyun 		mem = kfd_process_device_translate_handle(pdd,
1054*4882a593Smuzhiyun 				GET_IDR_HANDLE(args->event_page_offset));
1055*4882a593Smuzhiyun 		if (!mem) {
1056*4882a593Smuzhiyun 			pr_err("Can't find BO, offset is 0x%llx\n",
1057*4882a593Smuzhiyun 			       args->event_page_offset);
1058*4882a593Smuzhiyun 			err = -EINVAL;
1059*4882a593Smuzhiyun 			goto out_unlock;
1060*4882a593Smuzhiyun 		}
1061*4882a593Smuzhiyun 		mutex_unlock(&p->mutex);
1062*4882a593Smuzhiyun 
1063*4882a593Smuzhiyun 		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1064*4882a593Smuzhiyun 						mem, &kern_addr, &size);
1065*4882a593Smuzhiyun 		if (err) {
1066*4882a593Smuzhiyun 			pr_err("Failed to map event page to kernel\n");
1067*4882a593Smuzhiyun 			return err;
1068*4882a593Smuzhiyun 		}
1069*4882a593Smuzhiyun 
1070*4882a593Smuzhiyun 		err = kfd_event_page_set(p, kern_addr, size);
1071*4882a593Smuzhiyun 		if (err) {
1072*4882a593Smuzhiyun 			pr_err("Failed to set event page\n");
1073*4882a593Smuzhiyun 			return err;
1074*4882a593Smuzhiyun 		}
1075*4882a593Smuzhiyun 	}
1076*4882a593Smuzhiyun 
1077*4882a593Smuzhiyun 	err = kfd_event_create(filp, p, args->event_type,
1078*4882a593Smuzhiyun 				args->auto_reset != 0, args->node_id,
1079*4882a593Smuzhiyun 				&args->event_id, &args->event_trigger_data,
1080*4882a593Smuzhiyun 				&args->event_page_offset,
1081*4882a593Smuzhiyun 				&args->event_slot_index);
1082*4882a593Smuzhiyun 
1083*4882a593Smuzhiyun 	return err;
1084*4882a593Smuzhiyun 
1085*4882a593Smuzhiyun out_unlock:
1086*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1087*4882a593Smuzhiyun 	return err;
1088*4882a593Smuzhiyun }
1089*4882a593Smuzhiyun 
kfd_ioctl_destroy_event(struct file * filp,struct kfd_process * p,void * data)1090*4882a593Smuzhiyun static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1091*4882a593Smuzhiyun 					void *data)
1092*4882a593Smuzhiyun {
1093*4882a593Smuzhiyun 	struct kfd_ioctl_destroy_event_args *args = data;
1094*4882a593Smuzhiyun 
1095*4882a593Smuzhiyun 	return kfd_event_destroy(p, args->event_id);
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun 
kfd_ioctl_set_event(struct file * filp,struct kfd_process * p,void * data)1098*4882a593Smuzhiyun static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1099*4882a593Smuzhiyun 				void *data)
1100*4882a593Smuzhiyun {
1101*4882a593Smuzhiyun 	struct kfd_ioctl_set_event_args *args = data;
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 	return kfd_set_event(p, args->event_id);
1104*4882a593Smuzhiyun }
1105*4882a593Smuzhiyun 
kfd_ioctl_reset_event(struct file * filp,struct kfd_process * p,void * data)1106*4882a593Smuzhiyun static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1107*4882a593Smuzhiyun 				void *data)
1108*4882a593Smuzhiyun {
1109*4882a593Smuzhiyun 	struct kfd_ioctl_reset_event_args *args = data;
1110*4882a593Smuzhiyun 
1111*4882a593Smuzhiyun 	return kfd_reset_event(p, args->event_id);
1112*4882a593Smuzhiyun }
1113*4882a593Smuzhiyun 
kfd_ioctl_wait_events(struct file * filp,struct kfd_process * p,void * data)1114*4882a593Smuzhiyun static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1115*4882a593Smuzhiyun 				void *data)
1116*4882a593Smuzhiyun {
1117*4882a593Smuzhiyun 	struct kfd_ioctl_wait_events_args *args = data;
1118*4882a593Smuzhiyun 	int err;
1119*4882a593Smuzhiyun 
1120*4882a593Smuzhiyun 	err = kfd_wait_on_events(p, args->num_events,
1121*4882a593Smuzhiyun 			(void __user *)args->events_ptr,
1122*4882a593Smuzhiyun 			(args->wait_for_all != 0),
1123*4882a593Smuzhiyun 			args->timeout, &args->wait_result);
1124*4882a593Smuzhiyun 
1125*4882a593Smuzhiyun 	return err;
1126*4882a593Smuzhiyun }
kfd_ioctl_set_scratch_backing_va(struct file * filep,struct kfd_process * p,void * data)1127*4882a593Smuzhiyun static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1128*4882a593Smuzhiyun 					struct kfd_process *p, void *data)
1129*4882a593Smuzhiyun {
1130*4882a593Smuzhiyun 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1131*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
1132*4882a593Smuzhiyun 	struct kfd_dev *dev;
1133*4882a593Smuzhiyun 	long err;
1134*4882a593Smuzhiyun 
1135*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpu_id);
1136*4882a593Smuzhiyun 	if (!dev)
1137*4882a593Smuzhiyun 		return -EINVAL;
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1140*4882a593Smuzhiyun 
1141*4882a593Smuzhiyun 	pdd = kfd_bind_process_to_device(dev, p);
1142*4882a593Smuzhiyun 	if (IS_ERR(pdd)) {
1143*4882a593Smuzhiyun 		err = PTR_ERR(pdd);
1144*4882a593Smuzhiyun 		goto bind_process_to_device_fail;
1145*4882a593Smuzhiyun 	}
1146*4882a593Smuzhiyun 
1147*4882a593Smuzhiyun 	pdd->qpd.sh_hidden_private_base = args->va_addr;
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1150*4882a593Smuzhiyun 
1151*4882a593Smuzhiyun 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1152*4882a593Smuzhiyun 	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1153*4882a593Smuzhiyun 		dev->kfd2kgd->set_scratch_backing_va(
1154*4882a593Smuzhiyun 			dev->kgd, args->va_addr, pdd->qpd.vmid);
1155*4882a593Smuzhiyun 
1156*4882a593Smuzhiyun 	return 0;
1157*4882a593Smuzhiyun 
1158*4882a593Smuzhiyun bind_process_to_device_fail:
1159*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1160*4882a593Smuzhiyun 	return err;
1161*4882a593Smuzhiyun }
1162*4882a593Smuzhiyun 
1163*4882a593Smuzhiyun static int kfd_ioctl_get_tile_config(struct file *filep,
1164*4882a593Smuzhiyun 		struct kfd_process *p, void *data)
1165*4882a593Smuzhiyun {
1166*4882a593Smuzhiyun 	struct kfd_ioctl_get_tile_config_args *args = data;
1167*4882a593Smuzhiyun 	struct kfd_dev *dev;
1168*4882a593Smuzhiyun 	struct tile_config config;
1169*4882a593Smuzhiyun 	int err = 0;
1170*4882a593Smuzhiyun 
1171*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpu_id);
1172*4882a593Smuzhiyun 	if (!dev)
1173*4882a593Smuzhiyun 		return -EINVAL;
1174*4882a593Smuzhiyun 
1175*4882a593Smuzhiyun 	amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1176*4882a593Smuzhiyun 
1177*4882a593Smuzhiyun 	args->gb_addr_config = config.gb_addr_config;
1178*4882a593Smuzhiyun 	args->num_banks = config.num_banks;
1179*4882a593Smuzhiyun 	args->num_ranks = config.num_ranks;
1180*4882a593Smuzhiyun 
1181*4882a593Smuzhiyun 	if (args->num_tile_configs > config.num_tile_configs)
1182*4882a593Smuzhiyun 		args->num_tile_configs = config.num_tile_configs;
1183*4882a593Smuzhiyun 	err = copy_to_user((void __user *)args->tile_config_ptr,
1184*4882a593Smuzhiyun 			config.tile_config_ptr,
1185*4882a593Smuzhiyun 			args->num_tile_configs * sizeof(uint32_t));
1186*4882a593Smuzhiyun 	if (err) {
1187*4882a593Smuzhiyun 		args->num_tile_configs = 0;
1188*4882a593Smuzhiyun 		return -EFAULT;
1189*4882a593Smuzhiyun 	}
1190*4882a593Smuzhiyun 
1191*4882a593Smuzhiyun 	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1192*4882a593Smuzhiyun 		args->num_macro_tile_configs =
1193*4882a593Smuzhiyun 				config.num_macro_tile_configs;
1194*4882a593Smuzhiyun 	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1195*4882a593Smuzhiyun 			config.macro_tile_config_ptr,
1196*4882a593Smuzhiyun 			args->num_macro_tile_configs * sizeof(uint32_t));
1197*4882a593Smuzhiyun 	if (err) {
1198*4882a593Smuzhiyun 		args->num_macro_tile_configs = 0;
1199*4882a593Smuzhiyun 		return -EFAULT;
1200*4882a593Smuzhiyun 	}
1201*4882a593Smuzhiyun 
1202*4882a593Smuzhiyun 	return 0;
1203*4882a593Smuzhiyun }
1204*4882a593Smuzhiyun 
1205*4882a593Smuzhiyun static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1206*4882a593Smuzhiyun 				void *data)
1207*4882a593Smuzhiyun {
1208*4882a593Smuzhiyun 	struct kfd_ioctl_acquire_vm_args *args = data;
1209*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
1210*4882a593Smuzhiyun 	struct kfd_dev *dev;
1211*4882a593Smuzhiyun 	struct file *drm_file;
1212*4882a593Smuzhiyun 	int ret;
1213*4882a593Smuzhiyun 
1214*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpu_id);
1215*4882a593Smuzhiyun 	if (!dev)
1216*4882a593Smuzhiyun 		return -EINVAL;
1217*4882a593Smuzhiyun 
1218*4882a593Smuzhiyun 	drm_file = fget(args->drm_fd);
1219*4882a593Smuzhiyun 	if (!drm_file)
1220*4882a593Smuzhiyun 		return -EINVAL;
1221*4882a593Smuzhiyun 
1222*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1223*4882a593Smuzhiyun 
1224*4882a593Smuzhiyun 	pdd = kfd_get_process_device_data(dev, p);
1225*4882a593Smuzhiyun 	if (!pdd) {
1226*4882a593Smuzhiyun 		ret = -EINVAL;
1227*4882a593Smuzhiyun 		goto err_unlock;
1228*4882a593Smuzhiyun 	}
1229*4882a593Smuzhiyun 
1230*4882a593Smuzhiyun 	if (pdd->drm_file) {
1231*4882a593Smuzhiyun 		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1232*4882a593Smuzhiyun 		goto err_unlock;
1233*4882a593Smuzhiyun 	}
1234*4882a593Smuzhiyun 
1235*4882a593Smuzhiyun 	ret = kfd_process_device_init_vm(pdd, drm_file);
1236*4882a593Smuzhiyun 	if (ret)
1237*4882a593Smuzhiyun 		goto err_unlock;
1238*4882a593Smuzhiyun 	/* On success, the PDD keeps the drm_file reference */
1239*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1240*4882a593Smuzhiyun 
1241*4882a593Smuzhiyun 	return 0;
1242*4882a593Smuzhiyun 
1243*4882a593Smuzhiyun err_unlock:
1244*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1245*4882a593Smuzhiyun 	fput(drm_file);
1246*4882a593Smuzhiyun 	return ret;
1247*4882a593Smuzhiyun }
1248*4882a593Smuzhiyun 
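/*
 * Report whether all of the device's local memory is CPU-accessible
 * ("large BAR"). The debug_largebar module option forces this on for
 * testing, IOMMUv2 (APU) devices never qualify, and otherwise the device
 * qualifies when it reports no private local memory and a non-zero
 * public (host-visible) aperture.
 */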
1249*4882a593Smuzhiyun bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1250*4882a593Smuzhiyun {
1251*4882a593Smuzhiyun 	struct kfd_local_mem_info mem_info;
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 	if (debug_largebar) {
1254*4882a593Smuzhiyun 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1255*4882a593Smuzhiyun 		return true;
1256*4882a593Smuzhiyun 	}
1257*4882a593Smuzhiyun 
1258*4882a593Smuzhiyun 	if (dev->use_iommu_v2)
1259*4882a593Smuzhiyun 		return false;
1260*4882a593Smuzhiyun 
1261*4882a593Smuzhiyun 	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1262*4882a593Smuzhiyun 	if (mem_info.local_mem_size_private == 0 &&
1263*4882a593Smuzhiyun 			mem_info.local_mem_size_public > 0)
1264*4882a593Smuzhiyun 		return true;
1265*4882a593Smuzhiyun 	return false;
1266*4882a593Smuzhiyun }
1267*4882a593Smuzhiyun 
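/*
 * Allocate GPU-accessible memory on behalf of the calling process.
 * Host-visible VRAM is rejected on small-BAR devices. Doorbell and
 * MMIO-remap allocations have fixed sizes and use special mmap offsets;
 * the actual allocation goes through amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.
 * The buffer is tracked in the per-device IDR and returned to user space
 * as a handle combining the GPU ID and the IDR index.
 */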
1268*4882a593Smuzhiyun static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1269*4882a593Smuzhiyun 					struct kfd_process *p, void *data)
1270*4882a593Smuzhiyun {
1271*4882a593Smuzhiyun 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1272*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
1273*4882a593Smuzhiyun 	void *mem;
1274*4882a593Smuzhiyun 	struct kfd_dev *dev;
1275*4882a593Smuzhiyun 	int idr_handle;
1276*4882a593Smuzhiyun 	long err;
1277*4882a593Smuzhiyun 	uint64_t offset = args->mmap_offset;
1278*4882a593Smuzhiyun 	uint32_t flags = args->flags;
1279*4882a593Smuzhiyun 
1280*4882a593Smuzhiyun 	if (args->size == 0)
1281*4882a593Smuzhiyun 		return -EINVAL;
1282*4882a593Smuzhiyun 
1283*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpu_id);
1284*4882a593Smuzhiyun 	if (!dev)
1285*4882a593Smuzhiyun 		return -EINVAL;
1286*4882a593Smuzhiyun 
1287*4882a593Smuzhiyun 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1288*4882a593Smuzhiyun 		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1289*4882a593Smuzhiyun 		!kfd_dev_is_large_bar(dev)) {
1290*4882a593Smuzhiyun 		pr_err("Alloc host visible vram on small bar is not allowed\n");
1291*4882a593Smuzhiyun 		return -EINVAL;
1292*4882a593Smuzhiyun 	}
1293*4882a593Smuzhiyun 
1294*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1295*4882a593Smuzhiyun 
1296*4882a593Smuzhiyun 	pdd = kfd_bind_process_to_device(dev, p);
1297*4882a593Smuzhiyun 	if (IS_ERR(pdd)) {
1298*4882a593Smuzhiyun 		err = PTR_ERR(pdd);
1299*4882a593Smuzhiyun 		goto err_unlock;
1300*4882a593Smuzhiyun 	}
1301*4882a593Smuzhiyun 
1302*4882a593Smuzhiyun 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1303*4882a593Smuzhiyun 		if (args->size != kfd_doorbell_process_slice(dev)) {
1304*4882a593Smuzhiyun 			err = -EINVAL;
1305*4882a593Smuzhiyun 			goto err_unlock;
1306*4882a593Smuzhiyun 		}
1307*4882a593Smuzhiyun 		offset = kfd_get_process_doorbells(pdd);
1308*4882a593Smuzhiyun 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1309*4882a593Smuzhiyun 		if (args->size != PAGE_SIZE) {
1310*4882a593Smuzhiyun 			err = -EINVAL;
1311*4882a593Smuzhiyun 			goto err_unlock;
1312*4882a593Smuzhiyun 		}
1313*4882a593Smuzhiyun 		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1314*4882a593Smuzhiyun 		if (!offset) {
1315*4882a593Smuzhiyun 			err = -ENOMEM;
1316*4882a593Smuzhiyun 			goto err_unlock;
1317*4882a593Smuzhiyun 		}
1318*4882a593Smuzhiyun 	}
1319*4882a593Smuzhiyun 
1320*4882a593Smuzhiyun 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1321*4882a593Smuzhiyun 		dev->kgd, args->va_addr, args->size,
1322*4882a593Smuzhiyun 		pdd->vm, (struct kgd_mem **) &mem, &offset,
1323*4882a593Smuzhiyun 		flags);
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun 	if (err)
1326*4882a593Smuzhiyun 		goto err_unlock;
1327*4882a593Smuzhiyun 
1328*4882a593Smuzhiyun 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1329*4882a593Smuzhiyun 	if (idr_handle < 0) {
1330*4882a593Smuzhiyun 		err = -EFAULT;
1331*4882a593Smuzhiyun 		goto err_free;
1332*4882a593Smuzhiyun 	}
1333*4882a593Smuzhiyun 
1334*4882a593Smuzhiyun 	/* Update the VRAM usage count */
1335*4882a593Smuzhiyun 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1336*4882a593Smuzhiyun 		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1337*4882a593Smuzhiyun 
1338*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1339*4882a593Smuzhiyun 
1340*4882a593Smuzhiyun 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1341*4882a593Smuzhiyun 	args->mmap_offset = offset;
1342*4882a593Smuzhiyun 
1343*4882a593Smuzhiyun 	/* MMIO is mapped through kfd device
1344*4882a593Smuzhiyun 	 * Generate a kfd mmap offset
1345*4882a593Smuzhiyun 	 */
1346*4882a593Smuzhiyun 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1347*4882a593Smuzhiyun 		args->mmap_offset = KFD_MMAP_TYPE_MMIO
1348*4882a593Smuzhiyun 					| KFD_MMAP_GPU_ID(args->gpu_id);
1349*4882a593Smuzhiyun 
1350*4882a593Smuzhiyun 	return 0;
1351*4882a593Smuzhiyun 
1352*4882a593Smuzhiyun err_free:
1353*4882a593Smuzhiyun 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1354*4882a593Smuzhiyun err_unlock:
1355*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1356*4882a593Smuzhiyun 	return err;
1357*4882a593Smuzhiyun }
1358*4882a593Smuzhiyun 
1359*4882a593Smuzhiyun static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1360*4882a593Smuzhiyun 					struct kfd_process *p, void *data)
1361*4882a593Smuzhiyun {
1362*4882a593Smuzhiyun 	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1363*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
1364*4882a593Smuzhiyun 	void *mem;
1365*4882a593Smuzhiyun 	struct kfd_dev *dev;
1366*4882a593Smuzhiyun 	int ret;
1367*4882a593Smuzhiyun 	uint64_t size = 0;
1368*4882a593Smuzhiyun 
1369*4882a593Smuzhiyun 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1370*4882a593Smuzhiyun 	if (!dev)
1371*4882a593Smuzhiyun 		return -EINVAL;
1372*4882a593Smuzhiyun 
1373*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1374*4882a593Smuzhiyun 
1375*4882a593Smuzhiyun 	pdd = kfd_get_process_device_data(dev, p);
1376*4882a593Smuzhiyun 	if (!pdd) {
1377*4882a593Smuzhiyun 		pr_err("Process device data doesn't exist\n");
1378*4882a593Smuzhiyun 		ret = -EINVAL;
1379*4882a593Smuzhiyun 		goto err_unlock;
1380*4882a593Smuzhiyun 	}
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	mem = kfd_process_device_translate_handle(
1383*4882a593Smuzhiyun 		pdd, GET_IDR_HANDLE(args->handle));
1384*4882a593Smuzhiyun 	if (!mem) {
1385*4882a593Smuzhiyun 		ret = -EINVAL;
1386*4882a593Smuzhiyun 		goto err_unlock;
1387*4882a593Smuzhiyun 	}
1388*4882a593Smuzhiyun 
1389*4882a593Smuzhiyun 	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1390*4882a593Smuzhiyun 						(struct kgd_mem *)mem, &size);
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 	/* If freeing the buffer failed, leave the handle in place for
1393*4882a593Smuzhiyun 	 * clean-up during process tear-down.
1394*4882a593Smuzhiyun 	 */
1395*4882a593Smuzhiyun 	if (!ret)
1396*4882a593Smuzhiyun 		kfd_process_device_remove_obj_handle(
1397*4882a593Smuzhiyun 			pdd, GET_IDR_HANDLE(args->handle));
1398*4882a593Smuzhiyun 
1399*4882a593Smuzhiyun 	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1400*4882a593Smuzhiyun 
1401*4882a593Smuzhiyun err_unlock:
1402*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1403*4882a593Smuzhiyun 	return ret;
1404*4882a593Smuzhiyun }
1405*4882a593Smuzhiyun 
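/*
 * Map an already-allocated buffer into the GPU virtual address space of
 * one or more devices. args->n_success lets user space resume a partially
 * completed mapping; after all mappings are queued, the call waits for the
 * page-table updates to complete and then flushes the TLBs on every
 * target GPU.
 */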
1406*4882a593Smuzhiyun static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1407*4882a593Smuzhiyun 					struct kfd_process *p, void *data)
1408*4882a593Smuzhiyun {
1409*4882a593Smuzhiyun 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1410*4882a593Smuzhiyun 	struct kfd_process_device *pdd, *peer_pdd;
1411*4882a593Smuzhiyun 	void *mem;
1412*4882a593Smuzhiyun 	struct kfd_dev *dev, *peer;
1413*4882a593Smuzhiyun 	long err = 0;
1414*4882a593Smuzhiyun 	int i;
1415*4882a593Smuzhiyun 	uint32_t *devices_arr = NULL;
1416*4882a593Smuzhiyun 
1417*4882a593Smuzhiyun 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1418*4882a593Smuzhiyun 	if (!dev)
1419*4882a593Smuzhiyun 		return -EINVAL;
1420*4882a593Smuzhiyun 
1421*4882a593Smuzhiyun 	if (!args->n_devices) {
1422*4882a593Smuzhiyun 		pr_debug("Device IDs array empty\n");
1423*4882a593Smuzhiyun 		return -EINVAL;
1424*4882a593Smuzhiyun 	}
1425*4882a593Smuzhiyun 	if (args->n_success > args->n_devices) {
1426*4882a593Smuzhiyun 		pr_debug("n_success exceeds n_devices\n");
1427*4882a593Smuzhiyun 		return -EINVAL;
1428*4882a593Smuzhiyun 	}
1429*4882a593Smuzhiyun 
1430*4882a593Smuzhiyun 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1431*4882a593Smuzhiyun 				    GFP_KERNEL);
1432*4882a593Smuzhiyun 	if (!devices_arr)
1433*4882a593Smuzhiyun 		return -ENOMEM;
1434*4882a593Smuzhiyun 
1435*4882a593Smuzhiyun 	err = copy_from_user(devices_arr,
1436*4882a593Smuzhiyun 			     (void __user *)args->device_ids_array_ptr,
1437*4882a593Smuzhiyun 			     args->n_devices * sizeof(*devices_arr));
1438*4882a593Smuzhiyun 	if (err != 0) {
1439*4882a593Smuzhiyun 		err = -EFAULT;
1440*4882a593Smuzhiyun 		goto copy_from_user_failed;
1441*4882a593Smuzhiyun 	}
1442*4882a593Smuzhiyun 
1443*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1444*4882a593Smuzhiyun 
1445*4882a593Smuzhiyun 	pdd = kfd_bind_process_to_device(dev, p);
1446*4882a593Smuzhiyun 	if (IS_ERR(pdd)) {
1447*4882a593Smuzhiyun 		err = PTR_ERR(pdd);
1448*4882a593Smuzhiyun 		goto bind_process_to_device_failed;
1449*4882a593Smuzhiyun 	}
1450*4882a593Smuzhiyun 
1451*4882a593Smuzhiyun 	mem = kfd_process_device_translate_handle(pdd,
1452*4882a593Smuzhiyun 						GET_IDR_HANDLE(args->handle));
1453*4882a593Smuzhiyun 	if (!mem) {
1454*4882a593Smuzhiyun 		err = -ENOMEM;
1455*4882a593Smuzhiyun 		goto get_mem_obj_from_handle_failed;
1456*4882a593Smuzhiyun 	}
1457*4882a593Smuzhiyun 
1458*4882a593Smuzhiyun 	for (i = args->n_success; i < args->n_devices; i++) {
1459*4882a593Smuzhiyun 		peer = kfd_device_by_id(devices_arr[i]);
1460*4882a593Smuzhiyun 		if (!peer) {
1461*4882a593Smuzhiyun 			pr_debug("Getting device by id failed for 0x%x\n",
1462*4882a593Smuzhiyun 				 devices_arr[i]);
1463*4882a593Smuzhiyun 			err = -EINVAL;
1464*4882a593Smuzhiyun 			goto get_mem_obj_from_handle_failed;
1465*4882a593Smuzhiyun 		}
1466*4882a593Smuzhiyun 
1467*4882a593Smuzhiyun 		peer_pdd = kfd_bind_process_to_device(peer, p);
1468*4882a593Smuzhiyun 		if (IS_ERR(peer_pdd)) {
1469*4882a593Smuzhiyun 			err = PTR_ERR(peer_pdd);
1470*4882a593Smuzhiyun 			goto get_mem_obj_from_handle_failed;
1471*4882a593Smuzhiyun 		}
1472*4882a593Smuzhiyun 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1473*4882a593Smuzhiyun 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1474*4882a593Smuzhiyun 		if (err) {
1475*4882a593Smuzhiyun 			pr_err("Failed to map to gpu %d/%d\n",
1476*4882a593Smuzhiyun 			       i, args->n_devices);
1477*4882a593Smuzhiyun 			goto map_memory_to_gpu_failed;
1478*4882a593Smuzhiyun 		}
1479*4882a593Smuzhiyun 		args->n_success = i+1;
1480*4882a593Smuzhiyun 	}
1481*4882a593Smuzhiyun 
1482*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1483*4882a593Smuzhiyun 
1484*4882a593Smuzhiyun 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1485*4882a593Smuzhiyun 	if (err) {
1486*4882a593Smuzhiyun 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1487*4882a593Smuzhiyun 		goto sync_memory_failed;
1488*4882a593Smuzhiyun 	}
1489*4882a593Smuzhiyun 
1490*4882a593Smuzhiyun 	/* Flush TLBs after waiting for the page table updates to complete */
1491*4882a593Smuzhiyun 	for (i = 0; i < args->n_devices; i++) {
1492*4882a593Smuzhiyun 		peer = kfd_device_by_id(devices_arr[i]);
1493*4882a593Smuzhiyun 		if (WARN_ON_ONCE(!peer))
1494*4882a593Smuzhiyun 			continue;
1495*4882a593Smuzhiyun 		peer_pdd = kfd_get_process_device_data(peer, p);
1496*4882a593Smuzhiyun 		if (WARN_ON_ONCE(!peer_pdd))
1497*4882a593Smuzhiyun 			continue;
1498*4882a593Smuzhiyun 		kfd_flush_tlb(peer_pdd);
1499*4882a593Smuzhiyun 	}
1500*4882a593Smuzhiyun 
1501*4882a593Smuzhiyun 	kfree(devices_arr);
1502*4882a593Smuzhiyun 
1503*4882a593Smuzhiyun 	return err;
1504*4882a593Smuzhiyun 
1505*4882a593Smuzhiyun bind_process_to_device_failed:
1506*4882a593Smuzhiyun get_mem_obj_from_handle_failed:
1507*4882a593Smuzhiyun map_memory_to_gpu_failed:
1508*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1509*4882a593Smuzhiyun copy_from_user_failed:
1510*4882a593Smuzhiyun sync_memory_failed:
1511*4882a593Smuzhiyun 	kfree(devices_arr);
1512*4882a593Smuzhiyun 
1513*4882a593Smuzhiyun 	return err;
1514*4882a593Smuzhiyun }
1515*4882a593Smuzhiyun 
1516*4882a593Smuzhiyun static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1517*4882a593Smuzhiyun 					struct kfd_process *p, void *data)
1518*4882a593Smuzhiyun {
1519*4882a593Smuzhiyun 	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1520*4882a593Smuzhiyun 	struct kfd_process_device *pdd, *peer_pdd;
1521*4882a593Smuzhiyun 	void *mem;
1522*4882a593Smuzhiyun 	struct kfd_dev *dev, *peer;
1523*4882a593Smuzhiyun 	long err = 0;
1524*4882a593Smuzhiyun 	uint32_t *devices_arr = NULL, i;
1525*4882a593Smuzhiyun 
1526*4882a593Smuzhiyun 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1527*4882a593Smuzhiyun 	if (!dev)
1528*4882a593Smuzhiyun 		return -EINVAL;
1529*4882a593Smuzhiyun 
1530*4882a593Smuzhiyun 	if (!args->n_devices) {
1531*4882a593Smuzhiyun 		pr_debug("Device IDs array empty\n");
1532*4882a593Smuzhiyun 		return -EINVAL;
1533*4882a593Smuzhiyun 	}
1534*4882a593Smuzhiyun 	if (args->n_success > args->n_devices) {
1535*4882a593Smuzhiyun 		pr_debug("n_success exceeds n_devices\n");
1536*4882a593Smuzhiyun 		return -EINVAL;
1537*4882a593Smuzhiyun 	}
1538*4882a593Smuzhiyun 
1539*4882a593Smuzhiyun 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1540*4882a593Smuzhiyun 				    GFP_KERNEL);
1541*4882a593Smuzhiyun 	if (!devices_arr)
1542*4882a593Smuzhiyun 		return -ENOMEM;
1543*4882a593Smuzhiyun 
1544*4882a593Smuzhiyun 	err = copy_from_user(devices_arr,
1545*4882a593Smuzhiyun 			     (void __user *)args->device_ids_array_ptr,
1546*4882a593Smuzhiyun 			     args->n_devices * sizeof(*devices_arr));
1547*4882a593Smuzhiyun 	if (err != 0) {
1548*4882a593Smuzhiyun 		err = -EFAULT;
1549*4882a593Smuzhiyun 		goto copy_from_user_failed;
1550*4882a593Smuzhiyun 	}
1551*4882a593Smuzhiyun 
1552*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1553*4882a593Smuzhiyun 
1554*4882a593Smuzhiyun 	pdd = kfd_get_process_device_data(dev, p);
1555*4882a593Smuzhiyun 	if (!pdd) {
1556*4882a593Smuzhiyun 		err = -EINVAL;
1557*4882a593Smuzhiyun 		goto bind_process_to_device_failed;
1558*4882a593Smuzhiyun 	}
1559*4882a593Smuzhiyun 
1560*4882a593Smuzhiyun 	mem = kfd_process_device_translate_handle(pdd,
1561*4882a593Smuzhiyun 						GET_IDR_HANDLE(args->handle));
1562*4882a593Smuzhiyun 	if (!mem) {
1563*4882a593Smuzhiyun 		err = -ENOMEM;
1564*4882a593Smuzhiyun 		goto get_mem_obj_from_handle_failed;
1565*4882a593Smuzhiyun 	}
1566*4882a593Smuzhiyun 
1567*4882a593Smuzhiyun 	for (i = args->n_success; i < args->n_devices; i++) {
1568*4882a593Smuzhiyun 		peer = kfd_device_by_id(devices_arr[i]);
1569*4882a593Smuzhiyun 		if (!peer) {
1570*4882a593Smuzhiyun 			err = -EINVAL;
1571*4882a593Smuzhiyun 			goto get_mem_obj_from_handle_failed;
1572*4882a593Smuzhiyun 		}
1573*4882a593Smuzhiyun 
1574*4882a593Smuzhiyun 		peer_pdd = kfd_get_process_device_data(peer, p);
1575*4882a593Smuzhiyun 		if (!peer_pdd) {
1576*4882a593Smuzhiyun 			err = -ENODEV;
1577*4882a593Smuzhiyun 			goto get_mem_obj_from_handle_failed;
1578*4882a593Smuzhiyun 		}
1579*4882a593Smuzhiyun 		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1580*4882a593Smuzhiyun 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1581*4882a593Smuzhiyun 		if (err) {
1582*4882a593Smuzhiyun 			pr_err("Failed to unmap from gpu %d/%d\n",
1583*4882a593Smuzhiyun 			       i, args->n_devices);
1584*4882a593Smuzhiyun 			goto unmap_memory_from_gpu_failed;
1585*4882a593Smuzhiyun 		}
1586*4882a593Smuzhiyun 		args->n_success = i+1;
1587*4882a593Smuzhiyun 	}
1588*4882a593Smuzhiyun 	kfree(devices_arr);
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1591*4882a593Smuzhiyun 
1592*4882a593Smuzhiyun 	return 0;
1593*4882a593Smuzhiyun 
1594*4882a593Smuzhiyun bind_process_to_device_failed:
1595*4882a593Smuzhiyun get_mem_obj_from_handle_failed:
1596*4882a593Smuzhiyun unmap_memory_from_gpu_failed:
1597*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1598*4882a593Smuzhiyun copy_from_user_failed:
1599*4882a593Smuzhiyun 	kfree(devices_arr);
1600*4882a593Smuzhiyun 	return err;
1601*4882a593Smuzhiyun }
1602*4882a593Smuzhiyun 
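/*
 * Attach (or detach, when num_gws is 0) the device's global wave sync
 * (GWS) resource to a user queue. Only devices that expose GWS and run
 * with the HWS scheduler support this.
 */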
1603*4882a593Smuzhiyun static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1604*4882a593Smuzhiyun 		struct kfd_process *p, void *data)
1605*4882a593Smuzhiyun {
1606*4882a593Smuzhiyun 	int retval;
1607*4882a593Smuzhiyun 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
1608*4882a593Smuzhiyun 	struct queue *q;
1609*4882a593Smuzhiyun 	struct kfd_dev *dev;
1610*4882a593Smuzhiyun 
1611*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1612*4882a593Smuzhiyun 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
1613*4882a593Smuzhiyun 
1614*4882a593Smuzhiyun 	if (q) {
1615*4882a593Smuzhiyun 		dev = q->device;
1616*4882a593Smuzhiyun 	} else {
1617*4882a593Smuzhiyun 		retval = -EINVAL;
1618*4882a593Smuzhiyun 		goto out_unlock;
1619*4882a593Smuzhiyun 	}
1620*4882a593Smuzhiyun 
1621*4882a593Smuzhiyun 	if (!dev->gws) {
1622*4882a593Smuzhiyun 		retval = -ENODEV;
1623*4882a593Smuzhiyun 		goto out_unlock;
1624*4882a593Smuzhiyun 	}
1625*4882a593Smuzhiyun 
1626*4882a593Smuzhiyun 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1627*4882a593Smuzhiyun 		retval = -ENODEV;
1628*4882a593Smuzhiyun 		goto out_unlock;
1629*4882a593Smuzhiyun 	}
1630*4882a593Smuzhiyun 
1631*4882a593Smuzhiyun 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1632*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1633*4882a593Smuzhiyun 
1634*4882a593Smuzhiyun 	args->first_gws = 0;
1635*4882a593Smuzhiyun 	return retval;
1636*4882a593Smuzhiyun 
1637*4882a593Smuzhiyun out_unlock:
1638*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1639*4882a593Smuzhiyun 	return retval;
1640*4882a593Smuzhiyun }
1641*4882a593Smuzhiyun 
1642*4882a593Smuzhiyun static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1643*4882a593Smuzhiyun 		struct kfd_process *p, void *data)
1644*4882a593Smuzhiyun {
1645*4882a593Smuzhiyun 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
1646*4882a593Smuzhiyun 	struct kfd_dev *dev = NULL;
1647*4882a593Smuzhiyun 	struct kgd_dev *dma_buf_kgd;
1648*4882a593Smuzhiyun 	void *metadata_buffer = NULL;
1649*4882a593Smuzhiyun 	uint32_t flags;
1650*4882a593Smuzhiyun 	unsigned int i;
1651*4882a593Smuzhiyun 	int r;
1652*4882a593Smuzhiyun 
1653*4882a593Smuzhiyun 	/* Find a KFD GPU device that supports the get_dmabuf_info query */
1654*4882a593Smuzhiyun 	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1655*4882a593Smuzhiyun 		if (dev)
1656*4882a593Smuzhiyun 			break;
1657*4882a593Smuzhiyun 	if (!dev)
1658*4882a593Smuzhiyun 		return -EINVAL;
1659*4882a593Smuzhiyun 
1660*4882a593Smuzhiyun 	if (args->metadata_ptr) {
1661*4882a593Smuzhiyun 		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1662*4882a593Smuzhiyun 		if (!metadata_buffer)
1663*4882a593Smuzhiyun 			return -ENOMEM;
1664*4882a593Smuzhiyun 	}
1665*4882a593Smuzhiyun 
1666*4882a593Smuzhiyun 	/* Get dmabuf info from KGD */
1667*4882a593Smuzhiyun 	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1668*4882a593Smuzhiyun 					  &dma_buf_kgd, &args->size,
1669*4882a593Smuzhiyun 					  metadata_buffer, args->metadata_size,
1670*4882a593Smuzhiyun 					  &args->metadata_size, &flags);
1671*4882a593Smuzhiyun 	if (r)
1672*4882a593Smuzhiyun 		goto exit;
1673*4882a593Smuzhiyun 
1674*4882a593Smuzhiyun 	/* Reverse-lookup gpu_id from kgd pointer */
1675*4882a593Smuzhiyun 	dev = kfd_device_by_kgd(dma_buf_kgd);
1676*4882a593Smuzhiyun 	if (!dev) {
1677*4882a593Smuzhiyun 		r = -EINVAL;
1678*4882a593Smuzhiyun 		goto exit;
1679*4882a593Smuzhiyun 	}
1680*4882a593Smuzhiyun 	args->gpu_id = dev->id;
1681*4882a593Smuzhiyun 	args->flags = flags;
1682*4882a593Smuzhiyun 
1683*4882a593Smuzhiyun 	/* Copy metadata buffer to user mode */
1684*4882a593Smuzhiyun 	if (metadata_buffer) {
1685*4882a593Smuzhiyun 		r = copy_to_user((void __user *)args->metadata_ptr,
1686*4882a593Smuzhiyun 				 metadata_buffer, args->metadata_size);
1687*4882a593Smuzhiyun 		if (r != 0)
1688*4882a593Smuzhiyun 			r = -EFAULT;
1689*4882a593Smuzhiyun 	}
1690*4882a593Smuzhiyun 
1691*4882a593Smuzhiyun exit:
1692*4882a593Smuzhiyun 	kfree(metadata_buffer);
1693*4882a593Smuzhiyun 
1694*4882a593Smuzhiyun 	return r;
1695*4882a593Smuzhiyun }
1696*4882a593Smuzhiyun 
1697*4882a593Smuzhiyun static int kfd_ioctl_import_dmabuf(struct file *filep,
1698*4882a593Smuzhiyun 				   struct kfd_process *p, void *data)
1699*4882a593Smuzhiyun {
1700*4882a593Smuzhiyun 	struct kfd_ioctl_import_dmabuf_args *args = data;
1701*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
1702*4882a593Smuzhiyun 	struct dma_buf *dmabuf;
1703*4882a593Smuzhiyun 	struct kfd_dev *dev;
1704*4882a593Smuzhiyun 	int idr_handle;
1705*4882a593Smuzhiyun 	uint64_t size;
1706*4882a593Smuzhiyun 	void *mem;
1707*4882a593Smuzhiyun 	int r;
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpu_id);
1710*4882a593Smuzhiyun 	if (!dev)
1711*4882a593Smuzhiyun 		return -EINVAL;
1712*4882a593Smuzhiyun 
1713*4882a593Smuzhiyun 	dmabuf = dma_buf_get(args->dmabuf_fd);
1714*4882a593Smuzhiyun 	if (IS_ERR(dmabuf))
1715*4882a593Smuzhiyun 		return PTR_ERR(dmabuf);
1716*4882a593Smuzhiyun 
1717*4882a593Smuzhiyun 	mutex_lock(&p->mutex);
1718*4882a593Smuzhiyun 
1719*4882a593Smuzhiyun 	pdd = kfd_bind_process_to_device(dev, p);
1720*4882a593Smuzhiyun 	if (IS_ERR(pdd)) {
1721*4882a593Smuzhiyun 		r = PTR_ERR(pdd);
1722*4882a593Smuzhiyun 		goto err_unlock;
1723*4882a593Smuzhiyun 	}
1724*4882a593Smuzhiyun 
1725*4882a593Smuzhiyun 	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1726*4882a593Smuzhiyun 					      args->va_addr, pdd->vm,
1727*4882a593Smuzhiyun 					      (struct kgd_mem **)&mem, &size,
1728*4882a593Smuzhiyun 					      NULL);
1729*4882a593Smuzhiyun 	if (r)
1730*4882a593Smuzhiyun 		goto err_unlock;
1731*4882a593Smuzhiyun 
1732*4882a593Smuzhiyun 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1733*4882a593Smuzhiyun 	if (idr_handle < 0) {
1734*4882a593Smuzhiyun 		r = -EFAULT;
1735*4882a593Smuzhiyun 		goto err_free;
1736*4882a593Smuzhiyun 	}
1737*4882a593Smuzhiyun 
1738*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1739*4882a593Smuzhiyun 	dma_buf_put(dmabuf);
1740*4882a593Smuzhiyun 
1741*4882a593Smuzhiyun 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1742*4882a593Smuzhiyun 
1743*4882a593Smuzhiyun 	return 0;
1744*4882a593Smuzhiyun 
1745*4882a593Smuzhiyun err_free:
1746*4882a593Smuzhiyun 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1747*4882a593Smuzhiyun err_unlock:
1748*4882a593Smuzhiyun 	mutex_unlock(&p->mutex);
1749*4882a593Smuzhiyun 	dma_buf_put(dmabuf);
1750*4882a593Smuzhiyun 	return r;
1751*4882a593Smuzhiyun }
1752*4882a593Smuzhiyun 
1753*4882a593Smuzhiyun /* Handle requests for watching SMI events */
1754*4882a593Smuzhiyun static int kfd_ioctl_smi_events(struct file *filep,
1755*4882a593Smuzhiyun 				struct kfd_process *p, void *data)
1756*4882a593Smuzhiyun {
1757*4882a593Smuzhiyun 	struct kfd_ioctl_smi_events_args *args = data;
1758*4882a593Smuzhiyun 	struct kfd_dev *dev;
1759*4882a593Smuzhiyun 
1760*4882a593Smuzhiyun 	dev = kfd_device_by_id(args->gpuid);
1761*4882a593Smuzhiyun 	if (!dev)
1762*4882a593Smuzhiyun 		return -EINVAL;
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun 	return kfd_smi_event_open(dev, &args->anon_fd);
1765*4882a593Smuzhiyun }
1766*4882a593Smuzhiyun 
1767*4882a593Smuzhiyun #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1768*4882a593Smuzhiyun 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1769*4882a593Smuzhiyun 			    .cmd_drv = 0, .name = #ioctl}
1770*4882a593Smuzhiyun 
1771*4882a593Smuzhiyun /** Ioctl table */
1772*4882a593Smuzhiyun static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1773*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1774*4882a593Smuzhiyun 			kfd_ioctl_get_version, 0),
1775*4882a593Smuzhiyun 
1776*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1777*4882a593Smuzhiyun 			kfd_ioctl_create_queue, 0),
1778*4882a593Smuzhiyun 
1779*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1780*4882a593Smuzhiyun 			kfd_ioctl_destroy_queue, 0),
1781*4882a593Smuzhiyun 
1782*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1783*4882a593Smuzhiyun 			kfd_ioctl_set_memory_policy, 0),
1784*4882a593Smuzhiyun 
1785*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1786*4882a593Smuzhiyun 			kfd_ioctl_get_clock_counters, 0),
1787*4882a593Smuzhiyun 
1788*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1789*4882a593Smuzhiyun 			kfd_ioctl_get_process_apertures, 0),
1790*4882a593Smuzhiyun 
1791*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1792*4882a593Smuzhiyun 			kfd_ioctl_update_queue, 0),
1793*4882a593Smuzhiyun 
1794*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1795*4882a593Smuzhiyun 			kfd_ioctl_create_event, 0),
1796*4882a593Smuzhiyun 
1797*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1798*4882a593Smuzhiyun 			kfd_ioctl_destroy_event, 0),
1799*4882a593Smuzhiyun 
1800*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1801*4882a593Smuzhiyun 			kfd_ioctl_set_event, 0),
1802*4882a593Smuzhiyun 
1803*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1804*4882a593Smuzhiyun 			kfd_ioctl_reset_event, 0),
1805*4882a593Smuzhiyun 
1806*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1807*4882a593Smuzhiyun 			kfd_ioctl_wait_events, 0),
1808*4882a593Smuzhiyun 
1809*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1810*4882a593Smuzhiyun 			kfd_ioctl_dbg_register, 0),
1811*4882a593Smuzhiyun 
1812*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1813*4882a593Smuzhiyun 			kfd_ioctl_dbg_unregister, 0),
1814*4882a593Smuzhiyun 
1815*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1816*4882a593Smuzhiyun 			kfd_ioctl_dbg_address_watch, 0),
1817*4882a593Smuzhiyun 
1818*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1819*4882a593Smuzhiyun 			kfd_ioctl_dbg_wave_control, 0),
1820*4882a593Smuzhiyun 
1821*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1822*4882a593Smuzhiyun 			kfd_ioctl_set_scratch_backing_va, 0),
1823*4882a593Smuzhiyun 
1824*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1825*4882a593Smuzhiyun 			kfd_ioctl_get_tile_config, 0),
1826*4882a593Smuzhiyun 
1827*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1828*4882a593Smuzhiyun 			kfd_ioctl_set_trap_handler, 0),
1829*4882a593Smuzhiyun 
1830*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1831*4882a593Smuzhiyun 			kfd_ioctl_get_process_apertures_new, 0),
1832*4882a593Smuzhiyun 
1833*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1834*4882a593Smuzhiyun 			kfd_ioctl_acquire_vm, 0),
1835*4882a593Smuzhiyun 
1836*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1837*4882a593Smuzhiyun 			kfd_ioctl_alloc_memory_of_gpu, 0),
1838*4882a593Smuzhiyun 
1839*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1840*4882a593Smuzhiyun 			kfd_ioctl_free_memory_of_gpu, 0),
1841*4882a593Smuzhiyun 
1842*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1843*4882a593Smuzhiyun 			kfd_ioctl_map_memory_to_gpu, 0),
1844*4882a593Smuzhiyun 
1845*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1846*4882a593Smuzhiyun 			kfd_ioctl_unmap_memory_from_gpu, 0),
1847*4882a593Smuzhiyun 
1848*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1849*4882a593Smuzhiyun 			kfd_ioctl_set_cu_mask, 0),
1850*4882a593Smuzhiyun 
1851*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1852*4882a593Smuzhiyun 			kfd_ioctl_get_queue_wave_state, 0),
1853*4882a593Smuzhiyun 
1854*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1855*4882a593Smuzhiyun 				kfd_ioctl_get_dmabuf_info, 0),
1856*4882a593Smuzhiyun 
1857*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1858*4882a593Smuzhiyun 				kfd_ioctl_import_dmabuf, 0),
1859*4882a593Smuzhiyun 
1860*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1861*4882a593Smuzhiyun 			kfd_ioctl_alloc_queue_gws, 0),
1862*4882a593Smuzhiyun 
1863*4882a593Smuzhiyun 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1864*4882a593Smuzhiyun 			kfd_ioctl_smi_events, 0),
1865*4882a593Smuzhiyun };
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
1868*4882a593Smuzhiyun 
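/*
 * Common ioctl dispatcher: the command number indexes amdkfd_ioctls[],
 * the argument struct is copied in/out according to the _IOC direction
 * bits, and the table's own command definition is trusted over the one
 * passed from user space.
 *
 * A minimal user-space sketch of driving this interface (illustration
 * only; it assumes the uapi definitions from <linux/kfd_ioctl.h> and the
 * /dev/kfd node created by this driver):
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	struct kfd_ioctl_get_version_args ver = {0};
 *	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
 *		printf("KFD interface %u.%u\n",
 *		       ver.major_version, ver.minor_version);
 */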
1869*4882a593Smuzhiyun static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1870*4882a593Smuzhiyun {
1871*4882a593Smuzhiyun 	struct kfd_process *process;
1872*4882a593Smuzhiyun 	amdkfd_ioctl_t *func;
1873*4882a593Smuzhiyun 	const struct amdkfd_ioctl_desc *ioctl = NULL;
1874*4882a593Smuzhiyun 	unsigned int nr = _IOC_NR(cmd);
1875*4882a593Smuzhiyun 	char stack_kdata[128];
1876*4882a593Smuzhiyun 	char *kdata = NULL;
1877*4882a593Smuzhiyun 	unsigned int usize, asize;
1878*4882a593Smuzhiyun 	int retcode = -EINVAL;
1879*4882a593Smuzhiyun 
1880*4882a593Smuzhiyun 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1881*4882a593Smuzhiyun 		goto err_i1;
1882*4882a593Smuzhiyun 
1883*4882a593Smuzhiyun 	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1884*4882a593Smuzhiyun 		u32 amdkfd_size;
1885*4882a593Smuzhiyun 
1886*4882a593Smuzhiyun 		ioctl = &amdkfd_ioctls[nr];
1887*4882a593Smuzhiyun 
1888*4882a593Smuzhiyun 		amdkfd_size = _IOC_SIZE(ioctl->cmd);
1889*4882a593Smuzhiyun 		usize = asize = _IOC_SIZE(cmd);
1890*4882a593Smuzhiyun 		if (amdkfd_size > asize)
1891*4882a593Smuzhiyun 			asize = amdkfd_size;
1892*4882a593Smuzhiyun 
1893*4882a593Smuzhiyun 		cmd = ioctl->cmd;
1894*4882a593Smuzhiyun 	} else
1895*4882a593Smuzhiyun 		goto err_i1;
1896*4882a593Smuzhiyun 
1897*4882a593Smuzhiyun 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1898*4882a593Smuzhiyun 
1899*4882a593Smuzhiyun 	/* Get the process struct from the filep. Only the process
1900*4882a593Smuzhiyun 	 * that opened /dev/kfd can use the file descriptor. Child
1901*4882a593Smuzhiyun 	 * processes need to create their own KFD device context.
1902*4882a593Smuzhiyun 	 */
1903*4882a593Smuzhiyun 	process = filep->private_data;
1904*4882a593Smuzhiyun 	if (process->lead_thread != current->group_leader) {
1905*4882a593Smuzhiyun 		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1906*4882a593Smuzhiyun 		retcode = -EBADF;
1907*4882a593Smuzhiyun 		goto err_i1;
1908*4882a593Smuzhiyun 	}
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun 	/* Do not trust userspace, use our own definition */
1911*4882a593Smuzhiyun 	func = ioctl->func;
1912*4882a593Smuzhiyun 
1913*4882a593Smuzhiyun 	if (unlikely(!func)) {
1914*4882a593Smuzhiyun 		dev_dbg(kfd_device, "no function\n");
1915*4882a593Smuzhiyun 		retcode = -EINVAL;
1916*4882a593Smuzhiyun 		goto err_i1;
1917*4882a593Smuzhiyun 	}
1918*4882a593Smuzhiyun 
1919*4882a593Smuzhiyun 	if (cmd & (IOC_IN | IOC_OUT)) {
1920*4882a593Smuzhiyun 		if (asize <= sizeof(stack_kdata)) {
1921*4882a593Smuzhiyun 			kdata = stack_kdata;
1922*4882a593Smuzhiyun 		} else {
1923*4882a593Smuzhiyun 			kdata = kmalloc(asize, GFP_KERNEL);
1924*4882a593Smuzhiyun 			if (!kdata) {
1925*4882a593Smuzhiyun 				retcode = -ENOMEM;
1926*4882a593Smuzhiyun 				goto err_i1;
1927*4882a593Smuzhiyun 			}
1928*4882a593Smuzhiyun 		}
1929*4882a593Smuzhiyun 		if (asize > usize)
1930*4882a593Smuzhiyun 			memset(kdata + usize, 0, asize - usize);
1931*4882a593Smuzhiyun 	}
1932*4882a593Smuzhiyun 
1933*4882a593Smuzhiyun 	if (cmd & IOC_IN) {
1934*4882a593Smuzhiyun 		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1935*4882a593Smuzhiyun 			retcode = -EFAULT;
1936*4882a593Smuzhiyun 			goto err_i1;
1937*4882a593Smuzhiyun 		}
1938*4882a593Smuzhiyun 	} else if (cmd & IOC_OUT) {
1939*4882a593Smuzhiyun 		memset(kdata, 0, usize);
1940*4882a593Smuzhiyun 	}
1941*4882a593Smuzhiyun 
1942*4882a593Smuzhiyun 	retcode = func(filep, process, kdata);
1943*4882a593Smuzhiyun 
1944*4882a593Smuzhiyun 	if (cmd & IOC_OUT)
1945*4882a593Smuzhiyun 		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1946*4882a593Smuzhiyun 			retcode = -EFAULT;
1947*4882a593Smuzhiyun 
1948*4882a593Smuzhiyun err_i1:
1949*4882a593Smuzhiyun 	if (!ioctl)
1950*4882a593Smuzhiyun 		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1951*4882a593Smuzhiyun 			  task_pid_nr(current), cmd, nr);
1952*4882a593Smuzhiyun 
1953*4882a593Smuzhiyun 	if (kdata != stack_kdata)
1954*4882a593Smuzhiyun 		kfree(kdata);
1955*4882a593Smuzhiyun 
1956*4882a593Smuzhiyun 	if (retcode)
1957*4882a593Smuzhiyun 		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1958*4882a593Smuzhiyun 				nr, arg, retcode);
1959*4882a593Smuzhiyun 
1960*4882a593Smuzhiyun 	return retcode;
1961*4882a593Smuzhiyun }
1962*4882a593Smuzhiyun 
1963*4882a593Smuzhiyun static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1964*4882a593Smuzhiyun 		      struct vm_area_struct *vma)
1965*4882a593Smuzhiyun {
1966*4882a593Smuzhiyun 	phys_addr_t address;
1967*4882a593Smuzhiyun 	int ret;
1968*4882a593Smuzhiyun 
1969*4882a593Smuzhiyun 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1970*4882a593Smuzhiyun 		return -EINVAL;
1971*4882a593Smuzhiyun 
1972*4882a593Smuzhiyun 	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1973*4882a593Smuzhiyun 
1974*4882a593Smuzhiyun 	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1975*4882a593Smuzhiyun 				VM_DONTDUMP | VM_PFNMAP;
1976*4882a593Smuzhiyun 
1977*4882a593Smuzhiyun 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1978*4882a593Smuzhiyun 
1979*4882a593Smuzhiyun 	pr_debug("pasid 0x%x mapping mmio page\n"
1980*4882a593Smuzhiyun 		 "     target user address == 0x%08llX\n"
1981*4882a593Smuzhiyun 		 "     physical address    == 0x%08llX\n"
1982*4882a593Smuzhiyun 		 "     vm_flags            == 0x%04lX\n"
1983*4882a593Smuzhiyun 		 "     size                == 0x%04lX\n",
1984*4882a593Smuzhiyun 		 process->pasid, (unsigned long long) vma->vm_start,
1985*4882a593Smuzhiyun 		 address, vma->vm_flags, PAGE_SIZE);
1986*4882a593Smuzhiyun 
1987*4882a593Smuzhiyun 	ret = io_remap_pfn_range(vma,
1988*4882a593Smuzhiyun 				vma->vm_start,
1989*4882a593Smuzhiyun 				address >> PAGE_SHIFT,
1990*4882a593Smuzhiyun 				PAGE_SIZE,
1991*4882a593Smuzhiyun 				vma->vm_page_prot);
1992*4882a593Smuzhiyun 	return ret;
1993*4882a593Smuzhiyun }
1994*4882a593Smuzhiyun 
1995*4882a593Smuzhiyun 
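/*
 * The mmap offset encodes what is being mapped: the KFD_MMAP_TYPE_* bits
 * select doorbells, the event page, reserved memory or the MMIO remap
 * page, and KFD_MMAP_GET_GPU_ID() recovers the target device for the
 * per-device mapping types.
 */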
1996*4882a593Smuzhiyun static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1997*4882a593Smuzhiyun {
1998*4882a593Smuzhiyun 	struct kfd_process *process;
1999*4882a593Smuzhiyun 	struct kfd_dev *dev = NULL;
2000*4882a593Smuzhiyun 	unsigned long mmap_offset;
2001*4882a593Smuzhiyun 	unsigned int gpu_id;
2002*4882a593Smuzhiyun 
2003*4882a593Smuzhiyun 	process = kfd_get_process(current);
2004*4882a593Smuzhiyun 	if (IS_ERR(process))
2005*4882a593Smuzhiyun 		return PTR_ERR(process);
2006*4882a593Smuzhiyun 
2007*4882a593Smuzhiyun 	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2008*4882a593Smuzhiyun 	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2009*4882a593Smuzhiyun 	if (gpu_id)
2010*4882a593Smuzhiyun 		dev = kfd_device_by_id(gpu_id);
2011*4882a593Smuzhiyun 
2012*4882a593Smuzhiyun 	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2013*4882a593Smuzhiyun 	case KFD_MMAP_TYPE_DOORBELL:
2014*4882a593Smuzhiyun 		if (!dev)
2015*4882a593Smuzhiyun 			return -ENODEV;
2016*4882a593Smuzhiyun 		return kfd_doorbell_mmap(dev, process, vma);
2017*4882a593Smuzhiyun 
2018*4882a593Smuzhiyun 	case KFD_MMAP_TYPE_EVENTS:
2019*4882a593Smuzhiyun 		return kfd_event_mmap(process, vma);
2020*4882a593Smuzhiyun 
2021*4882a593Smuzhiyun 	case KFD_MMAP_TYPE_RESERVED_MEM:
2022*4882a593Smuzhiyun 		if (!dev)
2023*4882a593Smuzhiyun 			return -ENODEV;
2024*4882a593Smuzhiyun 		return kfd_reserved_mem_mmap(dev, process, vma);
2025*4882a593Smuzhiyun 	case KFD_MMAP_TYPE_MMIO:
2026*4882a593Smuzhiyun 		if (!dev)
2027*4882a593Smuzhiyun 			return -ENODEV;
2028*4882a593Smuzhiyun 		return kfd_mmio_mmap(dev, process, vma);
2029*4882a593Smuzhiyun 	}
2030*4882a593Smuzhiyun 
2031*4882a593Smuzhiyun 	return -EFAULT;
2032*4882a593Smuzhiyun }
2033