xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Copyright 2014 Advanced Micro Devices, Inc.
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * Permission is hereby granted, free of charge, to any person obtaining a
5*4882a593Smuzhiyun  * copy of this software and associated documentation files (the "Software"),
6*4882a593Smuzhiyun  * to deal in the Software without restriction, including without limitation
7*4882a593Smuzhiyun  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*4882a593Smuzhiyun  * and/or sell copies of the Software, and to permit persons to whom the
9*4882a593Smuzhiyun  * Software is furnished to do so, subject to the following conditions:
10*4882a593Smuzhiyun  *
11*4882a593Smuzhiyun  * The above copyright notice and this permission notice shall be included in
12*4882a593Smuzhiyun  * all copies or substantial portions of the Software.
13*4882a593Smuzhiyun  *
14*4882a593Smuzhiyun  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15*4882a593Smuzhiyun  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16*4882a593Smuzhiyun  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17*4882a593Smuzhiyun  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18*4882a593Smuzhiyun  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19*4882a593Smuzhiyun  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20*4882a593Smuzhiyun  * OTHER DEALINGS IN THE SOFTWARE.
21*4882a593Smuzhiyun  *
22*4882a593Smuzhiyun  */
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun #include <linux/types.h>
25*4882a593Smuzhiyun #include <linux/kernel.h>
26*4882a593Smuzhiyun #include <linux/log2.h>
27*4882a593Smuzhiyun #include <linux/sched.h>
28*4882a593Smuzhiyun #include <linux/slab.h>
29*4882a593Smuzhiyun #include <linux/mutex.h>
30*4882a593Smuzhiyun #include <linux/device.h>
31*4882a593Smuzhiyun 
32*4882a593Smuzhiyun #include "kfd_pm4_headers.h"
33*4882a593Smuzhiyun #include "kfd_pm4_headers_diq.h"
34*4882a593Smuzhiyun #include "kfd_kernel_queue.h"
35*4882a593Smuzhiyun #include "kfd_priv.h"
36*4882a593Smuzhiyun #include "kfd_pm4_opcodes.h"
37*4882a593Smuzhiyun #include "cik_regs.h"
38*4882a593Smuzhiyun #include "kfd_dbgmgr.h"
39*4882a593Smuzhiyun #include "kfd_dbgdev.h"
40*4882a593Smuzhiyun #include "kfd_device_queue_manager.h"
41*4882a593Smuzhiyun 
/*
 * dbgdev_address_watch_disable_nodiq - Disable every address-watch point on
 * the device by calling straight through the kfd2kgd interface (no DIQ
 * packet submission involved).
 */
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
	dev->kfd2kgd->address_watch_disable(dev->kgd);
}
46*4882a593Smuzhiyun 
/*
 * dbgdev_diq_submit_ib - Submit an indirect buffer (IB) through the Debug
 * Interface Queue and wait until the CP has processed it.
 *
 * @dbgdev:        debug device whose DIQ kernel queue is used
 * @pasid:         PASID under which the IB is executed
 * @vmid0_address: GPU address of the IB contents
 * @packet_buff:   CPU pointer to the IB contents (filled by the caller)
 * @size_in_bytes: IB size in bytes; must be non-zero
 *
 * Two packets are placed on the DIQ: an INDIRECT_BUFFER_PASID packet that
 * points at the caller's IB, followed by a RELEASE_MEM packet used purely
 * for GPU->CPU synchronization.  RELEASE_MEM writes a fence value into a
 * temporary GTT allocation once the pipe has drained; we poll that value
 * with a timeout before returning.
 *
 * Return: 0 on success, negative errno on queue/allocation failure or
 * fence-wait timeout.
 */
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
				u32 pasid, uint64_t vmid0_address,
				uint32_t *packet_buff, size_t size_in_bytes)
{
	struct pm4__release_mem *rm_packet;
	struct pm4__indirect_buffer_pasid *ib_packet;
	struct kfd_mem_obj *mem_obj;
	size_t pq_packets_size_in_bytes;
	union ULARGE_INTEGER *largep;
	union ULARGE_INTEGER addr;
	struct kernel_queue *kq;
	uint64_t *rm_state;
	unsigned int *ib_packet_buff;
	int status;

	if (WARN_ON(!size_in_bytes))
		return -EINVAL;

	kq = dbgdev->kq;

	/* PQ space needed for the IB packet plus the sync packet. */
	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
				sizeof(struct pm4__indirect_buffer_pasid);

	/*
	 * We acquire a buffer from DIQ
	 * The receive packet buff will be sitting on the Indirect Buffer
	 * and in the PQ we put the IB packet + sync packet(s).
	 */
	status = kq_acquire_packet_buffer(kq,
				pq_packets_size_in_bytes / sizeof(uint32_t),
				&ib_packet_buff);
	if (status) {
		pr_err("kq_acquire_packet_buffer failed\n");
		return status;
	}

	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);

	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);

	ib_packet->header.count = 3;
	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
	ib_packet->header.type = PM4_TYPE_3;

	/* Split the 64-bit IB address into the packet's lo/hi fields. */
	largep = (union ULARGE_INTEGER *) &vmid0_address;

	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;

	/*
	 * IB size in dwords plus flag bits 23 and 31 — presumably
	 * chain/valid control per the PM4 packet layout; confirm against
	 * the PM4 INDIRECT_BUFFER spec for this ASIC generation.
	 */
	ib_packet->control = (1 << 23) | (1 << 31) |
			((size_in_bytes / 4) & 0xfffff);

	ib_packet->bitfields5.pasid = pasid;

	/*
	 * for now we use release mem for GPU-CPU synchronization
	 * Consider WaitRegMem + WriteData as a better alternative
	 * we get a GART allocations ( gpu/cpu mapping),
	 * for the sync variable, and wait until:
	 * (a) Sync with HW
	 * (b) Sync var is written by CP to mem.
	 */
	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
			(sizeof(struct pm4__indirect_buffer_pasid) /
					sizeof(unsigned int)));

	/* Small GTT allocation to hold the 64-bit fence value. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
					&mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		/* Nothing was submitted yet; release the PQ space. */
		kq_rollback_packet(kq);
		return status;
	}

	rm_state = (uint64_t *) mem_obj->cpu_ptr;

	/* Fence starts "pending"; the CP overwrites it with ACTIVE below. */
	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;

	rm_packet->header.opcode = IT_RELEASE_MEM;
	rm_packet->header.type = PM4_TYPE_3;
	rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;

	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	rm_packet->bitfields2.event_index =
				event_index___release_mem__end_of_pipe;

	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
	rm_packet->bitfields2.atc = 0;
	rm_packet->bitfields2.tc_wb_action_ena = 1;

	/* Destination of the fence write: the GTT sync variable. */
	addr.quad_part = mem_obj->gpu_addr;

	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
	rm_packet->address_hi = addr.u.high_part;

	rm_packet->bitfields3.data_sel =
				data_sel___release_mem__send_64_bit_data;

	rm_packet->bitfields3.int_sel =
			int_sel___release_mem__send_data_after_write_confirm;

	rm_packet->bitfields3.dst_sel =
			dst_sel___release_mem__memory_controller;

	rm_packet->data_lo = QUEUESTATE__ACTIVE;

	kq_submit_packet(kq);

	/* Wait till CP writes sync code: */
	status = amdkfd_fence_wait_timeout(
			rm_state,
			QUEUESTATE__ACTIVE, 1500);

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

	return status;
}
165*4882a593Smuzhiyun 
dbgdev_register_nodiq(struct kfd_dbgdev * dbgdev)166*4882a593Smuzhiyun static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
167*4882a593Smuzhiyun {
168*4882a593Smuzhiyun 	/*
169*4882a593Smuzhiyun 	 * no action is needed in this case,
170*4882a593Smuzhiyun 	 * just make sure diq will not be used
171*4882a593Smuzhiyun 	 */
172*4882a593Smuzhiyun 
173*4882a593Smuzhiyun 	dbgdev->kq = NULL;
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	return 0;
176*4882a593Smuzhiyun }
177*4882a593Smuzhiyun 
dbgdev_register_diq(struct kfd_dbgdev * dbgdev)178*4882a593Smuzhiyun static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
179*4882a593Smuzhiyun {
180*4882a593Smuzhiyun 	struct queue_properties properties;
181*4882a593Smuzhiyun 	unsigned int qid;
182*4882a593Smuzhiyun 	struct kernel_queue *kq = NULL;
183*4882a593Smuzhiyun 	int status;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	properties.type = KFD_QUEUE_TYPE_DIQ;
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188*4882a593Smuzhiyun 				&properties, &qid, NULL);
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	if (status) {
191*4882a593Smuzhiyun 		pr_err("Failed to create DIQ\n");
192*4882a593Smuzhiyun 		return status;
193*4882a593Smuzhiyun 	}
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	pr_debug("DIQ Created with queue id: %d\n", qid);
196*4882a593Smuzhiyun 
197*4882a593Smuzhiyun 	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	if (!kq) {
200*4882a593Smuzhiyun 		pr_err("Error getting DIQ\n");
201*4882a593Smuzhiyun 		pqm_destroy_queue(dbgdev->pqm, qid);
202*4882a593Smuzhiyun 		return -EFAULT;
203*4882a593Smuzhiyun 	}
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun 	dbgdev->kq = kq;
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	return status;
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun 
dbgdev_unregister_nodiq(struct kfd_dbgdev * dbgdev)210*4882a593Smuzhiyun static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
211*4882a593Smuzhiyun {
212*4882a593Smuzhiyun 	/* disable watch address */
213*4882a593Smuzhiyun 	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
214*4882a593Smuzhiyun 	return 0;
215*4882a593Smuzhiyun }
216*4882a593Smuzhiyun 
dbgdev_unregister_diq(struct kfd_dbgdev * dbgdev)217*4882a593Smuzhiyun static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
218*4882a593Smuzhiyun {
219*4882a593Smuzhiyun 	/* todo - disable address watch */
220*4882a593Smuzhiyun 	int status;
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	status = pqm_destroy_queue(dbgdev->pqm,
223*4882a593Smuzhiyun 			dbgdev->kq->queue->properties.queue_id);
224*4882a593Smuzhiyun 	dbgdev->kq = NULL;
225*4882a593Smuzhiyun 
226*4882a593Smuzhiyun 	return status;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun 
dbgdev_address_watch_set_registers(const struct dbg_address_watch_info * adw_info,union TCP_WATCH_ADDR_H_BITS * addrHi,union TCP_WATCH_ADDR_L_BITS * addrLo,union TCP_WATCH_CNTL_BITS * cntl,unsigned int index,unsigned int vmid)229*4882a593Smuzhiyun static void dbgdev_address_watch_set_registers(
230*4882a593Smuzhiyun 			const struct dbg_address_watch_info *adw_info,
231*4882a593Smuzhiyun 			union TCP_WATCH_ADDR_H_BITS *addrHi,
232*4882a593Smuzhiyun 			union TCP_WATCH_ADDR_L_BITS *addrLo,
233*4882a593Smuzhiyun 			union TCP_WATCH_CNTL_BITS *cntl,
234*4882a593Smuzhiyun 			unsigned int index, unsigned int vmid)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun 	union ULARGE_INTEGER addr;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 	addr.quad_part = 0;
239*4882a593Smuzhiyun 	addrHi->u32All = 0;
240*4882a593Smuzhiyun 	addrLo->u32All = 0;
241*4882a593Smuzhiyun 	cntl->u32All = 0;
242*4882a593Smuzhiyun 
243*4882a593Smuzhiyun 	if (adw_info->watch_mask)
244*4882a593Smuzhiyun 		cntl->bitfields.mask =
245*4882a593Smuzhiyun 			(uint32_t) (adw_info->watch_mask[index] &
246*4882a593Smuzhiyun 					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
247*4882a593Smuzhiyun 	else
248*4882a593Smuzhiyun 		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
251*4882a593Smuzhiyun 
252*4882a593Smuzhiyun 	addrHi->bitfields.addr = addr.u.high_part &
253*4882a593Smuzhiyun 					ADDRESS_WATCH_REG_ADDHIGH_MASK;
254*4882a593Smuzhiyun 	addrLo->bitfields.addr =
255*4882a593Smuzhiyun 			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	cntl->bitfields.mode = adw_info->watch_mode[index];
258*4882a593Smuzhiyun 	cntl->bitfields.vmid = (uint32_t) vmid;
259*4882a593Smuzhiyun 	/* for now assume it is an ATC address */
260*4882a593Smuzhiyun 	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
263*4882a593Smuzhiyun 	pr_debug("\t\t%20s %08x\n", "set reg add high :",
264*4882a593Smuzhiyun 			addrHi->bitfields.addr);
265*4882a593Smuzhiyun 	pr_debug("\t\t%20s %08x\n", "set reg add low :",
266*4882a593Smuzhiyun 			addrLo->bitfields.addr);
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun 
dbgdev_address_watch_nodiq(struct kfd_dbgdev * dbgdev,struct dbg_address_watch_info * adw_info)269*4882a593Smuzhiyun static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
270*4882a593Smuzhiyun 				      struct dbg_address_watch_info *adw_info)
271*4882a593Smuzhiyun {
272*4882a593Smuzhiyun 	union TCP_WATCH_ADDR_H_BITS addrHi;
273*4882a593Smuzhiyun 	union TCP_WATCH_ADDR_L_BITS addrLo;
274*4882a593Smuzhiyun 	union TCP_WATCH_CNTL_BITS cntl;
275*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
276*4882a593Smuzhiyun 	unsigned int i;
277*4882a593Smuzhiyun 
278*4882a593Smuzhiyun 	/* taking the vmid for that process on the safe way using pdd */
279*4882a593Smuzhiyun 	pdd = kfd_get_process_device_data(dbgdev->dev,
280*4882a593Smuzhiyun 					adw_info->process);
281*4882a593Smuzhiyun 	if (!pdd) {
282*4882a593Smuzhiyun 		pr_err("Failed to get pdd for wave control no DIQ\n");
283*4882a593Smuzhiyun 		return -EFAULT;
284*4882a593Smuzhiyun 	}
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun 	addrHi.u32All = 0;
287*4882a593Smuzhiyun 	addrLo.u32All = 0;
288*4882a593Smuzhiyun 	cntl.u32All = 0;
289*4882a593Smuzhiyun 
290*4882a593Smuzhiyun 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
291*4882a593Smuzhiyun 			(adw_info->num_watch_points == 0)) {
292*4882a593Smuzhiyun 		pr_err("num_watch_points is invalid\n");
293*4882a593Smuzhiyun 		return -EINVAL;
294*4882a593Smuzhiyun 	}
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 	if (!adw_info->watch_mode || !adw_info->watch_address) {
297*4882a593Smuzhiyun 		pr_err("adw_info fields are not valid\n");
298*4882a593Smuzhiyun 		return -EINVAL;
299*4882a593Smuzhiyun 	}
300*4882a593Smuzhiyun 
301*4882a593Smuzhiyun 	for (i = 0; i < adw_info->num_watch_points; i++) {
302*4882a593Smuzhiyun 		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
303*4882a593Smuzhiyun 						&cntl, i, pdd->qpd.vmid);
304*4882a593Smuzhiyun 
305*4882a593Smuzhiyun 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
306*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "register index :", i);
307*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
308*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Address Low is :",
309*4882a593Smuzhiyun 				addrLo.bitfields.addr);
310*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Address high is :",
311*4882a593Smuzhiyun 				addrHi.bitfields.addr);
312*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Address high is :",
313*4882a593Smuzhiyun 				addrHi.bitfields.addr);
314*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
315*4882a593Smuzhiyun 				cntl.bitfields.mask);
316*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
317*4882a593Smuzhiyun 				cntl.bitfields.mode);
318*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
319*4882a593Smuzhiyun 				cntl.bitfields.vmid);
320*4882a593Smuzhiyun 		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
321*4882a593Smuzhiyun 				cntl.bitfields.atc);
322*4882a593Smuzhiyun 		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
323*4882a593Smuzhiyun 
324*4882a593Smuzhiyun 		pdd->dev->kfd2kgd->address_watch_execute(
325*4882a593Smuzhiyun 						dbgdev->dev->kgd,
326*4882a593Smuzhiyun 						i,
327*4882a593Smuzhiyun 						cntl.u32All,
328*4882a593Smuzhiyun 						addrHi.u32All,
329*4882a593Smuzhiyun 						addrLo.u32All);
330*4882a593Smuzhiyun 	}
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun 	return 0;
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun 
/*
 * dbgdev_address_watch_diq - Program TCP address-watch registers by sending
 * SET_CONFIG_REG packets through the Debug Interface Queue.
 *
 * @dbgdev:   debug device (provides the DIQ and kgd callbacks)
 * @adw_info: watch-point description (addresses, modes, optional masks)
 *
 * For each watch point, an IB holding four SET_CONFIG_REG packets is built
 * in a GTT allocation and submitted via dbgdev_diq_submit_ib():
 *   [0] CNTL written with the valid bit still clear (CP inserts the VMID),
 *   [1] ADDR_HI,  [2] ADDR_LO,
 *   [3] CNTL re-written, now carrying the valid bit when the address is
 *       non-zero.
 *
 * Return: 0 on success, negative errno on validation, allocation, or
 * submission failure.
 */
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
				    struct dbg_address_watch_info *adw_info)
{
	struct pm4__set_config_reg *packets_vec;
	union TCP_WATCH_ADDR_H_BITS addrHi;
	union TCP_WATCH_ADDR_L_BITS addrLo;
	union TCP_WATCH_CNTL_BITS cntl;
	struct kfd_mem_obj *mem_obj;
	unsigned int aw_reg_add_dword;
	uint32_t *packet_buff_uint;
	unsigned int i;
	int status;
	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
	/* we do not control the vmid in DIQ mode, just a place holder */
	unsigned int vmid = 0;

	addrHi.u32All = 0;
	addrLo.u32All = 0;
	cntl.u32All = 0;

	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
			(adw_info->num_watch_points == 0)) {
		pr_err("num_watch_points is invalid\n");
		return -EINVAL;
	}

	if (!adw_info->watch_mode || !adw_info->watch_address) {
		pr_err("adw_info fields are not valid\n");
		return -EINVAL;
	}

	/* GPU-visible backing store for the four-packet IB. */
	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

	if (status) {
		pr_err("Failed to allocate GART memory\n");
		return status;
	}

	packet_buff_uint = mem_obj->cpu_ptr;

	memset(packet_buff_uint, 0, ib_size);

	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);

	/*
	 * Pre-build the common headers.  Packets 0 and 3 target the CNTL
	 * register and ask the CP to insert the VMID; packets 1 and 2 write
	 * ADDR_HI/ADDR_LO verbatim.
	 */
	packets_vec[0].header.count = 1;
	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
	packets_vec[0].header.type = PM4_TYPE_3;
	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[0].bitfields2.insert_vmid = 1;
	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[1].bitfields2.insert_vmid = 0;
	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[2].bitfields2.insert_vmid = 0;
	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
	packets_vec[3].bitfields2.insert_vmid = 1;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		dbgdev_address_watch_set_registers(adw_info,
						&addrHi,
						&addrLo,
						&cntl,
						i,
						vmid);

		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
		pr_debug("\t\t%20s %08x\n", "register index :", i);
		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
		pr_debug("\t\t%20s %p\n", "Add ptr is :",
				adw_info->watch_address);
		pr_debug("\t\t%20s %08llx\n", "Add     is :",
				adw_info->watch_address[i]);
		pr_debug("\t\t%20s %08x\n", "Address Low is :",
				addrLo.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Address high is :",
				addrHi.bitfields.addr);
		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
				cntl.bitfields.mask);
		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
				cntl.bitfields.mode);
		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
				cntl.bitfields.vmid);
		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
				cntl.bitfields.atc);
		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");

		/* Packet 0: initial CNTL write (valid bit still clear). */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[0].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

		packets_vec[0].reg_data[0] = cntl.u32All;

		/* Packet 1: high half of the watch address. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_HI);

		packets_vec[1].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[1].reg_data[0] = addrHi.u32All;

		/* Packet 2: low half of the watch address. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_ADDR_LO);

		packets_vec[2].bitfields2.reg_offset =
				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[2].reg_data[0] = addrLo.u32All;

		/* enable watch flag if address is not zero*/
		if (adw_info->watch_address[i] > 0)
			cntl.bitfields.valid = 1;
		else
			cntl.bitfields.valid = 0;

		/* Packet 3: final CNTL write, now carrying the valid bit. */
		aw_reg_add_dword =
				dbgdev->dev->kfd2kgd->address_watch_get_offset(
					dbgdev->dev->kgd,
					i,
					ADDRESS_WATCH_REG_CNTL);

		packets_vec[3].bitfields2.reg_offset =
					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
		packets_vec[3].reg_data[0] = cntl.u32All;

		/* Execute the IB under the target process's PASID. */
		status = dbgdev_diq_submit_ib(
					dbgdev,
					adw_info->process->pasid,
					mem_obj->gpu_addr,
					packet_buff_uint,
					ib_size);

		if (status) {
			pr_err("Failed to submit IB to DIQ\n");
			break;
		}
	}

	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
	return status;
}
484*4882a593Smuzhiyun 
dbgdev_wave_control_set_registers(struct dbg_wave_control_info * wac_info,union SQ_CMD_BITS * in_reg_sq_cmd,union GRBM_GFX_INDEX_BITS * in_reg_gfx_index)485*4882a593Smuzhiyun static int dbgdev_wave_control_set_registers(
486*4882a593Smuzhiyun 				struct dbg_wave_control_info *wac_info,
487*4882a593Smuzhiyun 				union SQ_CMD_BITS *in_reg_sq_cmd,
488*4882a593Smuzhiyun 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
489*4882a593Smuzhiyun {
490*4882a593Smuzhiyun 	int status = 0;
491*4882a593Smuzhiyun 	union SQ_CMD_BITS reg_sq_cmd;
492*4882a593Smuzhiyun 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
493*4882a593Smuzhiyun 	struct HsaDbgWaveMsgAMDGen2 *pMsg;
494*4882a593Smuzhiyun 
495*4882a593Smuzhiyun 	reg_sq_cmd.u32All = 0;
496*4882a593Smuzhiyun 	reg_gfx_index.u32All = 0;
497*4882a593Smuzhiyun 	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 	switch (wac_info->mode) {
500*4882a593Smuzhiyun 	/* Send command to single wave */
501*4882a593Smuzhiyun 	case HSA_DBG_WAVEMODE_SINGLE:
502*4882a593Smuzhiyun 		/*
503*4882a593Smuzhiyun 		 * Limit access to the process waves only,
504*4882a593Smuzhiyun 		 * by setting vmid check
505*4882a593Smuzhiyun 		 */
506*4882a593Smuzhiyun 		reg_sq_cmd.bits.check_vmid = 1;
507*4882a593Smuzhiyun 		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
508*4882a593Smuzhiyun 		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
509*4882a593Smuzhiyun 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
510*4882a593Smuzhiyun 
511*4882a593Smuzhiyun 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
512*4882a593Smuzhiyun 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
513*4882a593Smuzhiyun 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun 		break;
516*4882a593Smuzhiyun 
517*4882a593Smuzhiyun 	/* Send command to all waves with matching VMID */
518*4882a593Smuzhiyun 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
519*4882a593Smuzhiyun 
520*4882a593Smuzhiyun 		reg_gfx_index.bits.sh_broadcast_writes = 1;
521*4882a593Smuzhiyun 		reg_gfx_index.bits.se_broadcast_writes = 1;
522*4882a593Smuzhiyun 		reg_gfx_index.bits.instance_broadcast_writes = 1;
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 		break;
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	/* Send command to all CU waves with matching VMID */
529*4882a593Smuzhiyun 	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun 		reg_sq_cmd.bits.check_vmid = 1;
532*4882a593Smuzhiyun 		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
533*4882a593Smuzhiyun 
534*4882a593Smuzhiyun 		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
535*4882a593Smuzhiyun 		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
536*4882a593Smuzhiyun 		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun 		break;
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	default:
541*4882a593Smuzhiyun 		return -EINVAL;
542*4882a593Smuzhiyun 	}
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun 	switch (wac_info->operand) {
545*4882a593Smuzhiyun 	case HSA_DBG_WAVEOP_HALT:
546*4882a593Smuzhiyun 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
547*4882a593Smuzhiyun 		break;
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	case HSA_DBG_WAVEOP_RESUME:
550*4882a593Smuzhiyun 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
551*4882a593Smuzhiyun 		break;
552*4882a593Smuzhiyun 
553*4882a593Smuzhiyun 	case HSA_DBG_WAVEOP_KILL:
554*4882a593Smuzhiyun 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
555*4882a593Smuzhiyun 		break;
556*4882a593Smuzhiyun 
557*4882a593Smuzhiyun 	case HSA_DBG_WAVEOP_DEBUG:
558*4882a593Smuzhiyun 		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
559*4882a593Smuzhiyun 		break;
560*4882a593Smuzhiyun 
561*4882a593Smuzhiyun 	case HSA_DBG_WAVEOP_TRAP:
562*4882a593Smuzhiyun 		if (wac_info->trapId < MAX_TRAPID) {
563*4882a593Smuzhiyun 			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
564*4882a593Smuzhiyun 			reg_sq_cmd.bits.trap_id = wac_info->trapId;
565*4882a593Smuzhiyun 		} else {
566*4882a593Smuzhiyun 			status = -EINVAL;
567*4882a593Smuzhiyun 		}
568*4882a593Smuzhiyun 		break;
569*4882a593Smuzhiyun 
570*4882a593Smuzhiyun 	default:
571*4882a593Smuzhiyun 		status = -EINVAL;
572*4882a593Smuzhiyun 		break;
573*4882a593Smuzhiyun 	}
574*4882a593Smuzhiyun 
575*4882a593Smuzhiyun 	if (status == 0) {
576*4882a593Smuzhiyun 		*in_reg_sq_cmd = reg_sq_cmd;
577*4882a593Smuzhiyun 		*in_reg_gfx_index = reg_gfx_index;
578*4882a593Smuzhiyun 	}
579*4882a593Smuzhiyun 
580*4882a593Smuzhiyun 	return status;
581*4882a593Smuzhiyun }
582*4882a593Smuzhiyun 
dbgdev_wave_control_diq(struct kfd_dbgdev * dbgdev,struct dbg_wave_control_info * wac_info)583*4882a593Smuzhiyun static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
584*4882a593Smuzhiyun 					struct dbg_wave_control_info *wac_info)
585*4882a593Smuzhiyun {
586*4882a593Smuzhiyun 
587*4882a593Smuzhiyun 	int status;
588*4882a593Smuzhiyun 	union SQ_CMD_BITS reg_sq_cmd;
589*4882a593Smuzhiyun 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
590*4882a593Smuzhiyun 	struct kfd_mem_obj *mem_obj;
591*4882a593Smuzhiyun 	uint32_t *packet_buff_uint;
592*4882a593Smuzhiyun 	struct pm4__set_config_reg *packets_vec;
593*4882a593Smuzhiyun 	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
594*4882a593Smuzhiyun 
595*4882a593Smuzhiyun 	reg_sq_cmd.u32All = 0;
596*4882a593Smuzhiyun 
597*4882a593Smuzhiyun 	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
598*4882a593Smuzhiyun 							&reg_gfx_index);
599*4882a593Smuzhiyun 	if (status) {
600*4882a593Smuzhiyun 		pr_err("Failed to set wave control registers\n");
601*4882a593Smuzhiyun 		return status;
602*4882a593Smuzhiyun 	}
603*4882a593Smuzhiyun 
604*4882a593Smuzhiyun 	/* we do not control the VMID in DIQ, so reset it to a known value */
605*4882a593Smuzhiyun 	reg_sq_cmd.bits.vm_id = 0;
606*4882a593Smuzhiyun 
607*4882a593Smuzhiyun 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun 	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
610*4882a593Smuzhiyun 	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
611*4882a593Smuzhiyun 	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
612*4882a593Smuzhiyun 	pr_debug("\t\t msg value is: %u\n",
613*4882a593Smuzhiyun 			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
614*4882a593Smuzhiyun 	pr_debug("\t\t vmid      is: N/A\n");
615*4882a593Smuzhiyun 
616*4882a593Smuzhiyun 	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
617*4882a593Smuzhiyun 	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
618*4882a593Smuzhiyun 	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
619*4882a593Smuzhiyun 	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
620*4882a593Smuzhiyun 	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
621*4882a593Smuzhiyun 	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
622*4882a593Smuzhiyun 	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
623*4882a593Smuzhiyun 
624*4882a593Smuzhiyun 	pr_debug("\t\t ibw       is : %u\n",
625*4882a593Smuzhiyun 			reg_gfx_index.bitfields.instance_broadcast_writes);
626*4882a593Smuzhiyun 	pr_debug("\t\t ii        is : %u\n",
627*4882a593Smuzhiyun 			reg_gfx_index.bitfields.instance_index);
628*4882a593Smuzhiyun 	pr_debug("\t\t sebw      is : %u\n",
629*4882a593Smuzhiyun 			reg_gfx_index.bitfields.se_broadcast_writes);
630*4882a593Smuzhiyun 	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
631*4882a593Smuzhiyun 	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
632*4882a593Smuzhiyun 	pr_debug("\t\t sbw       is : %u\n",
633*4882a593Smuzhiyun 			reg_gfx_index.bitfields.sh_broadcast_writes);
634*4882a593Smuzhiyun 
635*4882a593Smuzhiyun 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
636*4882a593Smuzhiyun 
637*4882a593Smuzhiyun 	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
638*4882a593Smuzhiyun 
639*4882a593Smuzhiyun 	if (status != 0) {
640*4882a593Smuzhiyun 		pr_err("Failed to allocate GART memory\n");
641*4882a593Smuzhiyun 		return status;
642*4882a593Smuzhiyun 	}
643*4882a593Smuzhiyun 
644*4882a593Smuzhiyun 	packet_buff_uint = mem_obj->cpu_ptr;
645*4882a593Smuzhiyun 
646*4882a593Smuzhiyun 	memset(packet_buff_uint, 0, ib_size);
647*4882a593Smuzhiyun 
648*4882a593Smuzhiyun 	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
649*4882a593Smuzhiyun 	packets_vec[0].header.count = 1;
650*4882a593Smuzhiyun 	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
651*4882a593Smuzhiyun 	packets_vec[0].header.type = PM4_TYPE_3;
652*4882a593Smuzhiyun 	packets_vec[0].bitfields2.reg_offset =
653*4882a593Smuzhiyun 			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
654*4882a593Smuzhiyun 
655*4882a593Smuzhiyun 	packets_vec[0].bitfields2.insert_vmid = 0;
656*4882a593Smuzhiyun 	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
657*4882a593Smuzhiyun 
658*4882a593Smuzhiyun 	packets_vec[1].header.count = 1;
659*4882a593Smuzhiyun 	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
660*4882a593Smuzhiyun 	packets_vec[1].header.type = PM4_TYPE_3;
661*4882a593Smuzhiyun 	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
662*4882a593Smuzhiyun 
663*4882a593Smuzhiyun 	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
664*4882a593Smuzhiyun 	packets_vec[1].bitfields2.insert_vmid = 1;
665*4882a593Smuzhiyun 	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
666*4882a593Smuzhiyun 
667*4882a593Smuzhiyun 	/* Restore the GRBM_GFX_INDEX register */
668*4882a593Smuzhiyun 
669*4882a593Smuzhiyun 	reg_gfx_index.u32All = 0;
670*4882a593Smuzhiyun 	reg_gfx_index.bits.sh_broadcast_writes = 1;
671*4882a593Smuzhiyun 	reg_gfx_index.bits.instance_broadcast_writes = 1;
672*4882a593Smuzhiyun 	reg_gfx_index.bits.se_broadcast_writes = 1;
673*4882a593Smuzhiyun 
674*4882a593Smuzhiyun 
675*4882a593Smuzhiyun 	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
676*4882a593Smuzhiyun 	packets_vec[2].bitfields2.reg_offset =
677*4882a593Smuzhiyun 				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
678*4882a593Smuzhiyun 
679*4882a593Smuzhiyun 	packets_vec[2].bitfields2.insert_vmid = 0;
680*4882a593Smuzhiyun 	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
681*4882a593Smuzhiyun 
682*4882a593Smuzhiyun 	status = dbgdev_diq_submit_ib(
683*4882a593Smuzhiyun 			dbgdev,
684*4882a593Smuzhiyun 			wac_info->process->pasid,
685*4882a593Smuzhiyun 			mem_obj->gpu_addr,
686*4882a593Smuzhiyun 			packet_buff_uint,
687*4882a593Smuzhiyun 			ib_size);
688*4882a593Smuzhiyun 
689*4882a593Smuzhiyun 	if (status)
690*4882a593Smuzhiyun 		pr_err("Failed to submit IB to DIQ\n");
691*4882a593Smuzhiyun 
692*4882a593Smuzhiyun 	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
693*4882a593Smuzhiyun 
694*4882a593Smuzhiyun 	return status;
695*4882a593Smuzhiyun }
696*4882a593Smuzhiyun 
/*
 * dbgdev_wave_control_nodiq - Issue a wave-control command directly through
 * the kfd2kgd interface (no Debug Interface Queue involved).
 *
 * @dbgdev:   debug device attached to the target KFD device
 * @wac_info: requested wave-control operation (mode, operand, trap id, msg)
 *
 * Builds SQ_CMD and GRBM_GFX_INDEX register values from @wac_info, patches
 * in the process' VMID (which the non-DIQ path must supply itself), and
 * hands both values to the KGD for execution.
 *
 * Return: 0 on success, -EFAULT if no per-device process data exists,
 * or the error from dbgdev_wave_control_set_registers().
 */
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
					struct dbg_wave_control_info *wac_info)
{
	int status;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;

	reg_sq_cmd.u32All = 0;

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

	if (!pdd) {
		pr_err("Failed to get pdd for wave control no DIQ\n");
		return -EFAULT;
	}
	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
							&reg_gfx_index);
	if (status) {
		pr_err("Failed to set wave control registers\n");
		return status;
	}

	/* for non DIQ we need to patch the VMID: */

	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;

	/* Dump the fully resolved request and register encodings. */
	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
	pr_debug("\t\t msg value is: %u\n",
			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);

	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);

	pr_debug("\t\t ibw       is : %u\n",
			reg_gfx_index.bitfields.instance_broadcast_writes);
	pr_debug("\t\t ii        is : %u\n",
			reg_gfx_index.bitfields.instance_index);
	pr_debug("\t\t sebw      is : %u\n",
			reg_gfx_index.bitfields.se_broadcast_writes);
	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
	pr_debug("\t\t sbw       is : %u\n",
			reg_gfx_index.bitfields.sh_broadcast_writes);

	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");

	/* Execute synchronously through the KGD; its status is our status. */
	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
							reg_gfx_index.u32All,
							reg_sq_cmd.u32All);
}
759*4882a593Smuzhiyun 
dbgdev_wave_reset_wavefronts(struct kfd_dev * dev,struct kfd_process * p)760*4882a593Smuzhiyun int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
761*4882a593Smuzhiyun {
762*4882a593Smuzhiyun 	int status = 0;
763*4882a593Smuzhiyun 	unsigned int vmid;
764*4882a593Smuzhiyun 	uint16_t queried_pasid;
765*4882a593Smuzhiyun 	union SQ_CMD_BITS reg_sq_cmd;
766*4882a593Smuzhiyun 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
767*4882a593Smuzhiyun 	struct kfd_process_device *pdd;
768*4882a593Smuzhiyun 	struct dbg_wave_control_info wac_info;
769*4882a593Smuzhiyun 	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
770*4882a593Smuzhiyun 	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
771*4882a593Smuzhiyun 
772*4882a593Smuzhiyun 	reg_sq_cmd.u32All = 0;
773*4882a593Smuzhiyun 	status = 0;
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun 	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
776*4882a593Smuzhiyun 	wac_info.operand = HSA_DBG_WAVEOP_KILL;
777*4882a593Smuzhiyun 
778*4882a593Smuzhiyun 	pr_debug("Killing all process wavefronts\n");
779*4882a593Smuzhiyun 
780*4882a593Smuzhiyun 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
781*4882a593Smuzhiyun 	 * ATC_VMID15_PASID_MAPPING
782*4882a593Smuzhiyun 	 * to check which VMID the current process is mapped to.
783*4882a593Smuzhiyun 	 */
784*4882a593Smuzhiyun 
785*4882a593Smuzhiyun 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
786*4882a593Smuzhiyun 		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
787*4882a593Smuzhiyun 				(dev->kgd, vmid, &queried_pasid);
788*4882a593Smuzhiyun 
789*4882a593Smuzhiyun 		if (status && queried_pasid == p->pasid) {
790*4882a593Smuzhiyun 			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
791*4882a593Smuzhiyun 					vmid, p->pasid);
792*4882a593Smuzhiyun 			break;
793*4882a593Smuzhiyun 		}
794*4882a593Smuzhiyun 	}
795*4882a593Smuzhiyun 
796*4882a593Smuzhiyun 	if (vmid > last_vmid_to_scan) {
797*4882a593Smuzhiyun 		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
798*4882a593Smuzhiyun 		return -EFAULT;
799*4882a593Smuzhiyun 	}
800*4882a593Smuzhiyun 
801*4882a593Smuzhiyun 	/* taking the VMID for that process on the safe way using PDD */
802*4882a593Smuzhiyun 	pdd = kfd_get_process_device_data(dev, p);
803*4882a593Smuzhiyun 	if (!pdd)
804*4882a593Smuzhiyun 		return -EFAULT;
805*4882a593Smuzhiyun 
806*4882a593Smuzhiyun 	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
807*4882a593Smuzhiyun 			&reg_gfx_index);
808*4882a593Smuzhiyun 	if (status != 0)
809*4882a593Smuzhiyun 		return -EINVAL;
810*4882a593Smuzhiyun 
811*4882a593Smuzhiyun 	/* for non DIQ we need to patch the VMID: */
812*4882a593Smuzhiyun 	reg_sq_cmd.bits.vm_id = vmid;
813*4882a593Smuzhiyun 
814*4882a593Smuzhiyun 	dev->kfd2kgd->wave_control_execute(dev->kgd,
815*4882a593Smuzhiyun 					reg_gfx_index.u32All,
816*4882a593Smuzhiyun 					reg_sq_cmd.u32All);
817*4882a593Smuzhiyun 
818*4882a593Smuzhiyun 	return 0;
819*4882a593Smuzhiyun }
820*4882a593Smuzhiyun 
kfd_dbgdev_init(struct kfd_dbgdev * pdbgdev,struct kfd_dev * pdev,enum DBGDEV_TYPE type)821*4882a593Smuzhiyun void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
822*4882a593Smuzhiyun 			enum DBGDEV_TYPE type)
823*4882a593Smuzhiyun {
824*4882a593Smuzhiyun 	pdbgdev->dev = pdev;
825*4882a593Smuzhiyun 	pdbgdev->kq = NULL;
826*4882a593Smuzhiyun 	pdbgdev->type = type;
827*4882a593Smuzhiyun 	pdbgdev->pqm = NULL;
828*4882a593Smuzhiyun 
829*4882a593Smuzhiyun 	switch (type) {
830*4882a593Smuzhiyun 	case DBGDEV_TYPE_NODIQ:
831*4882a593Smuzhiyun 		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
832*4882a593Smuzhiyun 		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
833*4882a593Smuzhiyun 		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
834*4882a593Smuzhiyun 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
835*4882a593Smuzhiyun 		break;
836*4882a593Smuzhiyun 	case DBGDEV_TYPE_DIQ:
837*4882a593Smuzhiyun 	default:
838*4882a593Smuzhiyun 		pdbgdev->dbgdev_register = dbgdev_register_diq;
839*4882a593Smuzhiyun 		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
840*4882a593Smuzhiyun 		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
841*4882a593Smuzhiyun 		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
842*4882a593Smuzhiyun 		break;
843*4882a593Smuzhiyun 	}
844*4882a593Smuzhiyun 
845*4882a593Smuzhiyun }
846