// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1,5 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: the driver needs to parse the CB, but WREG must be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
	[8] = GAUDI_QUEUE_ID_DMA_5_0,
	[9] = GAUDI_QUEUE_ID_DMA_5_1,
	[10] = GAUDI_QUEUE_ID_DMA_5_2,
	[11] = GAUDI_QUEUE_ID_DMA_5_3
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

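/*
 * validate_packet_id() - Check that a packet ID is one of the known Gaudi
 *                        packet types.
 * @id: Packet ID to validate.
 *
 * Return: true if @id names a known packet type, false otherwise.
 */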
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);

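/*
 * gaudi_get_fixed_properties() - Set the fixed ASIC properties.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate the H/W queues properties array and fill it according to
 * gaudi_queue_type[], then set the DRAM, SRAM and MMU properties and the
 * rest of the values that do not depend on the firmware.
 *
 * Return: 0 for success, negative value for error.
 */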
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 1;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].requires_kernel_cb = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
		}
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->sync_stream_first_sob = 0;
	prop->sync_stream_first_mon = 0;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			num_sync_stream_queues * HL_RSVD_SOBS;
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			num_sync_stream_queues * HL_RSVD_MONS;

	return 0;
}

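/*
 * gaudi_pci_bars_map() - Map the device PCI BARs.
 * @hdev: Pointer to hl_device structure.
 *
 * Map the SRAM, CFG and HBM BARs (HBM as write-combined) and set the
 * register base pointer (rmmio) to the configuration space inside the
 * CFG BAR.
 *
 * Return: 0 for success, negative value for error.
 */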
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

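/*
 * gaudi_set_hbm_bar_base() - Move the HBM BAR window to a new device address.
 * @hdev: Pointer to hl_device structure.
 * @addr: Device address the HBM BAR should point to.
 *
 * Re-program inbound iATU region 2 (BAR 4) so the BAR exposes the HBM range
 * starting at @addr.
 *
 * Return: the previous BAR base address on success, U64_MAX on failure.
 */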
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

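/*
 * gaudi_init_iatu() - Initialize the iATU inbound/outbound PCI regions.
 * @hdev: Pointer to hl_device structure.
 *
 * Point BAR 0 at SRAM + CFG, BAR 2 at the SPI flash and BAR 4 at HBM, then
 * open a single outbound region towards host memory.
 *
 * Return: 0 for success, negative value for error.
 */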
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

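/*
 * gaudi_early_init() - Early hardware/PCI initialization.
 * @hdev: Pointer to hl_device structure.
 *
 * Get the fixed properties, sanity-check the BAR sizes against the expected
 * Gaudi values and initialize the PCI layer.
 *
 * Return: 0 for success, negative value for error.
 */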
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc)
		goto free_queue_props;

	/* GAUDI Firmware does not yet support security */
	prop->fw_security_disabled = true;
	dev_info(hdev->dev, "firmware-level security is disabled\n");

	return 0;

free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

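/*
 * gaudi_early_fini() - Tear down what gaudi_early_init set up.
 * @hdev: Pointer to hl_device structure.
 *
 * Free the H/W queues properties array and tear down the PCI layer.
 *
 * Return: always 0.
 */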
static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * Compute the trace frequency from the PSOC CPU PLL registers and store it,
 * together with the raw PLL values, in the ASIC properties.
 */
static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 trace_freq = 0;
	u32 pll_clk = 0;
	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
	u32 od = RREG32(mmPSOC_CPU_PLL_OD);

	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
		if (div_sel == DIV_SEL_REF_CLK)
			trace_freq = PLL_REF_CLK;
		else
			trace_freq = PLL_REF_CLK / (div_fctr + 1);
	} else if (div_sel == DIV_SEL_PLL_CLK ||
					div_sel == DIV_SEL_DIVIDED_PLL) {
		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
		if (div_sel == DIV_SEL_PLL_CLK)
			trace_freq = pll_clk;
		else
			trace_freq = pll_clk / (div_fctr + 1);
	} else {
		dev_warn(hdev->dev,
			"Received invalid div select value: %d", div_sel);
	}

	prop->psoc_timestamp_frequency = trace_freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}

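/*
 * _gaudi_init_tpc_mem() - Execute the TPC memory initialization.
 * @hdev: Pointer to hl_device structure.
 * @tpc_kernel_src_addr: DMA address of the TPC kernel image on the host.
 * @tpc_kernel_size: Size in bytes of the TPC kernel image.
 *
 * Build a LIN_DMA packet that copies the TPC kernel to the user SRAM region,
 * submit it as a kernel job on QMAN0, and then run the kernel on every TPC.
 *
 * Return: 0 for success, negative value for error.
 */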
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

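/*
 * gaudi_late_init() - Late device initialization.
 * @hdev: Pointer to hl_device structure.
 *
 * Fetch the cpucp info, enable PCI access from the device CPU, register the
 * event interrupt, fetch the PSOC frequency, clear the MMU page tables range
 * and initialize the TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */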
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	gaudi_fetch_psoc_frequency(hdev);

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

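/*
 * gaudi_late_fini() - Undo gaudi_late_init.
 * @hdev: Pointer to hl_device structure.
 *
 * Free the hwmon channel info array referenced by hdev->hl_chip_info and
 * clear the pointer.
 */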
static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

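/*
 * gaudi_alloc_cpu_accessible_dma_mem() - Allocate the host memory that the
 *                                        device CPU can access.
 * @hdev: Pointer to hl_device structure.
 *
 * Retry the allocation up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until the
 * returned range has identical MSB extension bits across its whole span
 * (see the comment inside), then store the chosen range in @hdev.
 *
 * Return: 0 for success, negative value for error.
 */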
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

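/*
 * gaudi_free_internal_qmans_pq_mem() - Free the PQ buffers of the internal
 *                                      QMANs.
 * @hdev: Pointer to hl_device structure.
 */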
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}

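/*
 * gaudi_alloc_internal_qmans_pq_mem() - Allocate PQ buffers for the internal
 *                                       QMANs (HBM DMA, MME and TPC).
 * @hdev: Pointer to hl_device structure.
 *
 * Return: 0 for success, negative value for error.
 */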
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

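/*
 * gaudi_sw_init() - Software/driver initialization.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate the ASIC-specific structure, build the event table from
 * gaudi_irq_map_table, create the DMA pools and allocate the CPU-accessible
 * and internal-QMAN memories.
 *
 * Return: 0 for success, negative value for error.
 */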
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
				hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

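/*
 * gaudi_sw_fini() - Undo gaudi_sw_init.
 * @hdev: Pointer to hl_device structure.
 *
 * Free the internal QMAN PQs, the CPU accessible pool and memory, and the
 * DMA pool, then release the ASIC-specific structure.
 *
 * Return: always 0.
 */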
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

1090*4882a593Smuzhiyun static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1091*4882a593Smuzhiyun {
1092*4882a593Smuzhiyun 	struct hl_device *hdev = arg;
1093*4882a593Smuzhiyun 	int i;
1094*4882a593Smuzhiyun 
1095*4882a593Smuzhiyun 	if (hdev->disabled)
1096*4882a593Smuzhiyun 		return IRQ_HANDLED;
1097*4882a593Smuzhiyun 
1098*4882a593Smuzhiyun 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1099*4882a593Smuzhiyun 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1100*4882a593Smuzhiyun 
1101*4882a593Smuzhiyun 	hl_irq_handler_eq(irq, &hdev->event_queue);
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 	return IRQ_HANDLED;
1104*4882a593Smuzhiyun }
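/*
 * In single-MSI mode there is no per-queue vector, so the handler above
 * simply polls every completion queue and the event queue on each
 * interrupt; the per-queue handlers are expected to return quickly when
 * their queue has no new entries.
 */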
1105*4882a593Smuzhiyun 
1106*4882a593Smuzhiyun /*
1107*4882a593Smuzhiyun  * For backward compatibility, new MSI interrupts should be set after the
1108*4882a593Smuzhiyun  * existing CPU and NIC interrupts.
1109*4882a593Smuzhiyun  */
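/*
 * A minimal sketch of the resulting vector layout, assuming for
 * illustration that GAUDI_EVENT_QUEUE_MSI_IDX is 8 and
 * NIC_NUMBER_OF_ENGINES is 10 (both values are assumptions here, not
 * taken from this file):
 *
 *	nr < 8, cpu_eq false   ->  msi_vec = nr           (legacy CQ vectors)
 *	cpu_eq true            ->  msi_vec = nr (must be 8, the CPU EQ vector)
 *	nr >= 8, cpu_eq false  ->  msi_vec = nr + 10 + 1  (after NICs and EQ)
 */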
1110*4882a593Smuzhiyun static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1111*4882a593Smuzhiyun 				bool cpu_eq)
1112*4882a593Smuzhiyun {
1113*4882a593Smuzhiyun 	int msi_vec;
1114*4882a593Smuzhiyun 
1115*4882a593Smuzhiyun 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1116*4882a593Smuzhiyun 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1117*4882a593Smuzhiyun 				GAUDI_EVENT_QUEUE_MSI_IDX);
1118*4882a593Smuzhiyun 
1119*4882a593Smuzhiyun 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1120*4882a593Smuzhiyun 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1121*4882a593Smuzhiyun 
1122*4882a593Smuzhiyun 	return pci_irq_vector(hdev->pdev, msi_vec);
1123*4882a593Smuzhiyun }
1124*4882a593Smuzhiyun 
1125*4882a593Smuzhiyun static int gaudi_enable_msi_single(struct hl_device *hdev)
1126*4882a593Smuzhiyun {
1127*4882a593Smuzhiyun 	int rc, irq;
1128*4882a593Smuzhiyun 
1129*4882a593Smuzhiyun 	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1130*4882a593Smuzhiyun 
1131*4882a593Smuzhiyun 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1132*4882a593Smuzhiyun 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1133*4882a593Smuzhiyun 			"gaudi single msi", hdev);
1134*4882a593Smuzhiyun 	if (rc)
1135*4882a593Smuzhiyun 		dev_err(hdev->dev,
1136*4882a593Smuzhiyun 			"Failed to request single MSI IRQ\n");
1137*4882a593Smuzhiyun 
1138*4882a593Smuzhiyun 	return rc;
1139*4882a593Smuzhiyun }
1140*4882a593Smuzhiyun 
1141*4882a593Smuzhiyun static int gaudi_enable_msi_multi(struct hl_device *hdev)
1142*4882a593Smuzhiyun {
1143*4882a593Smuzhiyun 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1144*4882a593Smuzhiyun 	int rc, i, irq_cnt_init, irq;
1145*4882a593Smuzhiyun 
1146*4882a593Smuzhiyun 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1147*4882a593Smuzhiyun 		irq = gaudi_pci_irq_vector(hdev, i, false);
1148*4882a593Smuzhiyun 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1149*4882a593Smuzhiyun 				&hdev->completion_queue[i]);
1150*4882a593Smuzhiyun 		if (rc) {
1151*4882a593Smuzhiyun 			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1152*4882a593Smuzhiyun 			goto free_irqs;
1153*4882a593Smuzhiyun 		}
1154*4882a593Smuzhiyun 	}
1155*4882a593Smuzhiyun 
1156*4882a593Smuzhiyun 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1157*4882a593Smuzhiyun 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1158*4882a593Smuzhiyun 				&hdev->event_queue);
1159*4882a593Smuzhiyun 	if (rc) {
1160*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1161*4882a593Smuzhiyun 		goto free_irqs;
1162*4882a593Smuzhiyun 	}
1163*4882a593Smuzhiyun 
1164*4882a593Smuzhiyun 	return 0;
1165*4882a593Smuzhiyun 
1166*4882a593Smuzhiyun free_irqs:
1167*4882a593Smuzhiyun 	for (i = 0 ; i < irq_cnt_init ; i++)
1168*4882a593Smuzhiyun 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
1169*4882a593Smuzhiyun 				&hdev->completion_queue[i]);
1170*4882a593Smuzhiyun 	return rc;
1171*4882a593Smuzhiyun }
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun static int gaudi_enable_msi(struct hl_device *hdev)
1174*4882a593Smuzhiyun {
1175*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1176*4882a593Smuzhiyun 	int rc;
1177*4882a593Smuzhiyun 
1178*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1179*4882a593Smuzhiyun 		return 0;
1180*4882a593Smuzhiyun 
1181*4882a593Smuzhiyun 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1182*4882a593Smuzhiyun 					PCI_IRQ_MSI);
1183*4882a593Smuzhiyun 	if (rc < 0) {
1184*4882a593Smuzhiyun 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1185*4882a593Smuzhiyun 		return rc;
1186*4882a593Smuzhiyun 	}
1187*4882a593Smuzhiyun 
1188*4882a593Smuzhiyun 	if (rc < NUMBER_OF_INTERRUPTS) {
1189*4882a593Smuzhiyun 		gaudi->multi_msi_mode = false;
1190*4882a593Smuzhiyun 		rc = gaudi_enable_msi_single(hdev);
1191*4882a593Smuzhiyun 	} else {
1192*4882a593Smuzhiyun 		gaudi->multi_msi_mode = true;
1193*4882a593Smuzhiyun 		rc = gaudi_enable_msi_multi(hdev);
1194*4882a593Smuzhiyun 	}
1195*4882a593Smuzhiyun 
1196*4882a593Smuzhiyun 	if (rc)
1197*4882a593Smuzhiyun 		goto free_pci_irq_vectors;
1198*4882a593Smuzhiyun 
1199*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
1200*4882a593Smuzhiyun 
1201*4882a593Smuzhiyun 	return 0;
1202*4882a593Smuzhiyun 
1203*4882a593Smuzhiyun free_pci_irq_vectors:
1204*4882a593Smuzhiyun 	pci_free_irq_vectors(hdev->pdev);
1205*4882a593Smuzhiyun 	return rc;
1206*4882a593Smuzhiyun }
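/*
 * On success pci_alloc_irq_vectors() returns the number of vectors it
 * actually allocated, anywhere between 1 and GAUDI_MSI_ENTRIES here.  A
 * short allocation therefore degrades the driver to single-MSI mode
 * rather than failing outright; only a negative return value is fatal.
 */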
1207*4882a593Smuzhiyun 
1208*4882a593Smuzhiyun static void gaudi_sync_irqs(struct hl_device *hdev)
1209*4882a593Smuzhiyun {
1210*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1211*4882a593Smuzhiyun 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1212*4882a593Smuzhiyun 
1213*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1214*4882a593Smuzhiyun 		return;
1215*4882a593Smuzhiyun 
1216*4882a593Smuzhiyun 	/* Wait for all pending IRQs to be finished */
1217*4882a593Smuzhiyun 	if (gaudi->multi_msi_mode) {
1218*4882a593Smuzhiyun 		for (i = 0 ; i < cq_cnt ; i++)
1219*4882a593Smuzhiyun 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1220*4882a593Smuzhiyun 
1221*4882a593Smuzhiyun 		synchronize_irq(gaudi_pci_irq_vector(hdev,
1222*4882a593Smuzhiyun 						GAUDI_EVENT_QUEUE_MSI_IDX,
1223*4882a593Smuzhiyun 						true));
1224*4882a593Smuzhiyun 	} else {
1225*4882a593Smuzhiyun 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1226*4882a593Smuzhiyun 	}
1227*4882a593Smuzhiyun }
1228*4882a593Smuzhiyun 
1229*4882a593Smuzhiyun static void gaudi_disable_msi(struct hl_device *hdev)
1230*4882a593Smuzhiyun {
1231*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1232*4882a593Smuzhiyun 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1233*4882a593Smuzhiyun 
1234*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1235*4882a593Smuzhiyun 		return;
1236*4882a593Smuzhiyun 
1237*4882a593Smuzhiyun 	gaudi_sync_irqs(hdev);
1238*4882a593Smuzhiyun 
1239*4882a593Smuzhiyun 	if (gaudi->multi_msi_mode) {
1240*4882a593Smuzhiyun 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1241*4882a593Smuzhiyun 						true);
1242*4882a593Smuzhiyun 		free_irq(irq, &hdev->event_queue);
1243*4882a593Smuzhiyun 
1244*4882a593Smuzhiyun 		for (i = 0 ; i < cq_cnt ; i++) {
1245*4882a593Smuzhiyun 			irq = gaudi_pci_irq_vector(hdev, i, false);
1246*4882a593Smuzhiyun 			free_irq(irq, &hdev->completion_queue[i]);
1247*4882a593Smuzhiyun 		}
1248*4882a593Smuzhiyun 	} else {
1249*4882a593Smuzhiyun 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1250*4882a593Smuzhiyun 	}
1251*4882a593Smuzhiyun 
1252*4882a593Smuzhiyun 	pci_free_irq_vectors(hdev->pdev);
1253*4882a593Smuzhiyun 
1254*4882a593Smuzhiyun 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1255*4882a593Smuzhiyun }
1256*4882a593Smuzhiyun 
1257*4882a593Smuzhiyun static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1258*4882a593Smuzhiyun {
1259*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1260*4882a593Smuzhiyun 
1261*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1262*4882a593Smuzhiyun 		return;
1263*4882a593Smuzhiyun 
1264*4882a593Smuzhiyun 	if (!hdev->sram_scrambler_enable)
1265*4882a593Smuzhiyun 		return;
1266*4882a593Smuzhiyun 
1267*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1268*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1270*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1272*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1274*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1276*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1278*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1280*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1282*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283*4882a593Smuzhiyun 
1284*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1285*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1287*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1289*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1291*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1293*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1295*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1296*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1297*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1298*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1299*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1300*4882a593Smuzhiyun 
1301*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1302*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1304*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1306*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1308*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1310*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1312*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1313*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1314*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1315*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1316*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1317*4882a593Smuzhiyun 
1318*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1319*4882a593Smuzhiyun }
1320*4882a593Smuzhiyun 
1321*4882a593Smuzhiyun static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1322*4882a593Smuzhiyun {
1323*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1326*4882a593Smuzhiyun 		return;
1327*4882a593Smuzhiyun 
1328*4882a593Smuzhiyun 	if (!hdev->dram_scrambler_enable)
1329*4882a593Smuzhiyun 		return;
1330*4882a593Smuzhiyun 
1331*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1332*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1334*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1336*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1338*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1340*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1342*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1344*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1346*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1349*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1351*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1353*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1355*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1357*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1359*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1360*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1361*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1362*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1363*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1364*4882a593Smuzhiyun 
1365*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1366*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1368*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1370*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1372*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1374*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1376*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1377*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1378*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1379*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1380*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1383*4882a593Smuzhiyun }
1384*4882a593Smuzhiyun 
1385*4882a593Smuzhiyun static void gaudi_init_e2e(struct hl_device *hdev)
1386*4882a593Smuzhiyun {
1387*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1388*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1389*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1390*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1393*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1394*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1395*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1398*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1399*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1400*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1401*4882a593Smuzhiyun 
1402*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1403*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1404*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1405*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1406*4882a593Smuzhiyun 
1407*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1408*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1409*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1410*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1413*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1414*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1415*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1416*4882a593Smuzhiyun 
1417*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1418*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1419*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1420*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1421*4882a593Smuzhiyun 
1422*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1423*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1424*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1425*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1426*4882a593Smuzhiyun 
1427*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1428*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1429*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1430*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1431*4882a593Smuzhiyun 
1432*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1433*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1434*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1435*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1436*4882a593Smuzhiyun 
1437*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1438*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1439*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1440*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1441*4882a593Smuzhiyun 
1442*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1443*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1444*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1445*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1446*4882a593Smuzhiyun 
1447*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1448*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1449*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1450*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1451*4882a593Smuzhiyun 
1452*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1453*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1454*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1455*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1456*4882a593Smuzhiyun 
1457*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1458*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1459*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1460*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1461*4882a593Smuzhiyun 
1462*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1463*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1464*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1465*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1466*4882a593Smuzhiyun 
1467*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471*4882a593Smuzhiyun 
1472*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476*4882a593Smuzhiyun 
1477*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481*4882a593Smuzhiyun 
1482*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486*4882a593Smuzhiyun 
1487*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491*4882a593Smuzhiyun 
1492*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496*4882a593Smuzhiyun 
1497*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1498*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1499*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1500*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1501*4882a593Smuzhiyun 
1502*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1503*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1504*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1505*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun 	if (!hdev->dram_scrambler_enable) {
1508*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1509*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1510*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1511*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1512*4882a593Smuzhiyun 
1513*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1514*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1515*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1516*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1517*4882a593Smuzhiyun 
1518*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1519*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1520*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1521*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1522*4882a593Smuzhiyun 
1523*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1524*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1525*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1526*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1527*4882a593Smuzhiyun 
1528*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1529*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1530*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1531*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1532*4882a593Smuzhiyun 
1533*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1534*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1535*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1536*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1537*4882a593Smuzhiyun 
1538*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1539*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1540*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1541*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1542*4882a593Smuzhiyun 
1543*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1544*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1545*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1546*4882a593Smuzhiyun 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1547*4882a593Smuzhiyun 
1548*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1549*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1550*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1551*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1552*4882a593Smuzhiyun 
1553*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1554*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1555*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1556*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1557*4882a593Smuzhiyun 
1558*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1559*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1560*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1561*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1562*4882a593Smuzhiyun 
1563*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1564*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1565*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1566*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1567*4882a593Smuzhiyun 
1568*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1569*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1570*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1571*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1572*4882a593Smuzhiyun 
1573*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1574*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1575*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1576*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1577*4882a593Smuzhiyun 
1578*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1579*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1580*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1581*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1582*4882a593Smuzhiyun 
1583*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1584*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1585*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1586*4882a593Smuzhiyun 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1587*4882a593Smuzhiyun 
1588*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592*4882a593Smuzhiyun 
1593*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597*4882a593Smuzhiyun 
1598*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602*4882a593Smuzhiyun 
1603*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606*4882a593Smuzhiyun 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607*4882a593Smuzhiyun 
1608*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612*4882a593Smuzhiyun 
1613*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617*4882a593Smuzhiyun 
1618*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1619*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1620*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1621*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1622*4882a593Smuzhiyun 
1623*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1624*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1625*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1626*4882a593Smuzhiyun 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1627*4882a593Smuzhiyun 	}
1628*4882a593Smuzhiyun 
1629*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1630*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1632*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633*4882a593Smuzhiyun 
1634*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1635*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1637*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638*4882a593Smuzhiyun 
1639*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1640*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1642*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643*4882a593Smuzhiyun 
1644*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1645*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1647*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648*4882a593Smuzhiyun 
1649*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1650*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1652*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653*4882a593Smuzhiyun 
1654*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1655*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1657*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658*4882a593Smuzhiyun 
1659*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1660*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1662*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663*4882a593Smuzhiyun 
1664*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1665*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666*4882a593Smuzhiyun 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1667*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668*4882a593Smuzhiyun 
1669*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1670*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1672*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673*4882a593Smuzhiyun 
1674*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1675*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1677*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678*4882a593Smuzhiyun 
1679*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1680*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1682*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683*4882a593Smuzhiyun 
1684*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1685*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1687*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688*4882a593Smuzhiyun 
1689*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1690*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1692*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693*4882a593Smuzhiyun 
1694*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1695*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1697*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698*4882a593Smuzhiyun 
1699*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1700*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1701*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1702*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1703*4882a593Smuzhiyun 
1704*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1705*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1706*4882a593Smuzhiyun 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1707*4882a593Smuzhiyun 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1710*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1712*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713*4882a593Smuzhiyun 
1714*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1715*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1717*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718*4882a593Smuzhiyun 
1719*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1720*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1722*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723*4882a593Smuzhiyun 
1724*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1725*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1727*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728*4882a593Smuzhiyun 
1729*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1730*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1732*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733*4882a593Smuzhiyun 
1734*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1735*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1737*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738*4882a593Smuzhiyun 
1739*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1740*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1741*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1742*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1743*4882a593Smuzhiyun 
1744*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1745*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1746*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1747*4882a593Smuzhiyun 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1748*4882a593Smuzhiyun }
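/*
 * The HBM E2E credit values above are written as "<bytes> >> 3", which
 * suggests the hardware takes these sizes in 8-byte units; keeping the
 * raw byte count in the source and shifting at compile time preserves
 * the original tuning numbers.  This reading is inferred from the
 * shifts themselves, not from hardware documentation.
 */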
1749*4882a593Smuzhiyun 
1750*4882a593Smuzhiyun static void gaudi_init_hbm_cred(struct hl_device *hdev)
1751*4882a593Smuzhiyun {
1752*4882a593Smuzhiyun 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1753*4882a593Smuzhiyun 
1754*4882a593Smuzhiyun 	hbm0_wr = 0x33333333;
1755*4882a593Smuzhiyun 	hbm0_rd = 0x77777777;
1756*4882a593Smuzhiyun 	hbm1_wr = 0x55555555;
1757*4882a593Smuzhiyun 	hbm1_rd = 0xDDDDDDDD;
1758*4882a593Smuzhiyun 
1759*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768*4882a593Smuzhiyun 
1769*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1770*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1771*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1772*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1773*4882a593Smuzhiyun 
1774*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1775*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1776*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1777*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1778*4882a593Smuzhiyun 
1779*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1780*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1783*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1786*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1789*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791*4882a593Smuzhiyun 
1792*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1793*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795*4882a593Smuzhiyun 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1796*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1799*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1800*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1801*4882a593Smuzhiyun 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1802*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1803*4882a593Smuzhiyun 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1804*4882a593Smuzhiyun }
1805*4882a593Smuzhiyun 
1806*4882a593Smuzhiyun static void gaudi_init_golden_registers(struct hl_device *hdev)
1807*4882a593Smuzhiyun {
1808*4882a593Smuzhiyun 	u32 tpc_offset;
1809*4882a593Smuzhiyun 	int tpc_id, i;
1810*4882a593Smuzhiyun 
1811*4882a593Smuzhiyun 	gaudi_init_e2e(hdev);
1812*4882a593Smuzhiyun 
1813*4882a593Smuzhiyun 	gaudi_init_hbm_cred(hdev);
1814*4882a593Smuzhiyun 
1815*4882a593Smuzhiyun 	hdev->asic_funcs->disable_clock_gating(hdev);
1816*4882a593Smuzhiyun 
1817*4882a593Smuzhiyun 	for (tpc_id = 0, tpc_offset = 0;
1818*4882a593Smuzhiyun 				tpc_id < TPC_NUMBER_OF_ENGINES;
1819*4882a593Smuzhiyun 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1820*4882a593Smuzhiyun 		/* Mask all arithmetic interrupts from TPC */
1821*4882a593Smuzhiyun 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1822*4882a593Smuzhiyun 		/* Set 16 cache lines */
1823*4882a593Smuzhiyun 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1824*4882a593Smuzhiyun 				ICACHE_FETCH_LINE_NUM, 2);
1825*4882a593Smuzhiyun 	}
1826*4882a593Smuzhiyun 
1827*4882a593Smuzhiyun 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1828*4882a593Smuzhiyun 	for (i = 0 ; i < 128 ; i += 8)
1829*4882a593Smuzhiyun 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1830*4882a593Smuzhiyun 
1831*4882a593Smuzhiyun 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1832*4882a593Smuzhiyun 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833*4882a593Smuzhiyun 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1834*4882a593Smuzhiyun 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1835*4882a593Smuzhiyun }
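/*
 * The SRAM-clearing loop above issues sixteen 64-bit writes (i = 0, 8,
 * ..., 120) through the SRAM PCIe BAR, zeroing exactly the first 128
 * bytes that the tensor DMA engine expects to find cleared.
 */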
1836*4882a593Smuzhiyun 
1837*4882a593Smuzhiyun static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1838*4882a593Smuzhiyun 					int qman_id, dma_addr_t qman_pq_addr)
1839*4882a593Smuzhiyun {
1840*4882a593Smuzhiyun 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1841*4882a593Smuzhiyun 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1842*4882a593Smuzhiyun 	u32 q_off, dma_qm_offset;
1843*4882a593Smuzhiyun 	u32 dma_qm_err_cfg;
1844*4882a593Smuzhiyun 
1845*4882a593Smuzhiyun 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846*4882a593Smuzhiyun 
1847*4882a593Smuzhiyun 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
1848*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849*4882a593Smuzhiyun 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
1850*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851*4882a593Smuzhiyun 	so_base_en_lo = lower_32_bits(CFG_BASE +
1852*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853*4882a593Smuzhiyun 	so_base_en_hi = upper_32_bits(CFG_BASE +
1854*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855*4882a593Smuzhiyun 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1856*4882a593Smuzhiyun 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1857*4882a593Smuzhiyun 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1858*4882a593Smuzhiyun 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859*4882a593Smuzhiyun 	so_base_ws_lo = lower_32_bits(CFG_BASE +
1860*4882a593Smuzhiyun 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1861*4882a593Smuzhiyun 	so_base_ws_hi = upper_32_bits(CFG_BASE +
1862*4882a593Smuzhiyun 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863*4882a593Smuzhiyun 
1864*4882a593Smuzhiyun 	q_off = dma_qm_offset + qman_id * 4;
1865*4882a593Smuzhiyun 
1866*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1867*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1868*4882a593Smuzhiyun 
1869*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1870*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1871*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1872*4882a593Smuzhiyun 
1873*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1874*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1875*4882a593Smuzhiyun 							QMAN_LDMA_SRC_OFFSET);
1876*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1877*4882a593Smuzhiyun 							QMAN_LDMA_DST_OFFSET);
1878*4882a593Smuzhiyun 
1879*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1880*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1881*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1882*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1883*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1884*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1885*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1886*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1887*4882a593Smuzhiyun 
1888*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1889*4882a593Smuzhiyun 
1890*4882a593Smuzhiyun 	/* The following configuration is needed only once per QMAN */
1891*4882a593Smuzhiyun 	if (qman_id == 0) {
1892*4882a593Smuzhiyun 		/* Configure RAZWI IRQ */
1893*4882a593Smuzhiyun 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1894*4882a593Smuzhiyun 		if (hdev->stop_on_err) {
1895*4882a593Smuzhiyun 			dma_qm_err_cfg |=
1896*4882a593Smuzhiyun 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1897*4882a593Smuzhiyun 		}
1898*4882a593Smuzhiyun 
1899*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1900*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1901*4882a593Smuzhiyun 			lower_32_bits(CFG_BASE +
1902*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1903*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1904*4882a593Smuzhiyun 			upper_32_bits(CFG_BASE +
1905*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1906*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1907*4882a593Smuzhiyun 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1908*4882a593Smuzhiyun 									dma_id);
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1911*4882a593Smuzhiyun 				QM_ARB_ERR_MSG_EN_MASK);
1912*4882a593Smuzhiyun 
1913*4882a593Smuzhiyun 		/* Increase ARB WDT to support streams architecture */
1914*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1915*4882a593Smuzhiyun 				GAUDI_ARB_WDT_TIMEOUT);
1916*4882a593Smuzhiyun 
1917*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1918*4882a593Smuzhiyun 				QMAN_EXTERNAL_MAKE_TRUSTED);
1919*4882a593Smuzhiyun 
1920*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1921*4882a593Smuzhiyun 	}
1922*4882a593Smuzhiyun }
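/*
 * Worked example of the register addressing used above, assuming the
 * per-stream QM registers form 4-byte-strided arrays (the "_0" suffix
 * naming element 0): with dma_id = 1 and qman_id = 2,
 *
 *	q_off = 1 * DMA_QMAN_OFFSET + 2 * 4
 *
 * so mmDMA0_QM_PQ_BASE_LO_0 + q_off addresses PQ_BASE_LO_2 of DMA1's
 * QMAN.  The stride assumption is inferred from the "qman_id * 4" term.
 */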
1923*4882a593Smuzhiyun 
1924*4882a593Smuzhiyun static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1925*4882a593Smuzhiyun {
1926*4882a593Smuzhiyun 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1927*4882a593Smuzhiyun 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1928*4882a593Smuzhiyun 
1929*4882a593Smuzhiyun 	/* Set to maximum possible according to physical size */
1930*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1931*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1932*4882a593Smuzhiyun 
1933*4882a593Smuzhiyun 	/* WA for H/W bug H3-2116 */
1934*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1935*4882a593Smuzhiyun 
1936*4882a593Smuzhiyun 	/* STOP_ON bit implies the operation gets no completion in case of RAZWI */
1937*4882a593Smuzhiyun 	if (hdev->stop_on_err)
1938*4882a593Smuzhiyun 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1939*4882a593Smuzhiyun 
1940*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1941*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1942*4882a593Smuzhiyun 		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1943*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1944*4882a593Smuzhiyun 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1945*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1946*4882a593Smuzhiyun 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1947*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_PROT + dma_offset,
1948*4882a593Smuzhiyun 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1949*4882a593Smuzhiyun 	/* If the channel is secured, it should be in MMU bypass mode */
1950*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1951*4882a593Smuzhiyun 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1952*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1953*4882a593Smuzhiyun }
1954*4882a593Smuzhiyun 
1955*4882a593Smuzhiyun static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1956*4882a593Smuzhiyun 				u32 enable_mask)
1957*4882a593Smuzhiyun {
1958*4882a593Smuzhiyun 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1959*4882a593Smuzhiyun 
1960*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1961*4882a593Smuzhiyun }
1962*4882a593Smuzhiyun 
1963*4882a593Smuzhiyun static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1964*4882a593Smuzhiyun {
1965*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
1966*4882a593Smuzhiyun 	struct hl_hw_queue *q;
1967*4882a593Smuzhiyun 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1968*4882a593Smuzhiyun 
1969*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1970*4882a593Smuzhiyun 		return;
1971*4882a593Smuzhiyun 
1972*4882a593Smuzhiyun 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1973*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[i];
1974*4882a593Smuzhiyun 		/*
1975*4882a593Smuzhiyun 		 * For queues after the CPU Q, we need to add 1 to get the
1976*4882a593Smuzhiyun 		 * correct queue index. In addition, we need to skip over the
1977*4882a593Smuzhiyun 		 * CPU EQ and NIC IRQs to get the correct MSI register.
1978*4882a593Smuzhiyun 		 */
1979*4882a593Smuzhiyun 		if (dma_id > 1) {
1980*4882a593Smuzhiyun 			cpu_skip = 1;
1981*4882a593Smuzhiyun 			nic_skip = NIC_NUMBER_OF_ENGINES;
1982*4882a593Smuzhiyun 		} else {
1983*4882a593Smuzhiyun 			cpu_skip = 0;
1984*4882a593Smuzhiyun 			nic_skip = 0;
1985*4882a593Smuzhiyun 		}
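		/*
		 * Worked example (assuming the PCI DMA channels map to
		 * engines 0, 1 and 5, and NIC_NUMBER_OF_ENGINES is 10; both
		 * are illustrative assumptions): for dma_id = 5, j = 0, with
		 * eight streams already set up (msi_vec counter at 8),
		 * q_idx = 4 * 5 + 0 + 1 = 21 and q->msi_vec = 10 + 1 + 8 = 19.
		 */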
1986*4882a593Smuzhiyun 
1987*4882a593Smuzhiyun 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
1988*4882a593Smuzhiyun 			q_idx = 4 * dma_id + j + cpu_skip;
1989*4882a593Smuzhiyun 			q = &hdev->kernel_queues[q_idx];
1990*4882a593Smuzhiyun 			q->cq_id = cq_id++;
1991*4882a593Smuzhiyun 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1992*4882a593Smuzhiyun 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
1993*4882a593Smuzhiyun 						q->bus_address);
1994*4882a593Smuzhiyun 		}
1995*4882a593Smuzhiyun 
1996*4882a593Smuzhiyun 		gaudi_init_dma_core(hdev, dma_id);
1997*4882a593Smuzhiyun 
1998*4882a593Smuzhiyun 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1999*4882a593Smuzhiyun 	}
2000*4882a593Smuzhiyun 
2001*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2002*4882a593Smuzhiyun }
2003*4882a593Smuzhiyun 
2004*4882a593Smuzhiyun static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2005*4882a593Smuzhiyun 					int qman_id, u64 qman_base_addr)
2006*4882a593Smuzhiyun {
2007*4882a593Smuzhiyun 	u32 mtr_base_lo, mtr_base_hi;
2008*4882a593Smuzhiyun 	u32 so_base_lo, so_base_hi;
2009*4882a593Smuzhiyun 	u32 q_off, dma_qm_offset;
2010*4882a593Smuzhiyun 	u32 dma_qm_err_cfg;
2011*4882a593Smuzhiyun 
2012*4882a593Smuzhiyun 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2013*4882a593Smuzhiyun 
2014*4882a593Smuzhiyun 	mtr_base_lo = lower_32_bits(CFG_BASE +
2015*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2016*4882a593Smuzhiyun 	mtr_base_hi = upper_32_bits(CFG_BASE +
2017*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2018*4882a593Smuzhiyun 	so_base_lo = lower_32_bits(CFG_BASE +
2019*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2020*4882a593Smuzhiyun 	so_base_hi = upper_32_bits(CFG_BASE +
2021*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2022*4882a593Smuzhiyun 
2023*4882a593Smuzhiyun 	q_off = dma_qm_offset + qman_id * 4;
2024*4882a593Smuzhiyun 
2025*4882a593Smuzhiyun 	if (qman_id < 4) {
2026*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2027*4882a593Smuzhiyun 					lower_32_bits(qman_base_addr));
2028*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2029*4882a593Smuzhiyun 					upper_32_bits(qman_base_addr));
2030*4882a593Smuzhiyun 
2031*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2032*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2033*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2034*4882a593Smuzhiyun 
2035*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2036*4882a593Smuzhiyun 							QMAN_CPDMA_SIZE_OFFSET);
2037*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2038*4882a593Smuzhiyun 							QMAN_CPDMA_SRC_OFFSET);
2039*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2040*4882a593Smuzhiyun 							QMAN_CPDMA_DST_OFFSET);
2041*4882a593Smuzhiyun 	} else {
2042*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2043*4882a593Smuzhiyun 							QMAN_LDMA_SIZE_OFFSET);
2044*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2045*4882a593Smuzhiyun 							QMAN_LDMA_SRC_OFFSET);
2046*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2047*4882a593Smuzhiyun 							QMAN_LDMA_DST_OFFSET);
2048*4882a593Smuzhiyun 
2049*4882a593Smuzhiyun 		/* Configure RAZWI IRQ */
2050*4882a593Smuzhiyun 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2051*4882a593Smuzhiyun 		if (hdev->stop_on_err) {
2052*4882a593Smuzhiyun 			dma_qm_err_cfg |=
2053*4882a593Smuzhiyun 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2054*4882a593Smuzhiyun 		}
2055*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2056*4882a593Smuzhiyun 
2057*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2058*4882a593Smuzhiyun 			lower_32_bits(CFG_BASE +
2059*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2061*4882a593Smuzhiyun 			upper_32_bits(CFG_BASE +
2062*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2063*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2064*4882a593Smuzhiyun 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2065*4882a593Smuzhiyun 									dma_id);
2066*4882a593Smuzhiyun 
2067*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2068*4882a593Smuzhiyun 				QM_ARB_ERR_MSG_EN_MASK);
2069*4882a593Smuzhiyun 
2070*4882a593Smuzhiyun 		/* Increase ARB WDT to support streams architecture */
2071*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2072*4882a593Smuzhiyun 				GAUDI_ARB_WDT_TIMEOUT);
2073*4882a593Smuzhiyun 
2074*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2075*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2076*4882a593Smuzhiyun 				QMAN_INTERNAL_MAKE_TRUSTED);
2077*4882a593Smuzhiyun 	}
2078*4882a593Smuzhiyun 
2079*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2080*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2081*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2082*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2083*4882a593Smuzhiyun }
2084*4882a593Smuzhiyun 
2085*4882a593Smuzhiyun static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2086*4882a593Smuzhiyun {
2087*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2088*4882a593Smuzhiyun 	struct gaudi_internal_qman_info *q;
2089*4882a593Smuzhiyun 	u64 qman_base_addr;
2090*4882a593Smuzhiyun 	int i, j, dma_id, internal_q_index;
2091*4882a593Smuzhiyun 
2092*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2093*4882a593Smuzhiyun 		return;
2094*4882a593Smuzhiyun 
2095*4882a593Smuzhiyun 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2096*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2097*4882a593Smuzhiyun 
2098*4882a593Smuzhiyun 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2099*4882a593Smuzhiyun 			/*
2100*4882a593Smuzhiyun 			 * Add one for the CPU queue to get the correct queue
2101*4882a593Smuzhiyun 			 * number, as all internal queues are placed after it
2102*4882a593Smuzhiyun 			 */
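			/*
			 * Worked example (assuming QMAN_STREAMS == 4): for
			 * dma_id 2, stream 0, the line below yields
			 * 2 * 4 + 0 + 1 = 9, i.e. one slot higher than it
			 * would be without the CPU queue in front.
			 */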
2103*4882a593Smuzhiyun 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2104*4882a593Smuzhiyun 
2105*4882a593Smuzhiyun 			q = &gaudi->internal_qmans[internal_q_index];
2106*4882a593Smuzhiyun 			qman_base_addr = (u64) q->pq_dma_addr;
2107*4882a593Smuzhiyun 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2108*4882a593Smuzhiyun 						qman_base_addr);
2109*4882a593Smuzhiyun 		}
2110*4882a593Smuzhiyun 
2111*4882a593Smuzhiyun 		/* Initializing lower CP for HBM DMA QMAN */
2112*4882a593Smuzhiyun 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2113*4882a593Smuzhiyun 
2114*4882a593Smuzhiyun 		gaudi_init_dma_core(hdev, dma_id);
2115*4882a593Smuzhiyun 
2116*4882a593Smuzhiyun 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2117*4882a593Smuzhiyun 	}
2118*4882a593Smuzhiyun 
2119*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2120*4882a593Smuzhiyun }
2121*4882a593Smuzhiyun 
2122*4882a593Smuzhiyun static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2123*4882a593Smuzhiyun 					int qman_id, u64 qman_base_addr)
2124*4882a593Smuzhiyun {
2125*4882a593Smuzhiyun 	u32 mtr_base_lo, mtr_base_hi;
2126*4882a593Smuzhiyun 	u32 so_base_lo, so_base_hi;
2127*4882a593Smuzhiyun 	u32 q_off, mme_id;
2128*4882a593Smuzhiyun 	u32 mme_qm_err_cfg;
2129*4882a593Smuzhiyun 
2130*4882a593Smuzhiyun 	mtr_base_lo = lower_32_bits(CFG_BASE +
2131*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2132*4882a593Smuzhiyun 	mtr_base_hi = upper_32_bits(CFG_BASE +
2133*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2134*4882a593Smuzhiyun 	so_base_lo = lower_32_bits(CFG_BASE +
2135*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2136*4882a593Smuzhiyun 	so_base_hi = upper_32_bits(CFG_BASE +
2137*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2138*4882a593Smuzhiyun 
2139*4882a593Smuzhiyun 	q_off = mme_offset + qman_id * 4;
2140*4882a593Smuzhiyun 
2141*4882a593Smuzhiyun 	if (qman_id < 4) {
2142*4882a593Smuzhiyun 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2143*4882a593Smuzhiyun 					lower_32_bits(qman_base_addr));
2144*4882a593Smuzhiyun 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2145*4882a593Smuzhiyun 					upper_32_bits(qman_base_addr));
2146*4882a593Smuzhiyun 
2147*4882a593Smuzhiyun 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2148*4882a593Smuzhiyun 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2149*4882a593Smuzhiyun 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2150*4882a593Smuzhiyun 
2151*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2152*4882a593Smuzhiyun 							QMAN_CPDMA_SIZE_OFFSET);
2153*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2154*4882a593Smuzhiyun 							QMAN_CPDMA_SRC_OFFSET);
2155*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2156*4882a593Smuzhiyun 							QMAN_CPDMA_DST_OFFSET);
2157*4882a593Smuzhiyun 	} else {
2158*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2159*4882a593Smuzhiyun 							QMAN_LDMA_SIZE_OFFSET);
2160*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2161*4882a593Smuzhiyun 							QMAN_LDMA_SRC_OFFSET);
2162*4882a593Smuzhiyun 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2163*4882a593Smuzhiyun 							QMAN_LDMA_DST_OFFSET);
2164*4882a593Smuzhiyun 
2165*4882a593Smuzhiyun 		/* Configure RAZWI IRQ */
2166*4882a593Smuzhiyun 		mme_id = mme_offset /
2167*4882a593Smuzhiyun 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
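		/*
		 * mme_offset is either 0 (MME0) or two engine strides (MME2),
		 * so dividing by the per-engine stride and halving yields a
		 * master-MME index of 0 or 1 for the error write-data below.
		 */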
2168*4882a593Smuzhiyun 
2169*4882a593Smuzhiyun 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2170*4882a593Smuzhiyun 		if (hdev->stop_on_err) {
2171*4882a593Smuzhiyun 			mme_qm_err_cfg |=
2172*4882a593Smuzhiyun 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2173*4882a593Smuzhiyun 		}
2174*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2175*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2176*4882a593Smuzhiyun 			lower_32_bits(CFG_BASE +
2177*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2178*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2179*4882a593Smuzhiyun 			upper_32_bits(CFG_BASE +
2180*4882a593Smuzhiyun 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2181*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2182*4882a593Smuzhiyun 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2183*4882a593Smuzhiyun 									mme_id);
2184*4882a593Smuzhiyun 
2185*4882a593Smuzhiyun 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2186*4882a593Smuzhiyun 				QM_ARB_ERR_MSG_EN_MASK);
2187*4882a593Smuzhiyun 
2188*4882a593Smuzhiyun 		/* Increase ARB WDT to support streams architecture */
2189*4882a593Smuzhiyun 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2190*4882a593Smuzhiyun 				GAUDI_ARB_WDT_TIMEOUT);
2191*4882a593Smuzhiyun 
2192*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2193*4882a593Smuzhiyun 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2194*4882a593Smuzhiyun 				QMAN_INTERNAL_MAKE_TRUSTED);
2195*4882a593Smuzhiyun 	}
2196*4882a593Smuzhiyun 
2197*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2198*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2199*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2200*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2201*4882a593Smuzhiyun }
2202*4882a593Smuzhiyun 
2203*4882a593Smuzhiyun static void gaudi_init_mme_qmans(struct hl_device *hdev)
2204*4882a593Smuzhiyun {
2205*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2206*4882a593Smuzhiyun 	struct gaudi_internal_qman_info *q;
2207*4882a593Smuzhiyun 	u64 qman_base_addr;
2208*4882a593Smuzhiyun 	u32 mme_offset;
2209*4882a593Smuzhiyun 	int i, internal_q_index;
2210*4882a593Smuzhiyun 
2211*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2212*4882a593Smuzhiyun 		return;
2213*4882a593Smuzhiyun 
2214*4882a593Smuzhiyun 	/*
2215*4882a593Smuzhiyun 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2216*4882a593Smuzhiyun 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2217*4882a593Smuzhiyun 	 */
2218*4882a593Smuzhiyun 
2219*4882a593Smuzhiyun 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2220*4882a593Smuzhiyun 
2221*4882a593Smuzhiyun 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2222*4882a593Smuzhiyun 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2223*4882a593Smuzhiyun 		q = &gaudi->internal_qmans[internal_q_index];
2224*4882a593Smuzhiyun 		qman_base_addr = (u64) q->pq_dma_addr;
2225*4882a593Smuzhiyun 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2226*4882a593Smuzhiyun 					qman_base_addr);
2227*4882a593Smuzhiyun 		if (i == 3)
2228*4882a593Smuzhiyun 			mme_offset = 0;
2229*4882a593Smuzhiyun 	}
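	/*
	 * The first four iterations above programmed MME2's streams; resetting
	 * mme_offset at i == 3 made the remaining four program MME0, matching
	 * the queue-ID mapping described before the loop (assuming
	 * MME_NUMBER_OF_QMANS covers two masters with four streams each).
	 */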
2230*4882a593Smuzhiyun 
2231*4882a593Smuzhiyun 	/* Initializing lower CP for MME QMANs */
2232*4882a593Smuzhiyun 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2233*4882a593Smuzhiyun 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2234*4882a593Smuzhiyun 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2235*4882a593Smuzhiyun 
2236*4882a593Smuzhiyun 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237*4882a593Smuzhiyun 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2238*4882a593Smuzhiyun 
2239*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2240*4882a593Smuzhiyun }
2241*4882a593Smuzhiyun 
2242*4882a593Smuzhiyun static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2243*4882a593Smuzhiyun 				int qman_id, u64 qman_base_addr)
2244*4882a593Smuzhiyun {
2245*4882a593Smuzhiyun 	u32 mtr_base_lo, mtr_base_hi;
2246*4882a593Smuzhiyun 	u32 so_base_lo, so_base_hi;
2247*4882a593Smuzhiyun 	u32 q_off, tpc_id;
2248*4882a593Smuzhiyun 	u32 tpc_qm_err_cfg;
2249*4882a593Smuzhiyun 
2250*4882a593Smuzhiyun 	mtr_base_lo = lower_32_bits(CFG_BASE +
2251*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2252*4882a593Smuzhiyun 	mtr_base_hi = upper_32_bits(CFG_BASE +
2253*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2254*4882a593Smuzhiyun 	so_base_lo = lower_32_bits(CFG_BASE +
2255*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2256*4882a593Smuzhiyun 	so_base_hi = upper_32_bits(CFG_BASE +
2257*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2258*4882a593Smuzhiyun 
2259*4882a593Smuzhiyun 	q_off = tpc_offset + qman_id * 4;
2260*4882a593Smuzhiyun 
2261*4882a593Smuzhiyun 	if (qman_id < 4) {
2262*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2263*4882a593Smuzhiyun 					lower_32_bits(qman_base_addr));
2264*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2265*4882a593Smuzhiyun 					upper_32_bits(qman_base_addr));
2266*4882a593Smuzhiyun 
2267*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2268*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2269*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2270*4882a593Smuzhiyun 
2271*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2272*4882a593Smuzhiyun 							QMAN_CPDMA_SIZE_OFFSET);
2273*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2274*4882a593Smuzhiyun 							QMAN_CPDMA_SRC_OFFSET);
2275*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2276*4882a593Smuzhiyun 							QMAN_CPDMA_DST_OFFSET);
2277*4882a593Smuzhiyun 	} else {
2278*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2279*4882a593Smuzhiyun 							QMAN_LDMA_SIZE_OFFSET);
2280*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2281*4882a593Smuzhiyun 							QMAN_LDMA_SRC_OFFSET);
2282*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2283*4882a593Smuzhiyun 							QMAN_LDMA_DST_OFFSET);
2284*4882a593Smuzhiyun 
2285*4882a593Smuzhiyun 		/* Configure RAZWI IRQ */
2286*4882a593Smuzhiyun 		tpc_id = tpc_offset /
2287*4882a593Smuzhiyun 				(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2288*4882a593Smuzhiyun 
2289*4882a593Smuzhiyun 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2290*4882a593Smuzhiyun 		if (hdev->stop_on_err) {
2291*4882a593Smuzhiyun 			tpc_qm_err_cfg |=
2292*4882a593Smuzhiyun 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2293*4882a593Smuzhiyun 		}
2294*4882a593Smuzhiyun 
2295*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2296*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2297*4882a593Smuzhiyun 			lower_32_bits(CFG_BASE +
2298*4882a593Smuzhiyun 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2299*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2300*4882a593Smuzhiyun 			upper_32_bits(CFG_BASE +
2301*4882a593Smuzhiyun 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2302*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2303*4882a593Smuzhiyun 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2304*4882a593Smuzhiyun 									tpc_id);
2305*4882a593Smuzhiyun 
2306*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2307*4882a593Smuzhiyun 				QM_ARB_ERR_MSG_EN_MASK);
2308*4882a593Smuzhiyun 
2309*4882a593Smuzhiyun 		/* Increase ARB WDT to support streams architecture */
2310*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2311*4882a593Smuzhiyun 				GAUDI_ARB_WDT_TIMEOUT);
2312*4882a593Smuzhiyun 
2313*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2314*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2315*4882a593Smuzhiyun 				QMAN_INTERNAL_MAKE_TRUSTED);
2316*4882a593Smuzhiyun 	}
2317*4882a593Smuzhiyun 
2318*4882a593Smuzhiyun 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2319*4882a593Smuzhiyun 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2320*4882a593Smuzhiyun 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2321*4882a593Smuzhiyun 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2322*4882a593Smuzhiyun }
2323*4882a593Smuzhiyun 
2324*4882a593Smuzhiyun static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2325*4882a593Smuzhiyun {
2326*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2327*4882a593Smuzhiyun 	struct gaudi_internal_qman_info *q;
2328*4882a593Smuzhiyun 	u64 qman_base_addr;
2329*4882a593Smuzhiyun 	u32 so_base_hi, tpc_offset = 0;
2330*4882a593Smuzhiyun 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2331*4882a593Smuzhiyun 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2332*4882a593Smuzhiyun 	int i, tpc_id, internal_q_index;
2333*4882a593Smuzhiyun 
2334*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2335*4882a593Smuzhiyun 		return;
2336*4882a593Smuzhiyun 
2337*4882a593Smuzhiyun 	so_base_hi = upper_32_bits(CFG_BASE +
2338*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2339*4882a593Smuzhiyun 
2340*4882a593Smuzhiyun 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2341*4882a593Smuzhiyun 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
2342*4882a593Smuzhiyun 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2343*4882a593Smuzhiyun 						tpc_id * QMAN_STREAMS + i;
2344*4882a593Smuzhiyun 			q = &gaudi->internal_qmans[internal_q_index];
2345*4882a593Smuzhiyun 			qman_base_addr = (u64) q->pq_dma_addr;
2346*4882a593Smuzhiyun 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
2347*4882a593Smuzhiyun 						qman_base_addr);
2348*4882a593Smuzhiyun 
2349*4882a593Smuzhiyun 			if (i == 3) {
2350*4882a593Smuzhiyun 				/* Initializing lower CP for TPC QMAN */
2351*4882a593Smuzhiyun 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2352*4882a593Smuzhiyun 
2353*4882a593Smuzhiyun 				/* Enable the QMAN and TPC channel */
2354*4882a593Smuzhiyun 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2355*4882a593Smuzhiyun 						QMAN_TPC_ENABLE);
2356*4882a593Smuzhiyun 			}
2357*4882a593Smuzhiyun 		}
2358*4882a593Smuzhiyun 
2359*4882a593Smuzhiyun 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2360*4882a593Smuzhiyun 				so_base_hi);
2361*4882a593Smuzhiyun 
2362*4882a593Smuzhiyun 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2363*4882a593Smuzhiyun 
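		/*
		 * Each TPC contributes one bit inside HW_CAP_TPC_MASK, so the
		 * stop/disable paths can test the mask as a whole while still
		 * recording exactly which TPCs were brought up.
		 */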
2364*4882a593Smuzhiyun 		gaudi->hw_cap_initialized |=
2365*4882a593Smuzhiyun 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2366*4882a593Smuzhiyun 	}
2367*4882a593Smuzhiyun }
2368*4882a593Smuzhiyun 
2369*4882a593Smuzhiyun static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2370*4882a593Smuzhiyun {
2371*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2372*4882a593Smuzhiyun 
2373*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2374*4882a593Smuzhiyun 		return;
2375*4882a593Smuzhiyun 
2376*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2377*4882a593Smuzhiyun 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2378*4882a593Smuzhiyun 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2379*4882a593Smuzhiyun }
2380*4882a593Smuzhiyun 
2381*4882a593Smuzhiyun static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2382*4882a593Smuzhiyun {
2383*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2384*4882a593Smuzhiyun 
2385*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2386*4882a593Smuzhiyun 		return;
2387*4882a593Smuzhiyun 
2388*4882a593Smuzhiyun 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2389*4882a593Smuzhiyun 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2390*4882a593Smuzhiyun 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2391*4882a593Smuzhiyun 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2392*4882a593Smuzhiyun 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2393*4882a593Smuzhiyun }
2394*4882a593Smuzhiyun 
2395*4882a593Smuzhiyun static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2396*4882a593Smuzhiyun {
2397*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2398*4882a593Smuzhiyun 
2399*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2400*4882a593Smuzhiyun 		return;
2401*4882a593Smuzhiyun 
2402*4882a593Smuzhiyun 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
2403*4882a593Smuzhiyun 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
2404*4882a593Smuzhiyun }
2405*4882a593Smuzhiyun 
2406*4882a593Smuzhiyun static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2407*4882a593Smuzhiyun {
2408*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2409*4882a593Smuzhiyun 	u32 tpc_offset = 0;
2410*4882a593Smuzhiyun 	int tpc_id;
2411*4882a593Smuzhiyun 
2412*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2413*4882a593Smuzhiyun 		return;
2414*4882a593Smuzhiyun 
2415*4882a593Smuzhiyun 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2416*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2417*4882a593Smuzhiyun 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2418*4882a593Smuzhiyun 	}
2419*4882a593Smuzhiyun }
2420*4882a593Smuzhiyun 
2421*4882a593Smuzhiyun static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2422*4882a593Smuzhiyun {
2423*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2424*4882a593Smuzhiyun 
2425*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2426*4882a593Smuzhiyun 		return;
2427*4882a593Smuzhiyun 
2428*4882a593Smuzhiyun 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
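	/* Each CP_STOP bit appears to map to one CP: 0xF covers only the four
	 * upper CPs, whereas the internal QMANs below use 0x1F to stop the
	 * lower CP (bit 4) as well.
	 */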
2429*4882a593Smuzhiyun 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430*4882a593Smuzhiyun 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431*4882a593Smuzhiyun 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432*4882a593Smuzhiyun }
2433*4882a593Smuzhiyun 
2434*4882a593Smuzhiyun static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2435*4882a593Smuzhiyun {
2436*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2437*4882a593Smuzhiyun 
2438*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2439*4882a593Smuzhiyun 		return;
2440*4882a593Smuzhiyun 
2441*4882a593Smuzhiyun 	/* Stop CPs of HBM DMA QMANs */
2442*4882a593Smuzhiyun 
2443*4882a593Smuzhiyun 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444*4882a593Smuzhiyun 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2445*4882a593Smuzhiyun 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446*4882a593Smuzhiyun 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2447*4882a593Smuzhiyun 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2448*4882a593Smuzhiyun }
2449*4882a593Smuzhiyun 
2450*4882a593Smuzhiyun static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2451*4882a593Smuzhiyun {
2452*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2453*4882a593Smuzhiyun 
2454*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2455*4882a593Smuzhiyun 		return;
2456*4882a593Smuzhiyun 
2457*4882a593Smuzhiyun 	/* Stop CPs of MME QMANs */
2458*4882a593Smuzhiyun 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459*4882a593Smuzhiyun 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460*4882a593Smuzhiyun }
2461*4882a593Smuzhiyun 
2462*4882a593Smuzhiyun static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2463*4882a593Smuzhiyun {
2464*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2465*4882a593Smuzhiyun 
2466*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2467*4882a593Smuzhiyun 		return;
2468*4882a593Smuzhiyun 
2469*4882a593Smuzhiyun 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470*4882a593Smuzhiyun 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471*4882a593Smuzhiyun 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2472*4882a593Smuzhiyun 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2473*4882a593Smuzhiyun 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2474*4882a593Smuzhiyun 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2475*4882a593Smuzhiyun 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2476*4882a593Smuzhiyun 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2477*4882a593Smuzhiyun }
2478*4882a593Smuzhiyun 
2479*4882a593Smuzhiyun static void gaudi_pci_dma_stall(struct hl_device *hdev)
2480*4882a593Smuzhiyun {
2481*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2482*4882a593Smuzhiyun 
2483*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2484*4882a593Smuzhiyun 		return;
2485*4882a593Smuzhiyun 
2486*4882a593Smuzhiyun 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487*4882a593Smuzhiyun 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2488*4882a593Smuzhiyun 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2489*4882a593Smuzhiyun }
2490*4882a593Smuzhiyun 
2491*4882a593Smuzhiyun static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2492*4882a593Smuzhiyun {
2493*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2494*4882a593Smuzhiyun 
2495*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2496*4882a593Smuzhiyun 		return;
2497*4882a593Smuzhiyun 
2498*4882a593Smuzhiyun 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2499*4882a593Smuzhiyun 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2500*4882a593Smuzhiyun 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2501*4882a593Smuzhiyun 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2502*4882a593Smuzhiyun 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2503*4882a593Smuzhiyun }
2504*4882a593Smuzhiyun 
2505*4882a593Smuzhiyun static void gaudi_mme_stall(struct hl_device *hdev)
2506*4882a593Smuzhiyun {
2507*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2508*4882a593Smuzhiyun 
2509*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2510*4882a593Smuzhiyun 		return;
2511*4882a593Smuzhiyun 
2512*4882a593Smuzhiyun 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
2513*4882a593Smuzhiyun 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514*4882a593Smuzhiyun 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515*4882a593Smuzhiyun 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516*4882a593Smuzhiyun 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517*4882a593Smuzhiyun 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518*4882a593Smuzhiyun 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519*4882a593Smuzhiyun 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520*4882a593Smuzhiyun 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521*4882a593Smuzhiyun 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2522*4882a593Smuzhiyun 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2523*4882a593Smuzhiyun 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2524*4882a593Smuzhiyun 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2525*4882a593Smuzhiyun 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2526*4882a593Smuzhiyun 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2527*4882a593Smuzhiyun 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2528*4882a593Smuzhiyun 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun 
2531*4882a593Smuzhiyun static void gaudi_tpc_stall(struct hl_device *hdev)
2532*4882a593Smuzhiyun {
2533*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2534*4882a593Smuzhiyun 
2535*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2536*4882a593Smuzhiyun 		return;
2537*4882a593Smuzhiyun 
2538*4882a593Smuzhiyun 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539*4882a593Smuzhiyun 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540*4882a593Smuzhiyun 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2541*4882a593Smuzhiyun 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2542*4882a593Smuzhiyun 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2543*4882a593Smuzhiyun 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2544*4882a593Smuzhiyun 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2545*4882a593Smuzhiyun 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2546*4882a593Smuzhiyun }
2547*4882a593Smuzhiyun 
2548*4882a593Smuzhiyun static void gaudi_set_clock_gating(struct hl_device *hdev)
2549*4882a593Smuzhiyun {
2550*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2551*4882a593Smuzhiyun 	u32 qman_offset;
2552*4882a593Smuzhiyun 	bool enable;
2553*4882a593Smuzhiyun 	int i;
2554*4882a593Smuzhiyun 
2555*4882a593Smuzhiyun 	/* If we are in the middle of a debug session, don't enable clock
2556*4882a593Smuzhiyun 	 * gating, as it may interfere
2557*4882a593Smuzhiyun 	 */
2558*4882a593Smuzhiyun 	if (hdev->in_debug)
2559*4882a593Smuzhiyun 		return;
2560*4882a593Smuzhiyun 
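	/*
	 * hdev->clock_gating_mask holds one bit per engine ID (a driver-level
	 * mask); an engine's CGM registers are armed below only if its bit is
	 * set, otherwise they are written as 0 and gating stays off.
	 */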
2561*4882a593Smuzhiyun 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2562*4882a593Smuzhiyun 		enable = !!(hdev->clock_gating_mask &
2563*4882a593Smuzhiyun 				(BIT_ULL(gaudi_dma_assignment[i])));
2564*4882a593Smuzhiyun 
2565*4882a593Smuzhiyun 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2566*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2567*4882a593Smuzhiyun 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2568*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2569*4882a593Smuzhiyun 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2570*4882a593Smuzhiyun 	}
2571*4882a593Smuzhiyun 
2572*4882a593Smuzhiyun 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2573*4882a593Smuzhiyun 		enable = !!(hdev->clock_gating_mask &
2574*4882a593Smuzhiyun 				(BIT_ULL(gaudi_dma_assignment[i])));
2575*4882a593Smuzhiyun 
2576*4882a593Smuzhiyun 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2577*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2578*4882a593Smuzhiyun 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2580*4882a593Smuzhiyun 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2581*4882a593Smuzhiyun 	}
2582*4882a593Smuzhiyun 
2583*4882a593Smuzhiyun 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2584*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2585*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2586*4882a593Smuzhiyun 
2587*4882a593Smuzhiyun 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2588*4882a593Smuzhiyun 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2589*4882a593Smuzhiyun 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2590*4882a593Smuzhiyun 
2591*4882a593Smuzhiyun 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2592*4882a593Smuzhiyun 		enable = !!(hdev->clock_gating_mask &
2593*4882a593Smuzhiyun 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2594*4882a593Smuzhiyun 
2595*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2596*4882a593Smuzhiyun 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2597*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2598*4882a593Smuzhiyun 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2599*4882a593Smuzhiyun 
2600*4882a593Smuzhiyun 		qman_offset += TPC_QMAN_OFFSET;
2601*4882a593Smuzhiyun 	}
2602*4882a593Smuzhiyun 
2603*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2604*4882a593Smuzhiyun }
2605*4882a593Smuzhiyun 
2606*4882a593Smuzhiyun static void gaudi_disable_clock_gating(struct hl_device *hdev)
2607*4882a593Smuzhiyun {
2608*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2609*4882a593Smuzhiyun 	u32 qman_offset;
2610*4882a593Smuzhiyun 	int i;
2611*4882a593Smuzhiyun 
2612*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2613*4882a593Smuzhiyun 		return;
2614*4882a593Smuzhiyun 
2615*4882a593Smuzhiyun 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2616*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2617*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2618*4882a593Smuzhiyun 
2619*4882a593Smuzhiyun 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2620*4882a593Smuzhiyun 	}
2621*4882a593Smuzhiyun 
2622*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CGM_CFG, 0);
2623*4882a593Smuzhiyun 	WREG32(mmMME0_QM_CGM_CFG1, 0);
2624*4882a593Smuzhiyun 	WREG32(mmMME2_QM_CGM_CFG, 0);
2625*4882a593Smuzhiyun 	WREG32(mmMME2_QM_CGM_CFG1, 0);
2626*4882a593Smuzhiyun 
2627*4882a593Smuzhiyun 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2628*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2629*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2630*4882a593Smuzhiyun 
2631*4882a593Smuzhiyun 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2632*4882a593Smuzhiyun 	}
2633*4882a593Smuzhiyun 
2634*4882a593Smuzhiyun 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2635*4882a593Smuzhiyun }
2636*4882a593Smuzhiyun 
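/*
 * The timestamp block exposes an enable register at its base and the 64-bit
 * counter as two 32-bit words at offsets 0x8 and 0xC (roles inferred from
 * this init sequence, not from a datasheet); the counter is stopped before
 * both words are zeroed so it restarts coherently from zero.
 */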
2637*4882a593Smuzhiyun static void gaudi_enable_timestamp(struct hl_device *hdev)
2638*4882a593Smuzhiyun {
2639*4882a593Smuzhiyun 	/* Disable the timestamp counter */
2640*4882a593Smuzhiyun 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2641*4882a593Smuzhiyun 
2642*4882a593Smuzhiyun 	/* Zero the lower/upper parts of the 64-bit counter */
2643*4882a593Smuzhiyun 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2644*4882a593Smuzhiyun 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2645*4882a593Smuzhiyun 
2646*4882a593Smuzhiyun 	/* Enable the counter */
2647*4882a593Smuzhiyun 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2648*4882a593Smuzhiyun }
2649*4882a593Smuzhiyun 
2650*4882a593Smuzhiyun static void gaudi_disable_timestamp(struct hl_device *hdev)
2651*4882a593Smuzhiyun {
2652*4882a593Smuzhiyun 	/* Disable the timestamp counter */
2653*4882a593Smuzhiyun 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2654*4882a593Smuzhiyun }
2655*4882a593Smuzhiyun 
2656*4882a593Smuzhiyun static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2657*4882a593Smuzhiyun {
2658*4882a593Smuzhiyun 	u32 wait_timeout_ms;
2659*4882a593Smuzhiyun 
2660*4882a593Smuzhiyun 	dev_info(hdev->dev,
2661*4882a593Smuzhiyun 		"Halting compute engines and disabling interrupts\n");
2662*4882a593Smuzhiyun 
2663*4882a593Smuzhiyun 	if (hdev->pldm)
2664*4882a593Smuzhiyun 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2665*4882a593Smuzhiyun 	else
2666*4882a593Smuzhiyun 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2667*4882a593Smuzhiyun 
2668*4882a593Smuzhiyun 
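	/*
	 * Quiesce in stages: first stop the QMANs so no new work is fetched
	 * (compute consumers before the DMA engines that feed them), wait,
	 * then stall the engine cores themselves, wait again, and only then
	 * disable the QMANs, the timestamp counter and MSI.
	 */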
2669*4882a593Smuzhiyun 	gaudi_stop_mme_qmans(hdev);
2670*4882a593Smuzhiyun 	gaudi_stop_tpc_qmans(hdev);
2671*4882a593Smuzhiyun 	gaudi_stop_hbm_dma_qmans(hdev);
2672*4882a593Smuzhiyun 	gaudi_stop_pci_dma_qmans(hdev);
2673*4882a593Smuzhiyun 
2674*4882a593Smuzhiyun 	hdev->asic_funcs->disable_clock_gating(hdev);
2675*4882a593Smuzhiyun 
2676*4882a593Smuzhiyun 	msleep(wait_timeout_ms);
2677*4882a593Smuzhiyun 
2678*4882a593Smuzhiyun 	gaudi_pci_dma_stall(hdev);
2679*4882a593Smuzhiyun 	gaudi_hbm_dma_stall(hdev);
2680*4882a593Smuzhiyun 	gaudi_tpc_stall(hdev);
2681*4882a593Smuzhiyun 	gaudi_mme_stall(hdev);
2682*4882a593Smuzhiyun 
2683*4882a593Smuzhiyun 	msleep(wait_timeout_ms);
2684*4882a593Smuzhiyun 
2685*4882a593Smuzhiyun 	gaudi_disable_mme_qmans(hdev);
2686*4882a593Smuzhiyun 	gaudi_disable_tpc_qmans(hdev);
2687*4882a593Smuzhiyun 	gaudi_disable_hbm_dma_qmans(hdev);
2688*4882a593Smuzhiyun 	gaudi_disable_pci_dma_qmans(hdev);
2689*4882a593Smuzhiyun 
2690*4882a593Smuzhiyun 	gaudi_disable_timestamp(hdev);
2691*4882a593Smuzhiyun 
2692*4882a593Smuzhiyun 	gaudi_disable_msi(hdev);
2693*4882a593Smuzhiyun }
2694*4882a593Smuzhiyun 
2695*4882a593Smuzhiyun static int gaudi_mmu_init(struct hl_device *hdev)
2696*4882a593Smuzhiyun {
2697*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2698*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2699*4882a593Smuzhiyun 	u64 hop0_addr;
2700*4882a593Smuzhiyun 	int rc, i;
2701*4882a593Smuzhiyun 
2702*4882a593Smuzhiyun 	if (!hdev->mmu_enable)
2703*4882a593Smuzhiyun 		return 0;
2704*4882a593Smuzhiyun 
2705*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2706*4882a593Smuzhiyun 		return 0;
2707*4882a593Smuzhiyun 
2708*4882a593Smuzhiyun 	hdev->dram_supports_virtual_memory = false;
2709*4882a593Smuzhiyun 
2710*4882a593Smuzhiyun 	for (i = 0 ; i < prop->max_asid ; i++) {
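		/*
		 * Hop-0 tables are laid out back-to-back in the page-table
		 * area, e.g. the table for ASID 3 starts at mmu_pgt_addr plus
		 * three hop-table sizes.
		 */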
2711*4882a593Smuzhiyun 		hop0_addr = prop->mmu_pgt_addr +
2712*4882a593Smuzhiyun 				(i * prop->mmu_hop_table_size);
2713*4882a593Smuzhiyun 
2714*4882a593Smuzhiyun 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2715*4882a593Smuzhiyun 		if (rc) {
2716*4882a593Smuzhiyun 			dev_err(hdev->dev,
2717*4882a593Smuzhiyun 				"failed to set hop0 addr for asid %d\n", i);
2718*4882a593Smuzhiyun 			goto err;
2719*4882a593Smuzhiyun 		}
2720*4882a593Smuzhiyun 	}
2721*4882a593Smuzhiyun 
2722*4882a593Smuzhiyun 	/* init MMU cache manage page */
2723*4882a593Smuzhiyun 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2724*4882a593Smuzhiyun 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2725*4882a593Smuzhiyun 
2726*4882a593Smuzhiyun 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2727*4882a593Smuzhiyun 
2728*4882a593Smuzhiyun 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
2729*4882a593Smuzhiyun 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
2730*4882a593Smuzhiyun 
2731*4882a593Smuzhiyun 	WREG32(mmSTLB_HOP_CONFIGURATION,
2732*4882a593Smuzhiyun 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2733*4882a593Smuzhiyun 
2734*4882a593Smuzhiyun 	/*
2735*4882a593Smuzhiyun 	 * The H/W expects the first PI after init to be 1. After wraparound
2736*4882a593Smuzhiyun 	 * we'll write 0.
2737*4882a593Smuzhiyun 	 */
2738*4882a593Smuzhiyun 	gaudi->mmu_cache_inv_pi = 1;
2739*4882a593Smuzhiyun 
2740*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
2741*4882a593Smuzhiyun 
2742*4882a593Smuzhiyun 	return 0;
2743*4882a593Smuzhiyun 
2744*4882a593Smuzhiyun err:
2745*4882a593Smuzhiyun 	return rc;
2746*4882a593Smuzhiyun }
2747*4882a593Smuzhiyun 
2748*4882a593Smuzhiyun static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2749*4882a593Smuzhiyun {
2750*4882a593Smuzhiyun 	void __iomem *dst;
2751*4882a593Smuzhiyun 
2752*4882a593Smuzhiyun 	/* HBM scrambler must be initialized before pushing F/W to HBM */
2753*4882a593Smuzhiyun 	gaudi_init_scrambler_hbm(hdev);
2754*4882a593Smuzhiyun 
2755*4882a593Smuzhiyun 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2756*4882a593Smuzhiyun 
2757*4882a593Smuzhiyun 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2758*4882a593Smuzhiyun }
2759*4882a593Smuzhiyun 
2760*4882a593Smuzhiyun static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2761*4882a593Smuzhiyun {
2762*4882a593Smuzhiyun 	void __iomem *dst;
2763*4882a593Smuzhiyun 
2764*4882a593Smuzhiyun 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2765*4882a593Smuzhiyun 
2766*4882a593Smuzhiyun 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2767*4882a593Smuzhiyun }
2768*4882a593Smuzhiyun 
2769*4882a593Smuzhiyun static void gaudi_read_device_fw_version(struct hl_device *hdev,
2770*4882a593Smuzhiyun 					enum hl_fw_component fwc)
2771*4882a593Smuzhiyun {
2772*4882a593Smuzhiyun 	const char *name;
2773*4882a593Smuzhiyun 	u32 ver_off;
2774*4882a593Smuzhiyun 	char *dest;
2775*4882a593Smuzhiyun 
2776*4882a593Smuzhiyun 	switch (fwc) {
2777*4882a593Smuzhiyun 	case FW_COMP_UBOOT:
2778*4882a593Smuzhiyun 		ver_off = RREG32(mmUBOOT_VER_OFFSET);
2779*4882a593Smuzhiyun 		dest = hdev->asic_prop.uboot_ver;
2780*4882a593Smuzhiyun 		name = "U-Boot";
2781*4882a593Smuzhiyun 		break;
2782*4882a593Smuzhiyun 	case FW_COMP_PREBOOT:
2783*4882a593Smuzhiyun 		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2784*4882a593Smuzhiyun 		dest = hdev->asic_prop.preboot_ver;
2785*4882a593Smuzhiyun 		name = "Preboot";
2786*4882a593Smuzhiyun 		break;
2787*4882a593Smuzhiyun 	default:
2788*4882a593Smuzhiyun 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2789*4882a593Smuzhiyun 		return;
2790*4882a593Smuzhiyun 	}
2791*4882a593Smuzhiyun 
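	/*
	 * The version register holds an absolute SRAM address; masking off
	 * SRAM_BASE_ADDR turns it into an offset usable within the SRAM BAR.
	 */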
2792*4882a593Smuzhiyun 	ver_off &= ~((u32)SRAM_BASE_ADDR);
2793*4882a593Smuzhiyun 
2794*4882a593Smuzhiyun 	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2795*4882a593Smuzhiyun 		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2796*4882a593Smuzhiyun 							VERSION_MAX_LEN);
2797*4882a593Smuzhiyun 	} else {
2798*4882a593Smuzhiyun 		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2799*4882a593Smuzhiyun 								name, ver_off);
2800*4882a593Smuzhiyun 		strcpy(dest, "unavailable");
2801*4882a593Smuzhiyun 	}
2802*4882a593Smuzhiyun }
2803*4882a593Smuzhiyun 
2804*4882a593Smuzhiyun static int gaudi_init_cpu(struct hl_device *hdev)
2805*4882a593Smuzhiyun {
2806*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2807*4882a593Smuzhiyun 	int rc;
2808*4882a593Smuzhiyun 
2809*4882a593Smuzhiyun 	if (!hdev->cpu_enable)
2810*4882a593Smuzhiyun 		return 0;
2811*4882a593Smuzhiyun 
2812*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2813*4882a593Smuzhiyun 		return 0;
2814*4882a593Smuzhiyun 
2815*4882a593Smuzhiyun 	/*
2816*4882a593Smuzhiyun 	 * The device CPU works with 40-bit addresses.
2817*4882a593Smuzhiyun 	 * This register sets the extension to 50 bits.
2818*4882a593Smuzhiyun 	 */
2819*4882a593Smuzhiyun 	WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2820*4882a593Smuzhiyun 
2821*4882a593Smuzhiyun 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2822*4882a593Smuzhiyun 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2823*4882a593Smuzhiyun 			mmCPU_CMD_STATUS_TO_HOST,
2824*4882a593Smuzhiyun 			mmCPU_BOOT_ERR0,
2825*4882a593Smuzhiyun 			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2826*4882a593Smuzhiyun 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2827*4882a593Smuzhiyun 
2828*4882a593Smuzhiyun 	if (rc)
2829*4882a593Smuzhiyun 		return rc;
2830*4882a593Smuzhiyun 
2831*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
2832*4882a593Smuzhiyun 
2833*4882a593Smuzhiyun 	return 0;
2834*4882a593Smuzhiyun }
2835*4882a593Smuzhiyun 
2836*4882a593Smuzhiyun static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2837*4882a593Smuzhiyun {
2838*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
2839*4882a593Smuzhiyun 	struct hl_eq *eq;
2840*4882a593Smuzhiyun 	u32 status;
2841*4882a593Smuzhiyun 	struct hl_hw_queue *cpu_pq =
2842*4882a593Smuzhiyun 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2843*4882a593Smuzhiyun 	int err;
2844*4882a593Smuzhiyun 
2845*4882a593Smuzhiyun 	if (!hdev->cpu_queues_enable)
2846*4882a593Smuzhiyun 		return 0;
2847*4882a593Smuzhiyun 
2848*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2849*4882a593Smuzhiyun 		return 0;
2850*4882a593Smuzhiyun 
2851*4882a593Smuzhiyun 	eq = &hdev->event_queue;
2852*4882a593Smuzhiyun 
2853*4882a593Smuzhiyun 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2854*4882a593Smuzhiyun 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2855*4882a593Smuzhiyun 
2856*4882a593Smuzhiyun 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2857*4882a593Smuzhiyun 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2858*4882a593Smuzhiyun 
2859*4882a593Smuzhiyun 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2860*4882a593Smuzhiyun 			lower_32_bits(hdev->cpu_accessible_dma_address));
2861*4882a593Smuzhiyun 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2862*4882a593Smuzhiyun 			upper_32_bits(hdev->cpu_accessible_dma_address));
2863*4882a593Smuzhiyun 
2864*4882a593Smuzhiyun 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2865*4882a593Smuzhiyun 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2866*4882a593Smuzhiyun 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2867*4882a593Smuzhiyun 
2868*4882a593Smuzhiyun 	/* Used for EQ CI */
2869*4882a593Smuzhiyun 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2870*4882a593Smuzhiyun 
2871*4882a593Smuzhiyun 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
2872*4882a593Smuzhiyun 
2873*4882a593Smuzhiyun 	if (gaudi->multi_msi_mode)
2874*4882a593Smuzhiyun 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2875*4882a593Smuzhiyun 	else
2876*4882a593Smuzhiyun 		WREG32(mmCPU_IF_QUEUE_INIT,
2877*4882a593Smuzhiyun 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2878*4882a593Smuzhiyun 
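	/* Kick the embedded CPU (via a GIC SPI, as far as this sequence
	 * shows) so it samples the queue addresses and sizes programmed above
	 */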
2879*4882a593Smuzhiyun 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2880*4882a593Smuzhiyun 
2881*4882a593Smuzhiyun 	err = hl_poll_timeout(
2882*4882a593Smuzhiyun 		hdev,
2883*4882a593Smuzhiyun 		mmCPU_IF_QUEUE_INIT,
2884*4882a593Smuzhiyun 		status,
2885*4882a593Smuzhiyun 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
2886*4882a593Smuzhiyun 		1000,
2887*4882a593Smuzhiyun 		cpu_timeout);
2888*4882a593Smuzhiyun 
2889*4882a593Smuzhiyun 	if (err) {
2890*4882a593Smuzhiyun 		dev_err(hdev->dev,
2891*4882a593Smuzhiyun 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
2892*4882a593Smuzhiyun 		return -EIO;
2893*4882a593Smuzhiyun 	}
2894*4882a593Smuzhiyun 
2895*4882a593Smuzhiyun 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2896*4882a593Smuzhiyun 	return 0;
2897*4882a593Smuzhiyun }
2898*4882a593Smuzhiyun 
2899*4882a593Smuzhiyun static void gaudi_pre_hw_init(struct hl_device *hdev)
2900*4882a593Smuzhiyun {
2901*4882a593Smuzhiyun 	/* Perform a read from the device to make sure the device is up */
2902*4882a593Smuzhiyun 	RREG32(mmHW_STATE);
2903*4882a593Smuzhiyun 
2904*4882a593Smuzhiyun 	/* Set the access through PCI bars (Linux driver only) as
2905*4882a593Smuzhiyun 	 * secured
2906*4882a593Smuzhiyun 	 */
2907*4882a593Smuzhiyun 	WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2908*4882a593Smuzhiyun 			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2909*4882a593Smuzhiyun 			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2910*4882a593Smuzhiyun 
2911*4882a593Smuzhiyun 	/* Perform a read to flush the pending writes and ensure the
2912*4882a593Smuzhiyun 	 * configuration was set in the device
2913*4882a593Smuzhiyun 	 */
2914*4882a593Smuzhiyun 	RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2915*4882a593Smuzhiyun 
2916*4882a593Smuzhiyun 	/*
2917*4882a593Smuzhiyun 	 * Let's mark in the H/W that we have reached this point. We check
2918*4882a593Smuzhiyun 	 * this value in the reset_before_init function to understand whether
2919*4882a593Smuzhiyun 	 * we need to reset the chip before doing H/W init. This register is
2920*4882a593Smuzhiyun 	 * cleared by the H/W upon H/W reset
2921*4882a593Smuzhiyun 	 */
2922*4882a593Smuzhiyun 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2923*4882a593Smuzhiyun 
2924*4882a593Smuzhiyun 	/* Configure the reset registers. Must be done as early as possible
2925*4882a593Smuzhiyun 	 * in case we fail during H/W initialization
2926*4882a593Smuzhiyun 	 */
2927*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2928*4882a593Smuzhiyun 					(CFG_RST_H_DMA_MASK |
2929*4882a593Smuzhiyun 					CFG_RST_H_MME_MASK |
2930*4882a593Smuzhiyun 					CFG_RST_H_SM_MASK |
2931*4882a593Smuzhiyun 					CFG_RST_H_TPC_7_MASK));
2932*4882a593Smuzhiyun 
2933*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2934*4882a593Smuzhiyun 
2935*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2936*4882a593Smuzhiyun 					(CFG_RST_H_HBM_MASK |
2937*4882a593Smuzhiyun 					CFG_RST_H_TPC_7_MASK |
2938*4882a593Smuzhiyun 					CFG_RST_H_NIC_MASK |
2939*4882a593Smuzhiyun 					CFG_RST_H_SM_MASK |
2940*4882a593Smuzhiyun 					CFG_RST_H_DMA_MASK |
2941*4882a593Smuzhiyun 					CFG_RST_H_MME_MASK |
2942*4882a593Smuzhiyun 					CFG_RST_H_CPU_MASK |
2943*4882a593Smuzhiyun 					CFG_RST_H_MMU_MASK));
2944*4882a593Smuzhiyun 
2945*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2946*4882a593Smuzhiyun 					(CFG_RST_L_IF_MASK |
2947*4882a593Smuzhiyun 					CFG_RST_L_PSOC_MASK |
2948*4882a593Smuzhiyun 					CFG_RST_L_TPC_MASK));
2949*4882a593Smuzhiyun }
2950*4882a593Smuzhiyun 
2951*4882a593Smuzhiyun static int gaudi_hw_init(struct hl_device *hdev)
2952*4882a593Smuzhiyun {
2953*4882a593Smuzhiyun 	int rc;
2954*4882a593Smuzhiyun 
2955*4882a593Smuzhiyun 	dev_info(hdev->dev, "Starting initialization of H/W\n");
2956*4882a593Smuzhiyun 
2957*4882a593Smuzhiyun 	gaudi_pre_hw_init(hdev);
2958*4882a593Smuzhiyun 
2959*4882a593Smuzhiyun 	gaudi_init_pci_dma_qmans(hdev);
2960*4882a593Smuzhiyun 
2961*4882a593Smuzhiyun 	gaudi_init_hbm_dma_qmans(hdev);
2962*4882a593Smuzhiyun 
2963*4882a593Smuzhiyun 	rc = gaudi_init_cpu(hdev);
2964*4882a593Smuzhiyun 	if (rc) {
2965*4882a593Smuzhiyun 		dev_err(hdev->dev, "failed to initialize CPU\n");
2966*4882a593Smuzhiyun 		return rc;
2967*4882a593Smuzhiyun 	}
2968*4882a593Smuzhiyun 
2969*4882a593Smuzhiyun 	/* SRAM scrambler must be initialized after CPU is running from HBM */
2970*4882a593Smuzhiyun 	gaudi_init_scrambler_sram(hdev);
2971*4882a593Smuzhiyun 
2972*4882a593Smuzhiyun 	/* This is here just in case we are working without a CPU */
2973*4882a593Smuzhiyun 	gaudi_init_scrambler_hbm(hdev);
2974*4882a593Smuzhiyun 
2975*4882a593Smuzhiyun 	gaudi_init_golden_registers(hdev);
2976*4882a593Smuzhiyun 
2977*4882a593Smuzhiyun 	rc = gaudi_mmu_init(hdev);
2978*4882a593Smuzhiyun 	if (rc)
2979*4882a593Smuzhiyun 		return rc;
2980*4882a593Smuzhiyun 
2981*4882a593Smuzhiyun 	gaudi_init_security(hdev);
2982*4882a593Smuzhiyun 
2983*4882a593Smuzhiyun 	gaudi_init_mme_qmans(hdev);
2984*4882a593Smuzhiyun 
2985*4882a593Smuzhiyun 	gaudi_init_tpc_qmans(hdev);
2986*4882a593Smuzhiyun 
2987*4882a593Smuzhiyun 	hdev->asic_funcs->set_clock_gating(hdev);
2988*4882a593Smuzhiyun 
2989*4882a593Smuzhiyun 	gaudi_enable_timestamp(hdev);
2990*4882a593Smuzhiyun 
2991*4882a593Smuzhiyun 	/* MSI must be enabled before CPU queues are initialized */
2992*4882a593Smuzhiyun 	rc = gaudi_enable_msi(hdev);
2993*4882a593Smuzhiyun 	if (rc)
2994*4882a593Smuzhiyun 		goto disable_queues;
2995*4882a593Smuzhiyun 
2996*4882a593Smuzhiyun 	/* must be called after MSI was enabled */
2997*4882a593Smuzhiyun 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2998*4882a593Smuzhiyun 	if (rc) {
2999*4882a593Smuzhiyun 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3000*4882a593Smuzhiyun 			rc);
3001*4882a593Smuzhiyun 		goto disable_msi;
3002*4882a593Smuzhiyun 	}
3003*4882a593Smuzhiyun 
3004*4882a593Smuzhiyun 	/* Perform read from the device to flush all configuration */
3005*4882a593Smuzhiyun 	RREG32(mmHW_STATE);
3006*4882a593Smuzhiyun 
3007*4882a593Smuzhiyun 	return 0;
3008*4882a593Smuzhiyun 
3009*4882a593Smuzhiyun disable_msi:
3010*4882a593Smuzhiyun 	gaudi_disable_msi(hdev);
3011*4882a593Smuzhiyun disable_queues:
3012*4882a593Smuzhiyun 	gaudi_disable_mme_qmans(hdev);
3013*4882a593Smuzhiyun 	gaudi_disable_pci_dma_qmans(hdev);
3014*4882a593Smuzhiyun 
3015*4882a593Smuzhiyun 	return rc;
3016*4882a593Smuzhiyun }
3017*4882a593Smuzhiyun 
3018*4882a593Smuzhiyun static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3019*4882a593Smuzhiyun {
3020*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
3021*4882a593Smuzhiyun 	u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3022*4882a593Smuzhiyun 
3023*4882a593Smuzhiyun 	if (!hard_reset) {
3024*4882a593Smuzhiyun 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3025*4882a593Smuzhiyun 		return;
3026*4882a593Smuzhiyun 	}
3027*4882a593Smuzhiyun 
3028*4882a593Smuzhiyun 	if (hdev->pldm) {
3029*4882a593Smuzhiyun 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3030*4882a593Smuzhiyun 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3031*4882a593Smuzhiyun 	} else {
3032*4882a593Smuzhiyun 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3033*4882a593Smuzhiyun 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3034*4882a593Smuzhiyun 	}
3035*4882a593Smuzhiyun 
3036*4882a593Smuzhiyun 	/* Set device to handle FLR by H/W as we will put the device CPU to
3037*4882a593Smuzhiyun 	 * halt mode
3038*4882a593Smuzhiyun 	 */
3039*4882a593Smuzhiyun 	WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3040*4882a593Smuzhiyun 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3041*4882a593Smuzhiyun 
3042*4882a593Smuzhiyun 	/* We don't know the state of the CPU, so make sure it is
3043*4882a593Smuzhiyun 	 * stopped by any means necessary
3044*4882a593Smuzhiyun 	 */
3045*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3046*4882a593Smuzhiyun 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3047*4882a593Smuzhiyun 
3048*4882a593Smuzhiyun 	msleep(cpu_timeout_ms);
3049*4882a593Smuzhiyun 
3050*4882a593Smuzhiyun 	/* Tell ASIC not to re-initialize PCIe */
3051*4882a593Smuzhiyun 	WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052*4882a593Smuzhiyun 
3053*4882a593Smuzhiyun 	boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054*4882a593Smuzhiyun 
3055*4882a593Smuzhiyun 	/* H/W bug WA:
3056*4882a593Smuzhiyun 	 * rdata[31:0] = strap_read_val;
3057*4882a593Smuzhiyun 	 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3058*4882a593Smuzhiyun 	 */
3059*4882a593Smuzhiyun 	boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3060*4882a593Smuzhiyun 			(boot_strap & 0x001FFFFF));
3061*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
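	/*
	 * In C terms: the upper ten strap bits (30:21) are shifted left by
	 * one, inserting a zero at bit 21, and the "& ~0x2" additionally
	 * clears bit 1 before the write-back.
	 */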
3062*4882a593Smuzhiyun 
3063*4882a593Smuzhiyun 	/* Restart BTL/BLR upon hard-reset */
3064*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3065*4882a593Smuzhiyun 
3066*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3067*4882a593Smuzhiyun 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3068*4882a593Smuzhiyun 	dev_info(hdev->dev,
3069*4882a593Smuzhiyun 		"Issued HARD reset command, going to wait %dms\n",
3070*4882a593Smuzhiyun 		reset_timeout_ms);
3071*4882a593Smuzhiyun 
3072*4882a593Smuzhiyun 	/*
3073*4882a593Smuzhiyun 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3074*4882a593Smuzhiyun 	 * itself is in reset. We need to wait until the reset is deasserted
3075*4882a593Smuzhiyun 	 */
3076*4882a593Smuzhiyun 	msleep(reset_timeout_ms);
3077*4882a593Smuzhiyun 
3078*4882a593Smuzhiyun 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3079*4882a593Smuzhiyun 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3080*4882a593Smuzhiyun 		dev_err(hdev->dev,
3081*4882a593Smuzhiyun 			"Timeout while waiting for device to reset 0x%x\n",
3082*4882a593Smuzhiyun 			status);
3083*4882a593Smuzhiyun 
3084*4882a593Smuzhiyun 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3085*4882a593Smuzhiyun 
3086*4882a593Smuzhiyun 	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3087*4882a593Smuzhiyun 					HW_CAP_HBM | HW_CAP_PCI_DMA |
3088*4882a593Smuzhiyun 					HW_CAP_MME | HW_CAP_TPC_MASK |
3089*4882a593Smuzhiyun 					HW_CAP_HBM_DMA | HW_CAP_PLL |
3090*4882a593Smuzhiyun 					HW_CAP_MMU |
3091*4882a593Smuzhiyun 					HW_CAP_SRAM_SCRAMBLER |
3092*4882a593Smuzhiyun 					HW_CAP_HBM_SCRAMBLER |
3093*4882a593Smuzhiyun 					HW_CAP_CLK_GATE);
3094*4882a593Smuzhiyun 
3095*4882a593Smuzhiyun 	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3096*4882a593Smuzhiyun }
3097*4882a593Smuzhiyun 
3098*4882a593Smuzhiyun static int gaudi_suspend(struct hl_device *hdev)
3099*4882a593Smuzhiyun {
3100*4882a593Smuzhiyun 	int rc;
3101*4882a593Smuzhiyun 
3102*4882a593Smuzhiyun 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3103*4882a593Smuzhiyun 	if (rc)
3104*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3105*4882a593Smuzhiyun 
3106*4882a593Smuzhiyun 	return rc;
3107*4882a593Smuzhiyun }
3108*4882a593Smuzhiyun 
3109*4882a593Smuzhiyun static int gaudi_resume(struct hl_device *hdev)
3110*4882a593Smuzhiyun {
3111*4882a593Smuzhiyun 	return gaudi_init_iatu(hdev);
3112*4882a593Smuzhiyun }
3113*4882a593Smuzhiyun 
3114*4882a593Smuzhiyun static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3115*4882a593Smuzhiyun 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
3116*4882a593Smuzhiyun {
3117*4882a593Smuzhiyun 	int rc;
3118*4882a593Smuzhiyun 
3119*4882a593Smuzhiyun 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3120*4882a593Smuzhiyun 			VM_DONTCOPY | VM_NORESERVE;
3121*4882a593Smuzhiyun 
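	/*
	 * Host DMA addresses are offset by HOST_PHYS_BASE elsewhere in this
	 * driver so the device can tell them apart from on-device addresses;
	 * subtract it here to recover the real bus address for the mapping.
	 */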
3122*4882a593Smuzhiyun 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
3123*4882a593Smuzhiyun 				(dma_addr - HOST_PHYS_BASE), size);
3124*4882a593Smuzhiyun 	if (rc)
3125*4882a593Smuzhiyun 		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
3126*4882a593Smuzhiyun 
3127*4882a593Smuzhiyun 	return rc;
3128*4882a593Smuzhiyun }
3129*4882a593Smuzhiyun 
3130*4882a593Smuzhiyun static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3131*4882a593Smuzhiyun {
3132*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
3133*4882a593Smuzhiyun 	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3134*4882a593Smuzhiyun 	int dma_id;
3135*4882a593Smuzhiyun 	bool invalid_queue = false;
3136*4882a593Smuzhiyun 
3137*4882a593Smuzhiyun 	switch (hw_queue_id) {
3138*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3139*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3140*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143*4882a593Smuzhiyun 		break;
3144*4882a593Smuzhiyun 
3145*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3146*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3147*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3149*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150*4882a593Smuzhiyun 		break;
3151*4882a593Smuzhiyun 
3152*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3153*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3154*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3156*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157*4882a593Smuzhiyun 		break;
3158*4882a593Smuzhiyun 
3159*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3160*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3161*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3163*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164*4882a593Smuzhiyun 		break;
3165*4882a593Smuzhiyun 
3166*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3167*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3168*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3170*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171*4882a593Smuzhiyun 		break;
3172*4882a593Smuzhiyun 
3173*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3174*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3175*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3177*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178*4882a593Smuzhiyun 		break;
3179*4882a593Smuzhiyun 
3180*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3181*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3182*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3184*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185*4882a593Smuzhiyun 		break;
3186*4882a593Smuzhiyun 
3187*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3188*4882a593Smuzhiyun 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3189*4882a593Smuzhiyun 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3190*4882a593Smuzhiyun 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3191*4882a593Smuzhiyun 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3192*4882a593Smuzhiyun 		break;
3193*4882a593Smuzhiyun 
3194*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_CPU_PQ:
3195*4882a593Smuzhiyun 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3196*4882a593Smuzhiyun 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
3197*4882a593Smuzhiyun 		else
3198*4882a593Smuzhiyun 			invalid_queue = true;
3199*4882a593Smuzhiyun 		break;
3200*4882a593Smuzhiyun 
3201*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_0_0:
3202*4882a593Smuzhiyun 		db_reg_offset = mmMME2_QM_PQ_PI_0;
3203*4882a593Smuzhiyun 		break;
3204*4882a593Smuzhiyun 
3205*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_0_1:
3206*4882a593Smuzhiyun 		db_reg_offset = mmMME2_QM_PQ_PI_1;
3207*4882a593Smuzhiyun 		break;
3208*4882a593Smuzhiyun 
3209*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_0_2:
3210*4882a593Smuzhiyun 		db_reg_offset = mmMME2_QM_PQ_PI_2;
3211*4882a593Smuzhiyun 		break;
3212*4882a593Smuzhiyun 
3213*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_0_3:
3214*4882a593Smuzhiyun 		db_reg_offset = mmMME2_QM_PQ_PI_3;
3215*4882a593Smuzhiyun 		break;
3216*4882a593Smuzhiyun 
3217*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_1_0:
3218*4882a593Smuzhiyun 		db_reg_offset = mmMME0_QM_PQ_PI_0;
3219*4882a593Smuzhiyun 		break;
3220*4882a593Smuzhiyun 
3221*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_1_1:
3222*4882a593Smuzhiyun 		db_reg_offset = mmMME0_QM_PQ_PI_1;
3223*4882a593Smuzhiyun 		break;
3224*4882a593Smuzhiyun 
3225*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_1_2:
3226*4882a593Smuzhiyun 		db_reg_offset = mmMME0_QM_PQ_PI_2;
3227*4882a593Smuzhiyun 		break;
3228*4882a593Smuzhiyun 
3229*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_MME_1_3:
3230*4882a593Smuzhiyun 		db_reg_offset = mmMME0_QM_PQ_PI_3;
3231*4882a593Smuzhiyun 		break;
3232*4882a593Smuzhiyun 
3233*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_0_0:
3234*4882a593Smuzhiyun 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
3235*4882a593Smuzhiyun 		break;
3236*4882a593Smuzhiyun 
3237*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_0_1:
3238*4882a593Smuzhiyun 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
3239*4882a593Smuzhiyun 		break;
3240*4882a593Smuzhiyun 
3241*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_0_2:
3242*4882a593Smuzhiyun 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
3243*4882a593Smuzhiyun 		break;
3244*4882a593Smuzhiyun 
3245*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_0_3:
3246*4882a593Smuzhiyun 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
3247*4882a593Smuzhiyun 		break;
3248*4882a593Smuzhiyun 
3249*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_1_0:
3250*4882a593Smuzhiyun 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
3251*4882a593Smuzhiyun 		break;
3252*4882a593Smuzhiyun 
3253*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_1_1:
3254*4882a593Smuzhiyun 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
3255*4882a593Smuzhiyun 		break;
3256*4882a593Smuzhiyun 
3257*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_1_2:
3258*4882a593Smuzhiyun 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
3259*4882a593Smuzhiyun 		break;
3260*4882a593Smuzhiyun 
3261*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_1_3:
3262*4882a593Smuzhiyun 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
3263*4882a593Smuzhiyun 		break;
3264*4882a593Smuzhiyun 
3265*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_2_0:
3266*4882a593Smuzhiyun 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
3267*4882a593Smuzhiyun 		break;
3268*4882a593Smuzhiyun 
3269*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_2_1:
3270*4882a593Smuzhiyun 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
3271*4882a593Smuzhiyun 		break;
3272*4882a593Smuzhiyun 
3273*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_2_2:
3274*4882a593Smuzhiyun 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
3275*4882a593Smuzhiyun 		break;
3276*4882a593Smuzhiyun 
3277*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_2_3:
3278*4882a593Smuzhiyun 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
3279*4882a593Smuzhiyun 		break;
3280*4882a593Smuzhiyun 
3281*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_3_0:
3282*4882a593Smuzhiyun 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
3283*4882a593Smuzhiyun 		break;
3284*4882a593Smuzhiyun 
3285*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_3_1:
3286*4882a593Smuzhiyun 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
3287*4882a593Smuzhiyun 		break;
3288*4882a593Smuzhiyun 
3289*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_3_2:
3290*4882a593Smuzhiyun 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
3291*4882a593Smuzhiyun 		break;
3292*4882a593Smuzhiyun 
3293*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_3_3:
3294*4882a593Smuzhiyun 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
3295*4882a593Smuzhiyun 		break;
3296*4882a593Smuzhiyun 
3297*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_4_0:
3298*4882a593Smuzhiyun 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
3299*4882a593Smuzhiyun 		break;
3300*4882a593Smuzhiyun 
3301*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_4_1:
3302*4882a593Smuzhiyun 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
3303*4882a593Smuzhiyun 		break;
3304*4882a593Smuzhiyun 
3305*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_4_2:
3306*4882a593Smuzhiyun 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
3307*4882a593Smuzhiyun 		break;
3308*4882a593Smuzhiyun 
3309*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_4_3:
3310*4882a593Smuzhiyun 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
3311*4882a593Smuzhiyun 		break;
3312*4882a593Smuzhiyun 
3313*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_5_0:
3314*4882a593Smuzhiyun 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
3315*4882a593Smuzhiyun 		break;
3316*4882a593Smuzhiyun 
3317*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_5_1:
3318*4882a593Smuzhiyun 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
3319*4882a593Smuzhiyun 		break;
3320*4882a593Smuzhiyun 
3321*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_5_2:
3322*4882a593Smuzhiyun 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
3323*4882a593Smuzhiyun 		break;
3324*4882a593Smuzhiyun 
3325*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_5_3:
3326*4882a593Smuzhiyun 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
3327*4882a593Smuzhiyun 		break;
3328*4882a593Smuzhiyun 
3329*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_6_0:
3330*4882a593Smuzhiyun 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
3331*4882a593Smuzhiyun 		break;
3332*4882a593Smuzhiyun 
3333*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_6_1:
3334*4882a593Smuzhiyun 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
3335*4882a593Smuzhiyun 		break;
3336*4882a593Smuzhiyun 
3337*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_6_2:
3338*4882a593Smuzhiyun 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
3339*4882a593Smuzhiyun 		break;
3340*4882a593Smuzhiyun 
3341*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_6_3:
3342*4882a593Smuzhiyun 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
3343*4882a593Smuzhiyun 		break;
3344*4882a593Smuzhiyun 
3345*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_7_0:
3346*4882a593Smuzhiyun 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
3347*4882a593Smuzhiyun 		break;
3348*4882a593Smuzhiyun 
3349*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_7_1:
3350*4882a593Smuzhiyun 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
3351*4882a593Smuzhiyun 		break;
3352*4882a593Smuzhiyun 
3353*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_7_2:
3354*4882a593Smuzhiyun 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
3355*4882a593Smuzhiyun 		break;
3356*4882a593Smuzhiyun 
3357*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_TPC_7_3:
3358*4882a593Smuzhiyun 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
3359*4882a593Smuzhiyun 		break;
3360*4882a593Smuzhiyun 
3361*4882a593Smuzhiyun 	default:
3362*4882a593Smuzhiyun 		invalid_queue = true;
3363*4882a593Smuzhiyun 	}
3364*4882a593Smuzhiyun 
3365*4882a593Smuzhiyun 	if (invalid_queue) {
3366*4882a593Smuzhiyun 		/* Should never get here */
3367*4882a593Smuzhiyun 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3368*4882a593Smuzhiyun 			hw_queue_id);
3369*4882a593Smuzhiyun 		return;
3370*4882a593Smuzhiyun 	}
3371*4882a593Smuzhiyun 
3372*4882a593Smuzhiyun 	db_value = pi;
3373*4882a593Smuzhiyun 
3374*4882a593Smuzhiyun 	/* ring the doorbell */
3375*4882a593Smuzhiyun 	WREG32(db_reg_offset, db_value);
3376*4882a593Smuzhiyun 
3377*4882a593Smuzhiyun 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3378*4882a593Smuzhiyun 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3379*4882a593Smuzhiyun 				GAUDI_EVENT_PI_UPDATE);
3380*4882a593Smuzhiyun }
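/*
 * A minimal sketch of the doorbell offset math used in the DMA cases above,
 * assuming the GAUDI_QUEUE_ID_DMA_* values are consecutive and 4-aligned per
 * engine (so the low two bits select one of the four PQs) and that
 * DMA_QMAN_OFFSET is the register stride between QMAN blocks:
 *
 *	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
 *	q_off  = dma_id * DMA_QMAN_OFFSET + (hw_queue_id & 0x3) * 4;
 *	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, pi);
 *
 * The four PQ_PI registers of a QMAN sit 4 bytes apart, which is why the
 * queue index is scaled by 4.
 */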
3381*4882a593Smuzhiyun 
3382*4882a593Smuzhiyun static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3383*4882a593Smuzhiyun 				struct hl_bd *bd)
3384*4882a593Smuzhiyun {
3385*4882a593Smuzhiyun 	__le64 *pbd = (__le64 *) bd;
3386*4882a593Smuzhiyun 
3387*4882a593Smuzhiyun 	/* The QMANs are in host memory, so a simple copy suffices */
3388*4882a593Smuzhiyun 	pqe[0] = pbd[0];
3389*4882a593Smuzhiyun 	pqe[1] = pbd[1];
3390*4882a593Smuzhiyun }
3391*4882a593Smuzhiyun 
3392*4882a593Smuzhiyun static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3393*4882a593Smuzhiyun 					dma_addr_t *dma_handle, gfp_t flags)
3394*4882a593Smuzhiyun {
3395*4882a593Smuzhiyun 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3396*4882a593Smuzhiyun 						dma_handle, flags);
3397*4882a593Smuzhiyun 
3398*4882a593Smuzhiyun 	/* Shift to the device's base physical address of host memory */
3399*4882a593Smuzhiyun 	if (kernel_addr)
3400*4882a593Smuzhiyun 		*dma_handle += HOST_PHYS_BASE;
3401*4882a593Smuzhiyun 
3402*4882a593Smuzhiyun 	return kernel_addr;
3403*4882a593Smuzhiyun }
3404*4882a593Smuzhiyun 
3405*4882a593Smuzhiyun static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3406*4882a593Smuzhiyun 		void *cpu_addr, dma_addr_t dma_handle)
3407*4882a593Smuzhiyun {
3408*4882a593Smuzhiyun 	/* Cancel the device's base physical address of host memory */
3409*4882a593Smuzhiyun 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3410*4882a593Smuzhiyun 
3411*4882a593Smuzhiyun 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3412*4882a593Smuzhiyun }
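/*
 * The alloc/free pair above must stay symmetric: the returned handle is
 * biased by HOST_PHYS_BASE so the device sees host memory inside its own
 * address space, and the bias is removed before the address goes back to
 * the DMA API. A minimal usage sketch, assuming a valid hdev:
 *
 *	dma_addr_t dma_handle;
 *	void *va = gaudi_dma_alloc_coherent(hdev, SZ_4K, &dma_handle,
 *						GFP_KERNEL);
 *	if (va)
 *		gaudi_dma_free_coherent(hdev, SZ_4K, va, dma_handle);
 */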
3413*4882a593Smuzhiyun 
3414*4882a593Smuzhiyun static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3415*4882a593Smuzhiyun 				u32 queue_id, dma_addr_t *dma_handle,
3416*4882a593Smuzhiyun 				u16 *queue_len)
3417*4882a593Smuzhiyun {
3418*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
3419*4882a593Smuzhiyun 	struct gaudi_internal_qman_info *q;
3420*4882a593Smuzhiyun 
3421*4882a593Smuzhiyun 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3422*4882a593Smuzhiyun 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3423*4882a593Smuzhiyun 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3424*4882a593Smuzhiyun 		return NULL;
3425*4882a593Smuzhiyun 	}
3426*4882a593Smuzhiyun 
3427*4882a593Smuzhiyun 	q = &gaudi->internal_qmans[queue_id];
3428*4882a593Smuzhiyun 	*dma_handle = q->pq_dma_addr;
3429*4882a593Smuzhiyun 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3430*4882a593Smuzhiyun 
3431*4882a593Smuzhiyun 	return q->pq_kernel_addr;
3432*4882a593Smuzhiyun }
3433*4882a593Smuzhiyun 
3434*4882a593Smuzhiyun static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3435*4882a593Smuzhiyun 				u16 len, u32 timeout, long *result)
3436*4882a593Smuzhiyun {
3437*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
3438*4882a593Smuzhiyun 
3439*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3440*4882a593Smuzhiyun 		if (result)
3441*4882a593Smuzhiyun 			*result = 0;
3442*4882a593Smuzhiyun 		return 0;
3443*4882a593Smuzhiyun 	}
3444*4882a593Smuzhiyun 
3445*4882a593Smuzhiyun 	if (!timeout)
3446*4882a593Smuzhiyun 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3447*4882a593Smuzhiyun 
3448*4882a593Smuzhiyun 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3449*4882a593Smuzhiyun 						timeout, result);
3450*4882a593Smuzhiyun }
3451*4882a593Smuzhiyun 
3452*4882a593Smuzhiyun static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3453*4882a593Smuzhiyun {
3454*4882a593Smuzhiyun 	struct packet_msg_prot *fence_pkt;
3455*4882a593Smuzhiyun 	dma_addr_t pkt_dma_addr;
3456*4882a593Smuzhiyun 	u32 fence_val, tmp, timeout_usec;
3457*4882a593Smuzhiyun 	dma_addr_t fence_dma_addr;
3458*4882a593Smuzhiyun 	u32 *fence_ptr;
3459*4882a593Smuzhiyun 	int rc;
3460*4882a593Smuzhiyun 
3461*4882a593Smuzhiyun 	if (hdev->pldm)
3462*4882a593Smuzhiyun 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3463*4882a593Smuzhiyun 	else
3464*4882a593Smuzhiyun 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3465*4882a593Smuzhiyun 
3466*4882a593Smuzhiyun 	fence_val = GAUDI_QMAN0_FENCE_VAL;
3467*4882a593Smuzhiyun 
3468*4882a593Smuzhiyun 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3469*4882a593Smuzhiyun 							&fence_dma_addr);
3470*4882a593Smuzhiyun 	if (!fence_ptr) {
3471*4882a593Smuzhiyun 		dev_err(hdev->dev,
3472*4882a593Smuzhiyun 			"Failed to allocate memory for H/W queue %d testing\n",
3473*4882a593Smuzhiyun 			hw_queue_id);
3474*4882a593Smuzhiyun 		return -ENOMEM;
3475*4882a593Smuzhiyun 	}
3476*4882a593Smuzhiyun 
3477*4882a593Smuzhiyun 	*fence_ptr = 0;
3478*4882a593Smuzhiyun 
3479*4882a593Smuzhiyun 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3480*4882a593Smuzhiyun 					sizeof(struct packet_msg_prot),
3481*4882a593Smuzhiyun 					GFP_KERNEL, &pkt_dma_addr);
3482*4882a593Smuzhiyun 	if (!fence_pkt) {
3483*4882a593Smuzhiyun 		dev_err(hdev->dev,
3484*4882a593Smuzhiyun 			"Failed to allocate packet for H/W queue %d testing\n",
3485*4882a593Smuzhiyun 			hw_queue_id);
3486*4882a593Smuzhiyun 		rc = -ENOMEM;
3487*4882a593Smuzhiyun 		goto free_fence_ptr;
3488*4882a593Smuzhiyun 	}
3489*4882a593Smuzhiyun 
3490*4882a593Smuzhiyun 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3491*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3492*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3493*4882a593Smuzhiyun 
3494*4882a593Smuzhiyun 	fence_pkt->ctl = cpu_to_le32(tmp);
3495*4882a593Smuzhiyun 	fence_pkt->value = cpu_to_le32(fence_val);
3496*4882a593Smuzhiyun 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3497*4882a593Smuzhiyun 
3498*4882a593Smuzhiyun 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3499*4882a593Smuzhiyun 					sizeof(struct packet_msg_prot),
3500*4882a593Smuzhiyun 					pkt_dma_addr);
3501*4882a593Smuzhiyun 	if (rc) {
3502*4882a593Smuzhiyun 		dev_err(hdev->dev,
3503*4882a593Smuzhiyun 			"Failed to send fence packet to H/W queue %d\n",
3504*4882a593Smuzhiyun 			hw_queue_id);
3505*4882a593Smuzhiyun 		goto free_pkt;
3506*4882a593Smuzhiyun 	}
3507*4882a593Smuzhiyun 
3508*4882a593Smuzhiyun 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3509*4882a593Smuzhiyun 					1000, timeout_usec, true);
3510*4882a593Smuzhiyun 
3511*4882a593Smuzhiyun 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3512*4882a593Smuzhiyun 
3513*4882a593Smuzhiyun 	if (rc == -ETIMEDOUT) {
3514*4882a593Smuzhiyun 		dev_err(hdev->dev,
3515*4882a593Smuzhiyun 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3516*4882a593Smuzhiyun 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3517*4882a593Smuzhiyun 		rc = -EIO;
3518*4882a593Smuzhiyun 	}
3519*4882a593Smuzhiyun 
3520*4882a593Smuzhiyun free_pkt:
3521*4882a593Smuzhiyun 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3522*4882a593Smuzhiyun 					pkt_dma_addr);
3523*4882a593Smuzhiyun free_fence_ptr:
3524*4882a593Smuzhiyun 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3525*4882a593Smuzhiyun 					fence_dma_addr);
3526*4882a593Smuzhiyun 	return rc;
3527*4882a593Smuzhiyun }
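/*
 * In short, the queue test above: allocate a scratch word, build a MSG_PROT
 * packet that writes GAUDI_QMAN0_FENCE_VAL to that word, ring the queue, and
 * poll until the value lands. A sketch of the packet, assuming EB/MB are the
 * engine/message barrier bits of the ctl word:
 *
 *	fence_pkt->ctl   = OPCODE(MSG_PROT) | EB | MB;
 *	fence_pkt->value = GAUDI_QMAN0_FENCE_VAL;
 *	fence_pkt->addr  = fence_dma_addr;	// scratch word, device view
 */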
3528*4882a593Smuzhiyun 
3529*4882a593Smuzhiyun static int gaudi_test_cpu_queue(struct hl_device *hdev)
3530*4882a593Smuzhiyun {
3531*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
3532*4882a593Smuzhiyun 
3533*4882a593Smuzhiyun 	/*
3534*4882a593Smuzhiyun 	 * Check the capability here because send_cpu_message() won't update
3535*4882a593Smuzhiyun 	 * the result value if the CPU queue capability isn't initialized
3536*4882a593Smuzhiyun 	 */
3537*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3538*4882a593Smuzhiyun 		return 0;
3539*4882a593Smuzhiyun 
3540*4882a593Smuzhiyun 	return hl_fw_test_cpu_queue(hdev);
3541*4882a593Smuzhiyun }
3542*4882a593Smuzhiyun 
3543*4882a593Smuzhiyun static int gaudi_test_queues(struct hl_device *hdev)
3544*4882a593Smuzhiyun {
3545*4882a593Smuzhiyun 	int i, rc, ret_val = 0;
3546*4882a593Smuzhiyun 
3547*4882a593Smuzhiyun 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3548*4882a593Smuzhiyun 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3549*4882a593Smuzhiyun 			rc = gaudi_test_queue(hdev, i);
3550*4882a593Smuzhiyun 			if (rc)
3551*4882a593Smuzhiyun 				ret_val = -EINVAL;
3552*4882a593Smuzhiyun 		}
3553*4882a593Smuzhiyun 	}
3554*4882a593Smuzhiyun 
3555*4882a593Smuzhiyun 	rc = gaudi_test_cpu_queue(hdev);
3556*4882a593Smuzhiyun 	if (rc)
3557*4882a593Smuzhiyun 		ret_val = -EINVAL;
3558*4882a593Smuzhiyun 
3559*4882a593Smuzhiyun 	return ret_val;
3560*4882a593Smuzhiyun }
3561*4882a593Smuzhiyun 
3562*4882a593Smuzhiyun static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3563*4882a593Smuzhiyun 		gfp_t mem_flags, dma_addr_t *dma_handle)
3564*4882a593Smuzhiyun {
3565*4882a593Smuzhiyun 	void *kernel_addr;
3566*4882a593Smuzhiyun 
3567*4882a593Smuzhiyun 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
3568*4882a593Smuzhiyun 		return NULL;
3569*4882a593Smuzhiyun 
3570*4882a593Smuzhiyun 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3571*4882a593Smuzhiyun 
3572*4882a593Smuzhiyun 	/* Shift to the device's base physical address of host memory */
3573*4882a593Smuzhiyun 	if (kernel_addr)
3574*4882a593Smuzhiyun 		*dma_handle += HOST_PHYS_BASE;
3575*4882a593Smuzhiyun 
3576*4882a593Smuzhiyun 	return kernel_addr;
3577*4882a593Smuzhiyun }
3578*4882a593Smuzhiyun 
3579*4882a593Smuzhiyun static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3580*4882a593Smuzhiyun 			dma_addr_t dma_addr)
3581*4882a593Smuzhiyun {
3582*4882a593Smuzhiyun 	/* Cancel the device's base physical address of host memory */
3583*4882a593Smuzhiyun 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3584*4882a593Smuzhiyun 
3585*4882a593Smuzhiyun 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3586*4882a593Smuzhiyun }
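/*
 * A minimal usage sketch of the pool helpers above; requests larger than
 * GAUDI_DMA_POOL_BLK_SIZE are refused with NULL, so the pool only serves
 * small allocations such as fence words and fence packets:
 *
 *	dma_addr_t fence_dma;
 *	u32 *fence = gaudi_dma_pool_zalloc(hdev, sizeof(*fence),
 *						GFP_KERNEL, &fence_dma);
 *	if (fence)
 *		gaudi_dma_pool_free(hdev, fence, fence_dma);
 */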
3587*4882a593Smuzhiyun 
3588*4882a593Smuzhiyun static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3589*4882a593Smuzhiyun 					size_t size, dma_addr_t *dma_handle)
3590*4882a593Smuzhiyun {
3591*4882a593Smuzhiyun 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3592*4882a593Smuzhiyun }
3593*4882a593Smuzhiyun 
3594*4882a593Smuzhiyun static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3595*4882a593Smuzhiyun 						size_t size, void *vaddr)
3596*4882a593Smuzhiyun {
3597*4882a593Smuzhiyun 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3598*4882a593Smuzhiyun }
3599*4882a593Smuzhiyun 
3600*4882a593Smuzhiyun static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3601*4882a593Smuzhiyun 			int nents, enum dma_data_direction dir)
3602*4882a593Smuzhiyun {
3603*4882a593Smuzhiyun 	struct scatterlist *sg;
3604*4882a593Smuzhiyun 	int i;
3605*4882a593Smuzhiyun 
3606*4882a593Smuzhiyun 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3607*4882a593Smuzhiyun 		return -ENOMEM;
3608*4882a593Smuzhiyun 
3609*4882a593Smuzhiyun 	/* Shift to the device's base physical address of host memory */
3610*4882a593Smuzhiyun 	for_each_sg(sgl, sg, nents, i)
3611*4882a593Smuzhiyun 		sg->dma_address += HOST_PHYS_BASE;
3612*4882a593Smuzhiyun 
3613*4882a593Smuzhiyun 	return 0;
3614*4882a593Smuzhiyun }
3615*4882a593Smuzhiyun 
3616*4882a593Smuzhiyun static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3617*4882a593Smuzhiyun 			int nents, enum dma_data_direction dir)
3618*4882a593Smuzhiyun {
3619*4882a593Smuzhiyun 	struct scatterlist *sg;
3620*4882a593Smuzhiyun 	int i;
3621*4882a593Smuzhiyun 
3622*4882a593Smuzhiyun 	/* Cancel the device's base physical address of host memory */
3623*4882a593Smuzhiyun 	for_each_sg(sgl, sg, nents, i)
3624*4882a593Smuzhiyun 		sg->dma_address -= HOST_PHYS_BASE;
3625*4882a593Smuzhiyun 
3626*4882a593Smuzhiyun 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3627*4882a593Smuzhiyun }
3628*4882a593Smuzhiyun 
3629*4882a593Smuzhiyun static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3630*4882a593Smuzhiyun 					struct sg_table *sgt)
3631*4882a593Smuzhiyun {
3632*4882a593Smuzhiyun 	struct scatterlist *sg, *sg_next_iter;
3633*4882a593Smuzhiyun 	u32 count, dma_desc_cnt;
3634*4882a593Smuzhiyun 	u64 len, len_next;
3635*4882a593Smuzhiyun 	dma_addr_t addr, addr_next;
3636*4882a593Smuzhiyun 
3637*4882a593Smuzhiyun 	dma_desc_cnt = 0;
3638*4882a593Smuzhiyun 
3639*4882a593Smuzhiyun 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640*4882a593Smuzhiyun 
3641*4882a593Smuzhiyun 		len = sg_dma_len(sg);
3642*4882a593Smuzhiyun 		addr = sg_dma_address(sg);
3643*4882a593Smuzhiyun 
3644*4882a593Smuzhiyun 		if (len == 0)
3645*4882a593Smuzhiyun 			break;
3646*4882a593Smuzhiyun 
3647*4882a593Smuzhiyun 		while ((count + 1) < sgt->nents) {
3648*4882a593Smuzhiyun 			sg_next_iter = sg_next(sg);
3649*4882a593Smuzhiyun 			len_next = sg_dma_len(sg_next_iter);
3650*4882a593Smuzhiyun 			addr_next = sg_dma_address(sg_next_iter);
3651*4882a593Smuzhiyun 
3652*4882a593Smuzhiyun 			if (len_next == 0)
3653*4882a593Smuzhiyun 				break;
3654*4882a593Smuzhiyun 
3655*4882a593Smuzhiyun 			if ((addr + len == addr_next) &&
3656*4882a593Smuzhiyun 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3657*4882a593Smuzhiyun 				len += len_next;
3658*4882a593Smuzhiyun 				count++;
3659*4882a593Smuzhiyun 				sg = sg_next_iter;
3660*4882a593Smuzhiyun 			} else {
3661*4882a593Smuzhiyun 				break;
3662*4882a593Smuzhiyun 			}
3663*4882a593Smuzhiyun 		}
3664*4882a593Smuzhiyun 
3665*4882a593Smuzhiyun 		dma_desc_cnt++;
3666*4882a593Smuzhiyun 	}
3667*4882a593Smuzhiyun 
3668*4882a593Smuzhiyun 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3669*4882a593Smuzhiyun }
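/*
 * Worked example of the coalescing above, assuming three mapped SG entries
 * where the first two are physically contiguous and the merged length still
 * fits in DMA_MAX_TRANSFER_SIZE:
 *
 *	entry 0: addr = A,          len = 0x1000
 *	entry 1: addr = A + 0x1000, len = 0x1000	-> merged into entry 0
 *	entry 2: addr = B,          len = 0x2000	-> new descriptor
 *
 * dma_desc_cnt ends up as 2, so the patched CB must reserve room for two
 * packet_lin_dma descriptors.
 */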
3670*4882a593Smuzhiyun 
3671*4882a593Smuzhiyun static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3672*4882a593Smuzhiyun 				struct hl_cs_parser *parser,
3673*4882a593Smuzhiyun 				struct packet_lin_dma *user_dma_pkt,
3674*4882a593Smuzhiyun 				u64 addr, enum dma_data_direction dir)
3675*4882a593Smuzhiyun {
3676*4882a593Smuzhiyun 	struct hl_userptr *userptr;
3677*4882a593Smuzhiyun 	int rc;
3678*4882a593Smuzhiyun 
3679*4882a593Smuzhiyun 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3680*4882a593Smuzhiyun 			parser->job_userptr_list, &userptr))
3681*4882a593Smuzhiyun 		goto already_pinned;
3682*4882a593Smuzhiyun 
3683*4882a593Smuzhiyun 	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3684*4882a593Smuzhiyun 	if (!userptr)
3685*4882a593Smuzhiyun 		return -ENOMEM;
3686*4882a593Smuzhiyun 
3687*4882a593Smuzhiyun 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3688*4882a593Smuzhiyun 				userptr);
3689*4882a593Smuzhiyun 	if (rc)
3690*4882a593Smuzhiyun 		goto free_userptr;
3691*4882a593Smuzhiyun 
3692*4882a593Smuzhiyun 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3693*4882a593Smuzhiyun 
3694*4882a593Smuzhiyun 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3695*4882a593Smuzhiyun 					userptr->sgt->nents, dir);
3696*4882a593Smuzhiyun 	if (rc) {
3697*4882a593Smuzhiyun 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3698*4882a593Smuzhiyun 		goto unpin_memory;
3699*4882a593Smuzhiyun 	}
3700*4882a593Smuzhiyun 
3701*4882a593Smuzhiyun 	userptr->dma_mapped = true;
3702*4882a593Smuzhiyun 	userptr->dir = dir;
3703*4882a593Smuzhiyun 
3704*4882a593Smuzhiyun already_pinned:
3705*4882a593Smuzhiyun 	parser->patched_cb_size +=
3706*4882a593Smuzhiyun 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3707*4882a593Smuzhiyun 
3708*4882a593Smuzhiyun 	return 0;
3709*4882a593Smuzhiyun 
3710*4882a593Smuzhiyun unpin_memory:
3711*4882a593Smuzhiyun 	list_del(&userptr->job_node);
3712*4882a593Smuzhiyun 	hl_unpin_host_memory(hdev, userptr);
3713*4882a593Smuzhiyun free_userptr:
3714*4882a593Smuzhiyun 	kfree(userptr);
3715*4882a593Smuzhiyun 	return rc;
3716*4882a593Smuzhiyun }
3717*4882a593Smuzhiyun 
3718*4882a593Smuzhiyun static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3719*4882a593Smuzhiyun 				struct hl_cs_parser *parser,
3720*4882a593Smuzhiyun 				struct packet_lin_dma *user_dma_pkt,
3721*4882a593Smuzhiyun 				bool src_in_host)
3722*4882a593Smuzhiyun {
3723*4882a593Smuzhiyun 	enum dma_data_direction dir;
3724*4882a593Smuzhiyun 	bool skip_host_mem_pin = false, user_memset;
3725*4882a593Smuzhiyun 	u64 addr;
3726*4882a593Smuzhiyun 	int rc = 0;
3727*4882a593Smuzhiyun 
3728*4882a593Smuzhiyun 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3729*4882a593Smuzhiyun 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3730*4882a593Smuzhiyun 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3731*4882a593Smuzhiyun 
3732*4882a593Smuzhiyun 	if (src_in_host) {
3733*4882a593Smuzhiyun 		if (user_memset)
3734*4882a593Smuzhiyun 			skip_host_mem_pin = true;
3735*4882a593Smuzhiyun 
3736*4882a593Smuzhiyun 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3737*4882a593Smuzhiyun 		dir = DMA_TO_DEVICE;
3738*4882a593Smuzhiyun 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3739*4882a593Smuzhiyun 	} else {
3740*4882a593Smuzhiyun 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3741*4882a593Smuzhiyun 		dir = DMA_FROM_DEVICE;
3742*4882a593Smuzhiyun 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3743*4882a593Smuzhiyun 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3744*4882a593Smuzhiyun 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3745*4882a593Smuzhiyun 	}
3746*4882a593Smuzhiyun 
3747*4882a593Smuzhiyun 	if (skip_host_mem_pin)
3748*4882a593Smuzhiyun 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3749*4882a593Smuzhiyun 	else
3750*4882a593Smuzhiyun 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3751*4882a593Smuzhiyun 						addr, dir);
3752*4882a593Smuzhiyun 
3753*4882a593Smuzhiyun 	return rc;
3754*4882a593Smuzhiyun }
3755*4882a593Smuzhiyun 
3756*4882a593Smuzhiyun static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3757*4882a593Smuzhiyun 				struct hl_cs_parser *parser,
3758*4882a593Smuzhiyun 				struct packet_lin_dma *user_dma_pkt)
3759*4882a593Smuzhiyun {
3760*4882a593Smuzhiyun 	bool src_in_host = false;
3761*4882a593Smuzhiyun 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3762*4882a593Smuzhiyun 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3763*4882a593Smuzhiyun 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3764*4882a593Smuzhiyun 
3765*4882a593Smuzhiyun 	dev_dbg(hdev->dev, "DMA packet details:\n");
3766*4882a593Smuzhiyun 	dev_dbg(hdev->dev, "source == 0x%llx\n",
3767*4882a593Smuzhiyun 				le64_to_cpu(user_dma_pkt->src_addr));
3768*4882a593Smuzhiyun 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3769*4882a593Smuzhiyun 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3770*4882a593Smuzhiyun 
3771*4882a593Smuzhiyun 	/*
3772*4882a593Smuzhiyun 	 * Special handling for DMA with size 0. Bypass all validations
3773*4882a593Smuzhiyun 	 * because no transactions will be done except for WR_COMP, which
3774*4882a593Smuzhiyun 	 * is not a security issue
3775*4882a593Smuzhiyun 	 */
3776*4882a593Smuzhiyun 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
3777*4882a593Smuzhiyun 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3778*4882a593Smuzhiyun 		return 0;
3779*4882a593Smuzhiyun 	}
3780*4882a593Smuzhiyun 
3781*4882a593Smuzhiyun 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3782*4882a593Smuzhiyun 		src_in_host = true;
3783*4882a593Smuzhiyun 
3784*4882a593Smuzhiyun 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3785*4882a593Smuzhiyun 						src_in_host);
3786*4882a593Smuzhiyun }
3787*4882a593Smuzhiyun 
3788*4882a593Smuzhiyun static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3789*4882a593Smuzhiyun 					struct hl_cs_parser *parser,
3790*4882a593Smuzhiyun 					struct packet_load_and_exe *user_pkt)
3791*4882a593Smuzhiyun {
3792*4882a593Smuzhiyun 	u32 cfg;
3793*4882a593Smuzhiyun 
3794*4882a593Smuzhiyun 	cfg = le32_to_cpu(user_pkt->cfg);
3795*4882a593Smuzhiyun 
3796*4882a593Smuzhiyun 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3797*4882a593Smuzhiyun 		dev_err(hdev->dev,
3798*4882a593Smuzhiyun 			"User not allowed to use Load and Execute\n");
3799*4882a593Smuzhiyun 		return -EPERM;
3800*4882a593Smuzhiyun 	}
3801*4882a593Smuzhiyun 
3802*4882a593Smuzhiyun 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3803*4882a593Smuzhiyun 
3804*4882a593Smuzhiyun 	return 0;
3805*4882a593Smuzhiyun }
3806*4882a593Smuzhiyun 
3807*4882a593Smuzhiyun static int gaudi_validate_cb(struct hl_device *hdev,
3808*4882a593Smuzhiyun 			struct hl_cs_parser *parser, bool is_mmu)
3809*4882a593Smuzhiyun {
3810*4882a593Smuzhiyun 	u32 cb_parsed_length = 0;
3811*4882a593Smuzhiyun 	int rc = 0;
3812*4882a593Smuzhiyun 
3813*4882a593Smuzhiyun 	parser->patched_cb_size = 0;
3814*4882a593Smuzhiyun 
3815*4882a593Smuzhiyun 	/* user_cb_size is more than 0 so the loop will always execute */
3816*4882a593Smuzhiyun 	while (cb_parsed_length < parser->user_cb_size) {
3817*4882a593Smuzhiyun 		enum packet_id pkt_id;
3818*4882a593Smuzhiyun 		u16 pkt_size;
3819*4882a593Smuzhiyun 		struct gaudi_packet *user_pkt;
3820*4882a593Smuzhiyun 
3821*4882a593Smuzhiyun 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3822*4882a593Smuzhiyun 
3823*4882a593Smuzhiyun 		pkt_id = (enum packet_id) (
3824*4882a593Smuzhiyun 				(le64_to_cpu(user_pkt->header) &
3825*4882a593Smuzhiyun 				PACKET_HEADER_PACKET_ID_MASK) >>
3826*4882a593Smuzhiyun 					PACKET_HEADER_PACKET_ID_SHIFT);
3827*4882a593Smuzhiyun 
3828*4882a593Smuzhiyun 		if (!validate_packet_id(pkt_id)) {
3829*4882a593Smuzhiyun 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3830*4882a593Smuzhiyun 			rc = -EINVAL;
3831*4882a593Smuzhiyun 			break;
3832*4882a593Smuzhiyun 		}
3833*4882a593Smuzhiyun 
3834*4882a593Smuzhiyun 		pkt_size = gaudi_packet_sizes[pkt_id];
3835*4882a593Smuzhiyun 		cb_parsed_length += pkt_size;
3836*4882a593Smuzhiyun 		if (cb_parsed_length > parser->user_cb_size) {
3837*4882a593Smuzhiyun 			dev_err(hdev->dev,
3838*4882a593Smuzhiyun 				"packet 0x%x is out of CB boundary\n", pkt_id);
3839*4882a593Smuzhiyun 			rc = -EINVAL;
3840*4882a593Smuzhiyun 			break;
3841*4882a593Smuzhiyun 		}
3842*4882a593Smuzhiyun 
3843*4882a593Smuzhiyun 		switch (pkt_id) {
3844*4882a593Smuzhiyun 		case PACKET_MSG_PROT:
3845*4882a593Smuzhiyun 			dev_err(hdev->dev,
3846*4882a593Smuzhiyun 				"User not allowed to use MSG_PROT\n");
3847*4882a593Smuzhiyun 			rc = -EPERM;
3848*4882a593Smuzhiyun 			break;
3849*4882a593Smuzhiyun 
3850*4882a593Smuzhiyun 		case PACKET_CP_DMA:
3851*4882a593Smuzhiyun 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3852*4882a593Smuzhiyun 			rc = -EPERM;
3853*4882a593Smuzhiyun 			break;
3854*4882a593Smuzhiyun 
3855*4882a593Smuzhiyun 		case PACKET_STOP:
3856*4882a593Smuzhiyun 			dev_err(hdev->dev, "User not allowed to use STOP\n");
3857*4882a593Smuzhiyun 			rc = -EPERM;
3858*4882a593Smuzhiyun 			break;
3859*4882a593Smuzhiyun 
3860*4882a593Smuzhiyun 		case PACKET_WREG_BULK:
3861*4882a593Smuzhiyun 			dev_err(hdev->dev,
3862*4882a593Smuzhiyun 				"User not allowed to use WREG_BULK\n");
3863*4882a593Smuzhiyun 			rc = -EPERM;
3864*4882a593Smuzhiyun 			break;
3865*4882a593Smuzhiyun 
3866*4882a593Smuzhiyun 		case PACKET_LOAD_AND_EXE:
3867*4882a593Smuzhiyun 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868*4882a593Smuzhiyun 				(struct packet_load_and_exe *) user_pkt);
3869*4882a593Smuzhiyun 			break;
3870*4882a593Smuzhiyun 
3871*4882a593Smuzhiyun 		case PACKET_LIN_DMA:
3872*4882a593Smuzhiyun 			parser->contains_dma_pkt = true;
3873*4882a593Smuzhiyun 			if (is_mmu)
3874*4882a593Smuzhiyun 				parser->patched_cb_size += pkt_size;
3875*4882a593Smuzhiyun 			else
3876*4882a593Smuzhiyun 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877*4882a593Smuzhiyun 					(struct packet_lin_dma *) user_pkt);
3878*4882a593Smuzhiyun 			break;
3879*4882a593Smuzhiyun 
3880*4882a593Smuzhiyun 		case PACKET_WREG_32:
3881*4882a593Smuzhiyun 		case PACKET_MSG_LONG:
3882*4882a593Smuzhiyun 		case PACKET_MSG_SHORT:
3883*4882a593Smuzhiyun 		case PACKET_REPEAT:
3884*4882a593Smuzhiyun 		case PACKET_FENCE:
3885*4882a593Smuzhiyun 		case PACKET_NOP:
3886*4882a593Smuzhiyun 		case PACKET_ARB_POINT:
3887*4882a593Smuzhiyun 			parser->patched_cb_size += pkt_size;
3888*4882a593Smuzhiyun 			break;
3889*4882a593Smuzhiyun 
3890*4882a593Smuzhiyun 		default:
3891*4882a593Smuzhiyun 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892*4882a593Smuzhiyun 				pkt_id);
3893*4882a593Smuzhiyun 			rc = -EINVAL;
3894*4882a593Smuzhiyun 			break;
3895*4882a593Smuzhiyun 		}
3896*4882a593Smuzhiyun 
3897*4882a593Smuzhiyun 		if (rc)
3898*4882a593Smuzhiyun 			break;
3899*4882a593Smuzhiyun 	}
3900*4882a593Smuzhiyun 
3901*4882a593Smuzhiyun 	/*
3902*4882a593Smuzhiyun 	 * The new CB should have space at the end for two MSG_PROT packets:
3903*4882a593Smuzhiyun 	 * 1. A packet that will act as a completion packet
3904*4882a593Smuzhiyun 	 * 2. A packet that will generate MSI interrupt
3905*4882a593Smuzhiyun 	 */
3906*4882a593Smuzhiyun 	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907*4882a593Smuzhiyun 
3908*4882a593Smuzhiyun 	return rc;
3909*4882a593Smuzhiyun }
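/*
 * Example of the size accounting above (a sketch, assuming a user CB with
 * one WREG_32 and one LIN_DMA parsed without MMU, where the LIN_DMA's
 * pinned buffer maps to two non-mergeable SG runs):
 *
 *	patched_cb_size = sizeof(struct packet_wreg32)
 *			+ 2 * sizeof(struct packet_lin_dma)
 *			+ 2 * sizeof(struct packet_msg_prot);
 */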
3910*4882a593Smuzhiyun 
3911*4882a593Smuzhiyun static int gaudi_patch_dma_packet(struct hl_device *hdev,
3912*4882a593Smuzhiyun 				struct hl_cs_parser *parser,
3913*4882a593Smuzhiyun 				struct packet_lin_dma *user_dma_pkt,
3914*4882a593Smuzhiyun 				struct packet_lin_dma *new_dma_pkt,
3915*4882a593Smuzhiyun 				u32 *new_dma_pkt_size)
3916*4882a593Smuzhiyun {
3917*4882a593Smuzhiyun 	struct hl_userptr *userptr;
3918*4882a593Smuzhiyun 	struct scatterlist *sg, *sg_next_iter;
3919*4882a593Smuzhiyun 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3920*4882a593Smuzhiyun 	u64 len, len_next;
3921*4882a593Smuzhiyun 	dma_addr_t dma_addr, dma_addr_next;
3922*4882a593Smuzhiyun 	u64 device_memory_addr, addr;
3923*4882a593Smuzhiyun 	enum dma_data_direction dir;
3924*4882a593Smuzhiyun 	struct sg_table *sgt;
3925*4882a593Smuzhiyun 	bool src_in_host = false;
3926*4882a593Smuzhiyun 	bool skip_host_mem_pin = false;
3927*4882a593Smuzhiyun 	bool user_memset;
3928*4882a593Smuzhiyun 
3929*4882a593Smuzhiyun 	ctl = le32_to_cpu(user_dma_pkt->ctl);
3930*4882a593Smuzhiyun 
3931*4882a593Smuzhiyun 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3932*4882a593Smuzhiyun 		src_in_host = true;
3933*4882a593Smuzhiyun 
3934*4882a593Smuzhiyun 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3935*4882a593Smuzhiyun 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3936*4882a593Smuzhiyun 
3937*4882a593Smuzhiyun 	if (src_in_host) {
3938*4882a593Smuzhiyun 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3939*4882a593Smuzhiyun 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3940*4882a593Smuzhiyun 		dir = DMA_TO_DEVICE;
3941*4882a593Smuzhiyun 		if (user_memset)
3942*4882a593Smuzhiyun 			skip_host_mem_pin = true;
3943*4882a593Smuzhiyun 	} else {
3944*4882a593Smuzhiyun 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3945*4882a593Smuzhiyun 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3946*4882a593Smuzhiyun 		dir = DMA_FROM_DEVICE;
3947*4882a593Smuzhiyun 	}
3948*4882a593Smuzhiyun 
3949*4882a593Smuzhiyun 	if ((!skip_host_mem_pin) &&
3950*4882a593Smuzhiyun 		(!hl_userptr_is_pinned(hdev, addr,
3951*4882a593Smuzhiyun 					le32_to_cpu(user_dma_pkt->tsize),
3952*4882a593Smuzhiyun 					parser->job_userptr_list, &userptr))) {
3953*4882a593Smuzhiyun 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3954*4882a593Smuzhiyun 				addr, le32_to_cpu(user_dma_pkt->tsize));
3955*4882a593Smuzhiyun 		return -EFAULT;
3956*4882a593Smuzhiyun 	}
3957*4882a593Smuzhiyun 
3958*4882a593Smuzhiyun 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3959*4882a593Smuzhiyun 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3960*4882a593Smuzhiyun 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3961*4882a593Smuzhiyun 		return 0;
3962*4882a593Smuzhiyun 	}
3963*4882a593Smuzhiyun 
3964*4882a593Smuzhiyun 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3965*4882a593Smuzhiyun 
3966*4882a593Smuzhiyun 	sgt = userptr->sgt;
3967*4882a593Smuzhiyun 	dma_desc_cnt = 0;
3968*4882a593Smuzhiyun 
3969*4882a593Smuzhiyun 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3970*4882a593Smuzhiyun 		len = sg_dma_len(sg);
3971*4882a593Smuzhiyun 		dma_addr = sg_dma_address(sg);
3972*4882a593Smuzhiyun 
3973*4882a593Smuzhiyun 		if (len == 0)
3974*4882a593Smuzhiyun 			break;
3975*4882a593Smuzhiyun 
3976*4882a593Smuzhiyun 		while ((count + 1) < sgt->nents) {
3977*4882a593Smuzhiyun 			sg_next_iter = sg_next(sg);
3978*4882a593Smuzhiyun 			len_next = sg_dma_len(sg_next_iter);
3979*4882a593Smuzhiyun 			dma_addr_next = sg_dma_address(sg_next_iter);
3980*4882a593Smuzhiyun 
3981*4882a593Smuzhiyun 			if (len_next == 0)
3982*4882a593Smuzhiyun 				break;
3983*4882a593Smuzhiyun 
3984*4882a593Smuzhiyun 			if ((dma_addr + len == dma_addr_next) &&
3985*4882a593Smuzhiyun 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3986*4882a593Smuzhiyun 				len += len_next;
3987*4882a593Smuzhiyun 				count++;
3988*4882a593Smuzhiyun 				sg = sg_next_iter;
3989*4882a593Smuzhiyun 			} else {
3990*4882a593Smuzhiyun 				break;
3991*4882a593Smuzhiyun 			}
3992*4882a593Smuzhiyun 		}
3993*4882a593Smuzhiyun 
3994*4882a593Smuzhiyun 		ctl = le32_to_cpu(user_dma_pkt->ctl);
3995*4882a593Smuzhiyun 		if (likely(dma_desc_cnt))
3996*4882a593Smuzhiyun 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3997*4882a593Smuzhiyun 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3998*4882a593Smuzhiyun 		new_dma_pkt->ctl = cpu_to_le32(ctl);
3999*4882a593Smuzhiyun 		new_dma_pkt->tsize = cpu_to_le32(len);
4000*4882a593Smuzhiyun 
4001*4882a593Smuzhiyun 		if (dir == DMA_TO_DEVICE) {
4002*4882a593Smuzhiyun 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4003*4882a593Smuzhiyun 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4004*4882a593Smuzhiyun 		} else {
4005*4882a593Smuzhiyun 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4006*4882a593Smuzhiyun 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4007*4882a593Smuzhiyun 		}
4008*4882a593Smuzhiyun 
4009*4882a593Smuzhiyun 		if (!user_memset)
4010*4882a593Smuzhiyun 			device_memory_addr += len;
4011*4882a593Smuzhiyun 		dma_desc_cnt++;
4012*4882a593Smuzhiyun 		new_dma_pkt++;
4013*4882a593Smuzhiyun 	}
4014*4882a593Smuzhiyun 
4015*4882a593Smuzhiyun 	if (!dma_desc_cnt) {
4016*4882a593Smuzhiyun 		dev_err(hdev->dev,
4017*4882a593Smuzhiyun 			"Error of 0 SG entries when patching DMA packet\n");
4018*4882a593Smuzhiyun 		return -EFAULT;
4019*4882a593Smuzhiyun 	}
4020*4882a593Smuzhiyun 
4021*4882a593Smuzhiyun 	/* Fix the last dma packet - wrcomp must be as user set it */
4022*4882a593Smuzhiyun 	new_dma_pkt--;
4023*4882a593Smuzhiyun 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4024*4882a593Smuzhiyun 
4025*4882a593Smuzhiyun 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4026*4882a593Smuzhiyun 
4027*4882a593Smuzhiyun 	return 0;
4028*4882a593Smuzhiyun }
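/*
 * Sketch of what the patching above produces for a host-to-device copy
 * whose pinned buffer maps to two non-mergeable SG runs:
 *
 *	user:    LIN_DMA src=U    dst=D      tsize=len0+len1  WR_COMP
 *	patched: LIN_DMA src=dma0 dst=D      tsize=len0  EB, no WR_COMP
 *	         LIN_DMA src=dma1 dst=D+len0 tsize=len1  no EB, WR_COMP
 *
 * Only the first descriptor keeps the engine barrier and only the last one
 * carries the user's write-completion setting.
 */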
4029*4882a593Smuzhiyun 
4030*4882a593Smuzhiyun static int gaudi_patch_cb(struct hl_device *hdev,
4031*4882a593Smuzhiyun 				struct hl_cs_parser *parser)
4032*4882a593Smuzhiyun {
4033*4882a593Smuzhiyun 	u32 cb_parsed_length = 0;
4034*4882a593Smuzhiyun 	u32 cb_patched_cur_length = 0;
4035*4882a593Smuzhiyun 	int rc = 0;
4036*4882a593Smuzhiyun 
4037*4882a593Smuzhiyun 	/* user_cb_size is more than 0 so the loop will always execute */
4038*4882a593Smuzhiyun 	while (cb_parsed_length < parser->user_cb_size) {
4039*4882a593Smuzhiyun 		enum packet_id pkt_id;
4040*4882a593Smuzhiyun 		u16 pkt_size;
4041*4882a593Smuzhiyun 		u32 new_pkt_size = 0;
4042*4882a593Smuzhiyun 		struct gaudi_packet *user_pkt, *kernel_pkt;
4043*4882a593Smuzhiyun 
4044*4882a593Smuzhiyun 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4045*4882a593Smuzhiyun 		kernel_pkt = parser->patched_cb->kernel_address +
4046*4882a593Smuzhiyun 					cb_patched_cur_length;
4047*4882a593Smuzhiyun 
4048*4882a593Smuzhiyun 		pkt_id = (enum packet_id) (
4049*4882a593Smuzhiyun 				(le64_to_cpu(user_pkt->header) &
4050*4882a593Smuzhiyun 				PACKET_HEADER_PACKET_ID_MASK) >>
4051*4882a593Smuzhiyun 					PACKET_HEADER_PACKET_ID_SHIFT);
4052*4882a593Smuzhiyun 
4053*4882a593Smuzhiyun 		if (!validate_packet_id(pkt_id)) {
4054*4882a593Smuzhiyun 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4055*4882a593Smuzhiyun 			rc = -EINVAL;
4056*4882a593Smuzhiyun 			break;
4057*4882a593Smuzhiyun 		}
4058*4882a593Smuzhiyun 
4059*4882a593Smuzhiyun 		pkt_size = gaudi_packet_sizes[pkt_id];
4060*4882a593Smuzhiyun 		cb_parsed_length += pkt_size;
4061*4882a593Smuzhiyun 		if (cb_parsed_length > parser->user_cb_size) {
4062*4882a593Smuzhiyun 			dev_err(hdev->dev,
4063*4882a593Smuzhiyun 				"packet 0x%x is out of CB boundary\n", pkt_id);
4064*4882a593Smuzhiyun 			rc = -EINVAL;
4065*4882a593Smuzhiyun 			break;
4066*4882a593Smuzhiyun 		}
4067*4882a593Smuzhiyun 
4068*4882a593Smuzhiyun 		switch (pkt_id) {
4069*4882a593Smuzhiyun 		case PACKET_LIN_DMA:
4070*4882a593Smuzhiyun 			rc = gaudi_patch_dma_packet(hdev, parser,
4071*4882a593Smuzhiyun 					(struct packet_lin_dma *) user_pkt,
4072*4882a593Smuzhiyun 					(struct packet_lin_dma *) kernel_pkt,
4073*4882a593Smuzhiyun 					&new_pkt_size);
4074*4882a593Smuzhiyun 			cb_patched_cur_length += new_pkt_size;
4075*4882a593Smuzhiyun 			break;
4076*4882a593Smuzhiyun 
4077*4882a593Smuzhiyun 		case PACKET_MSG_PROT:
4078*4882a593Smuzhiyun 			dev_err(hdev->dev,
4079*4882a593Smuzhiyun 				"User not allowed to use MSG_PROT\n");
4080*4882a593Smuzhiyun 			rc = -EPERM;
4081*4882a593Smuzhiyun 			break;
4082*4882a593Smuzhiyun 
4083*4882a593Smuzhiyun 		case PACKET_CP_DMA:
4084*4882a593Smuzhiyun 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4085*4882a593Smuzhiyun 			rc = -EPERM;
4086*4882a593Smuzhiyun 			break;
4087*4882a593Smuzhiyun 
4088*4882a593Smuzhiyun 		case PACKET_STOP:
4089*4882a593Smuzhiyun 			dev_err(hdev->dev, "User not allowed to use STOP\n");
4090*4882a593Smuzhiyun 			rc = -EPERM;
4091*4882a593Smuzhiyun 			break;
4092*4882a593Smuzhiyun 
4093*4882a593Smuzhiyun 		case PACKET_WREG_32:
4094*4882a593Smuzhiyun 		case PACKET_WREG_BULK:
4095*4882a593Smuzhiyun 		case PACKET_MSG_LONG:
4096*4882a593Smuzhiyun 		case PACKET_MSG_SHORT:
4097*4882a593Smuzhiyun 		case PACKET_REPEAT:
4098*4882a593Smuzhiyun 		case PACKET_FENCE:
4099*4882a593Smuzhiyun 		case PACKET_NOP:
4100*4882a593Smuzhiyun 		case PACKET_ARB_POINT:
4101*4882a593Smuzhiyun 		case PACKET_LOAD_AND_EXE:
4102*4882a593Smuzhiyun 			memcpy(kernel_pkt, user_pkt, pkt_size);
4103*4882a593Smuzhiyun 			cb_patched_cur_length += pkt_size;
4104*4882a593Smuzhiyun 			break;
4105*4882a593Smuzhiyun 
4106*4882a593Smuzhiyun 		default:
4107*4882a593Smuzhiyun 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4108*4882a593Smuzhiyun 				pkt_id);
4109*4882a593Smuzhiyun 			rc = -EINVAL;
4110*4882a593Smuzhiyun 			break;
4111*4882a593Smuzhiyun 		}
4112*4882a593Smuzhiyun 
4113*4882a593Smuzhiyun 		if (rc)
4114*4882a593Smuzhiyun 			break;
4115*4882a593Smuzhiyun 	}
4116*4882a593Smuzhiyun 
4117*4882a593Smuzhiyun 	return rc;
4118*4882a593Smuzhiyun }
4119*4882a593Smuzhiyun 
4120*4882a593Smuzhiyun static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4121*4882a593Smuzhiyun 		struct hl_cs_parser *parser)
4122*4882a593Smuzhiyun {
4123*4882a593Smuzhiyun 	u64 patched_cb_handle;
4124*4882a593Smuzhiyun 	u32 patched_cb_size;
4125*4882a593Smuzhiyun 	struct hl_cb *user_cb;
4126*4882a593Smuzhiyun 	int rc;
4127*4882a593Smuzhiyun 
4128*4882a593Smuzhiyun 	/*
4129*4882a593Smuzhiyun 	 * The new CB should have space at the end for two MSG_PROT pkt:
4130*4882a593Smuzhiyun 	 * 1. A packet that will act as a completion packet
4131*4882a593Smuzhiyun 	 * 2. A packet that will generate MSI interrupt
4132*4882a593Smuzhiyun 	 */
4133*4882a593Smuzhiyun 	parser->patched_cb_size = parser->user_cb_size +
4134*4882a593Smuzhiyun 			sizeof(struct packet_msg_prot) * 2;
4135*4882a593Smuzhiyun 
4136*4882a593Smuzhiyun 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4137*4882a593Smuzhiyun 				parser->patched_cb_size, false, false,
4138*4882a593Smuzhiyun 				&patched_cb_handle);
4139*4882a593Smuzhiyun 
4140*4882a593Smuzhiyun 	if (rc) {
4141*4882a593Smuzhiyun 		dev_err(hdev->dev,
4142*4882a593Smuzhiyun 			"Failed to allocate patched CB for DMA CS %d\n",
4143*4882a593Smuzhiyun 			rc);
4144*4882a593Smuzhiyun 		return rc;
4145*4882a593Smuzhiyun 	}
4146*4882a593Smuzhiyun 
4147*4882a593Smuzhiyun 	patched_cb_handle >>= PAGE_SHIFT;
4148*4882a593Smuzhiyun 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4149*4882a593Smuzhiyun 				(u32) patched_cb_handle);
4150*4882a593Smuzhiyun 	/* hl_cb_get should never fail here so use kernel WARN */
4151*4882a593Smuzhiyun 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4152*4882a593Smuzhiyun 			(u32) patched_cb_handle);
4153*4882a593Smuzhiyun 	if (!parser->patched_cb) {
4154*4882a593Smuzhiyun 		rc = -EFAULT;
4155*4882a593Smuzhiyun 		goto out;
4156*4882a593Smuzhiyun 	}
4157*4882a593Smuzhiyun 
4158*4882a593Smuzhiyun 	/*
4159*4882a593Smuzhiyun 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
4160*4882a593Smuzhiyun 	 * in validate_queue_index().
4161*4882a593Smuzhiyun 	 */
4162*4882a593Smuzhiyun 	memcpy(parser->patched_cb->kernel_address,
4163*4882a593Smuzhiyun 		parser->user_cb->kernel_address,
4164*4882a593Smuzhiyun 		parser->user_cb_size);
4165*4882a593Smuzhiyun 
4166*4882a593Smuzhiyun 	patched_cb_size = parser->patched_cb_size;
4167*4882a593Smuzhiyun 
4168*4882a593Smuzhiyun 	/* Validate patched CB instead of user CB */
4169*4882a593Smuzhiyun 	user_cb = parser->user_cb;
4170*4882a593Smuzhiyun 	parser->user_cb = parser->patched_cb;
4171*4882a593Smuzhiyun 	rc = gaudi_validate_cb(hdev, parser, true);
4172*4882a593Smuzhiyun 	parser->user_cb = user_cb;
4173*4882a593Smuzhiyun 
4174*4882a593Smuzhiyun 	if (rc) {
4175*4882a593Smuzhiyun 		hl_cb_put(parser->patched_cb);
4176*4882a593Smuzhiyun 		goto out;
4177*4882a593Smuzhiyun 	}
4178*4882a593Smuzhiyun 
4179*4882a593Smuzhiyun 	if (patched_cb_size != parser->patched_cb_size) {
4180*4882a593Smuzhiyun 		dev_err(hdev->dev, "user CB size mismatch\n");
4181*4882a593Smuzhiyun 		hl_cb_put(parser->patched_cb);
4182*4882a593Smuzhiyun 		rc = -EINVAL;
4183*4882a593Smuzhiyun 		goto out;
4184*4882a593Smuzhiyun 	}
4185*4882a593Smuzhiyun 
4186*4882a593Smuzhiyun out:
4187*4882a593Smuzhiyun 	/*
4188*4882a593Smuzhiyun 	 * Always call cb destroy here because we still have 1 reference
4189*4882a593Smuzhiyun 	 * to it by calling cb_get earlier. After the job is completed,
4190*4882a593Smuzhiyun 	 * cb_put will release it, but here we want to remove it from the
4191*4882a593Smuzhiyun 	 * idr
4192*4882a593Smuzhiyun 	 */
4193*4882a593Smuzhiyun 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4194*4882a593Smuzhiyun 					patched_cb_handle << PAGE_SHIFT);
4195*4882a593Smuzhiyun 
4196*4882a593Smuzhiyun 	return rc;
4197*4882a593Smuzhiyun }
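/*
 * Condensed view of the patched-CB handle lifecycle above: the handle from
 * hl_cb_create() is page-scaled, hl_cb_get() takes the reference that keeps
 * the CB alive for the job, and hl_cb_destroy() only removes the idr entry
 * so the final hl_cb_put() frees it:
 *
 *	hl_cb_create(hdev, mgr, ctx, size, false, false, &handle);
 *	cb = hl_cb_get(hdev, mgr, (u32) (handle >> PAGE_SHIFT));
 *	... copy user CB, re-validate ...
 *	hl_cb_destroy(hdev, mgr, handle);	// still page-scaled here
 */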
4198*4882a593Smuzhiyun 
4199*4882a593Smuzhiyun static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4200*4882a593Smuzhiyun 		struct hl_cs_parser *parser)
4201*4882a593Smuzhiyun {
4202*4882a593Smuzhiyun 	u64 patched_cb_handle;
4203*4882a593Smuzhiyun 	int rc;
4204*4882a593Smuzhiyun 
4205*4882a593Smuzhiyun 	rc = gaudi_validate_cb(hdev, parser, false);
4206*4882a593Smuzhiyun 
4207*4882a593Smuzhiyun 	if (rc)
4208*4882a593Smuzhiyun 		goto free_userptr;
4209*4882a593Smuzhiyun 
4210*4882a593Smuzhiyun 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4211*4882a593Smuzhiyun 				parser->patched_cb_size, false, false,
4212*4882a593Smuzhiyun 				&patched_cb_handle);
4213*4882a593Smuzhiyun 	if (rc) {
4214*4882a593Smuzhiyun 		dev_err(hdev->dev,
4215*4882a593Smuzhiyun 			"Failed to allocate patched CB for DMA CS %d\n", rc);
4216*4882a593Smuzhiyun 		goto free_userptr;
4217*4882a593Smuzhiyun 	}
4218*4882a593Smuzhiyun 
4219*4882a593Smuzhiyun 	patched_cb_handle >>= PAGE_SHIFT;
4220*4882a593Smuzhiyun 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4221*4882a593Smuzhiyun 				(u32) patched_cb_handle);
4222*4882a593Smuzhiyun 	/* hl_cb_get should never fail here so use kernel WARN */
4223*4882a593Smuzhiyun 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4224*4882a593Smuzhiyun 			(u32) patched_cb_handle);
4225*4882a593Smuzhiyun 	if (!parser->patched_cb) {
4226*4882a593Smuzhiyun 		rc = -EFAULT;
4227*4882a593Smuzhiyun 		goto out;
4228*4882a593Smuzhiyun 	}
4229*4882a593Smuzhiyun 
4230*4882a593Smuzhiyun 	rc = gaudi_patch_cb(hdev, parser);
4231*4882a593Smuzhiyun 
4232*4882a593Smuzhiyun 	if (rc)
4233*4882a593Smuzhiyun 		hl_cb_put(parser->patched_cb);
4234*4882a593Smuzhiyun 
4235*4882a593Smuzhiyun out:
4236*4882a593Smuzhiyun 	/*
4237*4882a593Smuzhiyun 	 * Always call cb destroy here because we still have 1 reference
4238*4882a593Smuzhiyun 	 * to it by calling cb_get earlier. After the job is completed,
4239*4882a593Smuzhiyun 	 * cb_put will release it, but here we want to remove it from the
4240*4882a593Smuzhiyun 	 * idr
4241*4882a593Smuzhiyun 	 */
4242*4882a593Smuzhiyun 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4243*4882a593Smuzhiyun 				patched_cb_handle << PAGE_SHIFT);
4244*4882a593Smuzhiyun 
4245*4882a593Smuzhiyun free_userptr:
4246*4882a593Smuzhiyun 	if (rc)
4247*4882a593Smuzhiyun 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
4248*4882a593Smuzhiyun 	return rc;
4249*4882a593Smuzhiyun }
4250*4882a593Smuzhiyun 
4251*4882a593Smuzhiyun static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4252*4882a593Smuzhiyun 					struct hl_cs_parser *parser)
4253*4882a593Smuzhiyun {
4254*4882a593Smuzhiyun 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4255*4882a593Smuzhiyun 
4256*4882a593Smuzhiyun 	/* For internal queue jobs just check if CB address is valid */
4257*4882a593Smuzhiyun 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4258*4882a593Smuzhiyun 					parser->user_cb_size,
4259*4882a593Smuzhiyun 					asic_prop->sram_user_base_address,
4260*4882a593Smuzhiyun 					asic_prop->sram_end_address))
4261*4882a593Smuzhiyun 		return 0;
4262*4882a593Smuzhiyun 
4263*4882a593Smuzhiyun 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4264*4882a593Smuzhiyun 					parser->user_cb_size,
4265*4882a593Smuzhiyun 					asic_prop->dram_user_base_address,
4266*4882a593Smuzhiyun 					asic_prop->dram_end_address))
4267*4882a593Smuzhiyun 		return 0;
4268*4882a593Smuzhiyun 
4269*4882a593Smuzhiyun 	/* PMMU and HPMMU addresses are equal, check only one of them */
4270*4882a593Smuzhiyun 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4271*4882a593Smuzhiyun 					parser->user_cb_size,
4272*4882a593Smuzhiyun 					asic_prop->pmmu.start_addr,
4273*4882a593Smuzhiyun 					asic_prop->pmmu.end_addr))
4274*4882a593Smuzhiyun 		return 0;
4275*4882a593Smuzhiyun 
4276*4882a593Smuzhiyun 	dev_err(hdev->dev,
4277*4882a593Smuzhiyun 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4278*4882a593Smuzhiyun 		parser->user_cb, parser->user_cb_size);
4279*4882a593Smuzhiyun 
4280*4882a593Smuzhiyun 	return -EFAULT;
4281*4882a593Smuzhiyun }
4282*4882a593Smuzhiyun 
4283*4882a593Smuzhiyun static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4284*4882a593Smuzhiyun {
4285*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4286*4882a593Smuzhiyun 
4287*4882a593Smuzhiyun 	if (parser->queue_type == QUEUE_TYPE_INT)
4288*4882a593Smuzhiyun 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
4289*4882a593Smuzhiyun 
4290*4882a593Smuzhiyun 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4291*4882a593Smuzhiyun 		return gaudi_parse_cb_mmu(hdev, parser);
4292*4882a593Smuzhiyun 	else
4293*4882a593Smuzhiyun 		return gaudi_parse_cb_no_mmu(hdev, parser);
4294*4882a593Smuzhiyun }
4295*4882a593Smuzhiyun 
4296*4882a593Smuzhiyun static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4297*4882a593Smuzhiyun 					void *kernel_address, u32 len,
4298*4882a593Smuzhiyun 					u64 cq_addr, u32 cq_val, u32 msi_vec,
4299*4882a593Smuzhiyun 					bool eb)
4300*4882a593Smuzhiyun {
4301*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4302*4882a593Smuzhiyun 	struct packet_msg_prot *cq_pkt;
4303*4882a593Smuzhiyun 	u32 tmp;
4304*4882a593Smuzhiyun 
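	/*
	 * The CB tail carries two MSG_PROT packets: the first writes the
	 * completion value to the CQ, the second fires the MSI towards the
	 * host (vector 0 unless multi-MSI mode is enabled).
	 */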
4305*4882a593Smuzhiyun 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4306*4882a593Smuzhiyun 
4307*4882a593Smuzhiyun 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4308*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4309*4882a593Smuzhiyun 
4310*4882a593Smuzhiyun 	if (eb)
4311*4882a593Smuzhiyun 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4312*4882a593Smuzhiyun 
4313*4882a593Smuzhiyun 	cq_pkt->ctl = cpu_to_le32(tmp);
4314*4882a593Smuzhiyun 	cq_pkt->value = cpu_to_le32(cq_val);
4315*4882a593Smuzhiyun 	cq_pkt->addr = cpu_to_le64(cq_addr);
4316*4882a593Smuzhiyun 
4317*4882a593Smuzhiyun 	cq_pkt++;
4318*4882a593Smuzhiyun 
4319*4882a593Smuzhiyun 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4320*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4321*4882a593Smuzhiyun 	cq_pkt->ctl = cpu_to_le32(tmp);
4322*4882a593Smuzhiyun 	cq_pkt->value = cpu_to_le32(1);
4323*4882a593Smuzhiyun 
4324*4882a593Smuzhiyun 	if (!gaudi->multi_msi_mode)
4325*4882a593Smuzhiyun 		msi_vec = 0;
4326*4882a593Smuzhiyun 
4327*4882a593Smuzhiyun 	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328*4882a593Smuzhiyun }
4329*4882a593Smuzhiyun 
4330*4882a593Smuzhiyun static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331*4882a593Smuzhiyun {
4332*4882a593Smuzhiyun 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333*4882a593Smuzhiyun }
4334*4882a593Smuzhiyun 
4335*4882a593Smuzhiyun static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336*4882a593Smuzhiyun 					u32 size, u64 val)
4337*4882a593Smuzhiyun {
4338*4882a593Smuzhiyun 	struct packet_lin_dma *lin_dma_pkt;
4339*4882a593Smuzhiyun 	struct hl_cs_job *job;
4340*4882a593Smuzhiyun 	u32 cb_size, ctl, err_cause;
4341*4882a593Smuzhiyun 	struct hl_cb *cb;
4342*4882a593Smuzhiyun 	int rc;
4343*4882a593Smuzhiyun 
4344*4882a593Smuzhiyun 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4345*4882a593Smuzhiyun 	if (!cb)
4346*4882a593Smuzhiyun 		return -EFAULT;
4347*4882a593Smuzhiyun 
4348*4882a593Smuzhiyun 	lin_dma_pkt = cb->kernel_address;
4349*4882a593Smuzhiyun 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350*4882a593Smuzhiyun 	cb_size = sizeof(*lin_dma_pkt);
4351*4882a593Smuzhiyun 
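	/*
	 * Build a LIN_DMA packet in memset mode: the engine replicates the
	 * 64-bit pattern passed in src_addr over tsize bytes at dst_addr.
	 */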
4352*4882a593Smuzhiyun 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4353*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4354*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4355*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4356*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4357*4882a593Smuzhiyun 
4358*4882a593Smuzhiyun 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
4359*4882a593Smuzhiyun 	lin_dma_pkt->src_addr = cpu_to_le64(val);
4360*4882a593Smuzhiyun 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4361*4882a593Smuzhiyun 	lin_dma_pkt->tsize = cpu_to_le32(size);
4362*4882a593Smuzhiyun 
4363*4882a593Smuzhiyun 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4364*4882a593Smuzhiyun 	if (!job) {
4365*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to allocate a new job\n");
4366*4882a593Smuzhiyun 		rc = -ENOMEM;
4367*4882a593Smuzhiyun 		goto release_cb;
4368*4882a593Smuzhiyun 	}
4369*4882a593Smuzhiyun 
4370*4882a593Smuzhiyun 	/* Verify DMA is OK */
4371*4882a593Smuzhiyun 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4372*4882a593Smuzhiyun 	if (err_cause && !hdev->init_done) {
4373*4882a593Smuzhiyun 		dev_dbg(hdev->dev,
4374*4882a593Smuzhiyun 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
4375*4882a593Smuzhiyun 			err_cause);
4376*4882a593Smuzhiyun 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4377*4882a593Smuzhiyun 	}
4378*4882a593Smuzhiyun 
4379*4882a593Smuzhiyun 	job->id = 0;
4380*4882a593Smuzhiyun 	job->user_cb = cb;
4381*4882a593Smuzhiyun 	job->user_cb->cs_cnt++;
4382*4882a593Smuzhiyun 	job->user_cb_size = cb_size;
4383*4882a593Smuzhiyun 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4384*4882a593Smuzhiyun 	job->patched_cb = job->user_cb;
4385*4882a593Smuzhiyun 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4386*4882a593Smuzhiyun 
4387*4882a593Smuzhiyun 	hl_debugfs_add_job(hdev, job);
4388*4882a593Smuzhiyun 
4389*4882a593Smuzhiyun 	rc = gaudi_send_job_on_qman0(hdev, job);
4390*4882a593Smuzhiyun 	hl_debugfs_remove_job(hdev, job);
4391*4882a593Smuzhiyun 	kfree(job);
4392*4882a593Smuzhiyun 	cb->cs_cnt--;
4393*4882a593Smuzhiyun 
4394*4882a593Smuzhiyun 	/* Verify DMA is OK */
4395*4882a593Smuzhiyun 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4396*4882a593Smuzhiyun 	if (err_cause) {
4397*4882a593Smuzhiyun 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4398*4882a593Smuzhiyun 		rc = -EIO;
4399*4882a593Smuzhiyun 		if (!hdev->init_done) {
4400*4882a593Smuzhiyun 			dev_dbg(hdev->dev,
4401*4882a593Smuzhiyun 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
4402*4882a593Smuzhiyun 				err_cause);
4403*4882a593Smuzhiyun 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4404*4882a593Smuzhiyun 		}
4405*4882a593Smuzhiyun 	}
4406*4882a593Smuzhiyun 
4407*4882a593Smuzhiyun release_cb:
4408*4882a593Smuzhiyun 	hl_cb_put(cb);
4409*4882a593Smuzhiyun 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4410*4882a593Smuzhiyun 
4411*4882a593Smuzhiyun 	return rc;
4412*4882a593Smuzhiyun }
4413*4882a593Smuzhiyun 
4414*4882a593Smuzhiyun static void gaudi_restore_sm_registers(struct hl_device *hdev)
4415*4882a593Smuzhiyun {
4416*4882a593Smuzhiyun 	int i;
4417*4882a593Smuzhiyun 
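	/* Each SOB/monitor register is 4 bytes, hence the "<< 2" bounds */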
4418*4882a593Smuzhiyun 	for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4419*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4420*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4421*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4422*4882a593Smuzhiyun 	}
4423*4882a593Smuzhiyun 
4424*4882a593Smuzhiyun 	for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4425*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4426*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4427*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4428*4882a593Smuzhiyun 	}
4429*4882a593Smuzhiyun 
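	/*
	 * In the W_S block, objects below the first "available" index are
	 * presumably reserved for driver use, so clear only from there on.
	 */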
4430*4882a593Smuzhiyun 	i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4431*4882a593Smuzhiyun 
4432*4882a593Smuzhiyun 	for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4433*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4434*4882a593Smuzhiyun 
4435*4882a593Smuzhiyun 	i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4436*4882a593Smuzhiyun 
4437*4882a593Smuzhiyun 	for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4438*4882a593Smuzhiyun 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4439*4882a593Smuzhiyun }
4440*4882a593Smuzhiyun 
4441*4882a593Smuzhiyun static void gaudi_restore_dma_registers(struct hl_device *hdev)
4442*4882a593Smuzhiyun {
4443*4882a593Smuzhiyun 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4444*4882a593Smuzhiyun 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4445*4882a593Smuzhiyun 	int i;
4446*4882a593Smuzhiyun 
4447*4882a593Smuzhiyun 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4448*4882a593Smuzhiyun 		u64 sob_addr = CFG_BASE +
4449*4882a593Smuzhiyun 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4450*4882a593Smuzhiyun 				(i * sob_delta);
4451*4882a593Smuzhiyun 		u32 dma_offset = i * DMA_CORE_OFFSET;
4452*4882a593Smuzhiyun 
4453*4882a593Smuzhiyun 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4454*4882a593Smuzhiyun 				lower_32_bits(sob_addr));
4455*4882a593Smuzhiyun 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4456*4882a593Smuzhiyun 				upper_32_bits(sob_addr));
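		/*
		 * WR_COMP_WDATA 0x80000001 is taken here to be an atomic
		 * increment-by-1 write to the SOB (bit 31 selecting
		 * increment mode).
		 */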
4457*4882a593Smuzhiyun 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4458*4882a593Smuzhiyun 
4459*4882a593Smuzhiyun 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4460*4882a593Smuzhiyun 		 * modified by the user for SRAM reduction
4461*4882a593Smuzhiyun 		 */
4462*4882a593Smuzhiyun 		if (i > 1)
4463*4882a593Smuzhiyun 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4464*4882a593Smuzhiyun 								0x00000001);
4465*4882a593Smuzhiyun 	}
4466*4882a593Smuzhiyun }
4467*4882a593Smuzhiyun 
4468*4882a593Smuzhiyun static void gaudi_restore_qm_registers(struct hl_device *hdev)
4469*4882a593Smuzhiyun {
4470*4882a593Smuzhiyun 	u32 qman_offset;
4471*4882a593Smuzhiyun 	int i;
4472*4882a593Smuzhiyun 
4473*4882a593Smuzhiyun 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4474*4882a593Smuzhiyun 		qman_offset = i * DMA_QMAN_OFFSET;
4475*4882a593Smuzhiyun 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4476*4882a593Smuzhiyun 	}
4477*4882a593Smuzhiyun 
4478*4882a593Smuzhiyun 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4479*4882a593Smuzhiyun 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4480*4882a593Smuzhiyun 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4481*4882a593Smuzhiyun 	}
4482*4882a593Smuzhiyun 
4483*4882a593Smuzhiyun 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4484*4882a593Smuzhiyun 		qman_offset = i * TPC_QMAN_OFFSET;
4485*4882a593Smuzhiyun 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4486*4882a593Smuzhiyun 	}
4487*4882a593Smuzhiyun }
4488*4882a593Smuzhiyun 
4489*4882a593Smuzhiyun static void gaudi_restore_user_registers(struct hl_device *hdev)
4490*4882a593Smuzhiyun {
4491*4882a593Smuzhiyun 	gaudi_restore_sm_registers(hdev);
4492*4882a593Smuzhiyun 	gaudi_restore_dma_registers(hdev);
4493*4882a593Smuzhiyun 	gaudi_restore_qm_registers(hdev);
4494*4882a593Smuzhiyun }
4495*4882a593Smuzhiyun 
4496*4882a593Smuzhiyun static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4497*4882a593Smuzhiyun {
4498*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4499*4882a593Smuzhiyun 	u64 addr = prop->sram_user_base_address;
4500*4882a593Smuzhiyun 	u32 size = hdev->pldm ? 0x10000 :
4501*4882a593Smuzhiyun 			(prop->sram_size - SRAM_USER_BASE_OFFSET);
4502*4882a593Smuzhiyun 	u64 val = 0x7777777777777777ull;
4503*4882a593Smuzhiyun 	int rc;
4504*4882a593Smuzhiyun 
4505*4882a593Smuzhiyun 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4506*4882a593Smuzhiyun 	if (rc) {
4507*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4508*4882a593Smuzhiyun 		return rc;
4509*4882a593Smuzhiyun 	}
4510*4882a593Smuzhiyun 
4511*4882a593Smuzhiyun 	gaudi_mmu_prepare(hdev, asid);
4512*4882a593Smuzhiyun 
4513*4882a593Smuzhiyun 	gaudi_restore_user_registers(hdev);
4514*4882a593Smuzhiyun 
4515*4882a593Smuzhiyun 	return 0;
4516*4882a593Smuzhiyun }
4517*4882a593Smuzhiyun 
4518*4882a593Smuzhiyun static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4519*4882a593Smuzhiyun {
4520*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4521*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4522*4882a593Smuzhiyun 	u64 addr = prop->mmu_pgt_addr;
4523*4882a593Smuzhiyun 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4524*4882a593Smuzhiyun 
4525*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4526*4882a593Smuzhiyun 		return 0;
4527*4882a593Smuzhiyun 
4528*4882a593Smuzhiyun 	return gaudi_memset_device_memory(hdev, addr, size, 0);
4529*4882a593Smuzhiyun }
4530*4882a593Smuzhiyun 
4531*4882a593Smuzhiyun static void gaudi_restore_phase_topology(struct hl_device *hdev)
4532*4882a593Smuzhiyun {
4533*4882a593Smuzhiyun 
4534*4882a593Smuzhiyun }
4535*4882a593Smuzhiyun 
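/*
 * The debugfs accessors below route the address to the matching aperture:
 * CFG space via register access (unless blocked by clock gating), SRAM
 * through its PCI BAR, DRAM by temporarily sliding the HBM BAR, and host
 * physical memory directly when no IOMMU is present.
 */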
4536*4882a593Smuzhiyun static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4537*4882a593Smuzhiyun {
4538*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4539*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4540*4882a593Smuzhiyun 	u64 hbm_bar_addr;
4541*4882a593Smuzhiyun 	int rc = 0;
4542*4882a593Smuzhiyun 
4543*4882a593Smuzhiyun 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4544*4882a593Smuzhiyun 
4545*4882a593Smuzhiyun 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4546*4882a593Smuzhiyun 				(hdev->clock_gating_mask &
4547*4882a593Smuzhiyun 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4548*4882a593Smuzhiyun 
4549*4882a593Smuzhiyun 			dev_err_ratelimited(hdev->dev,
4550*4882a593Smuzhiyun 				"Can't read register - clock gating is enabled!\n");
4551*4882a593Smuzhiyun 			rc = -EFAULT;
4552*4882a593Smuzhiyun 		} else {
4553*4882a593Smuzhiyun 			*val = RREG32(addr - CFG_BASE);
4554*4882a593Smuzhiyun 		}
4555*4882a593Smuzhiyun 
4556*4882a593Smuzhiyun 	} else if ((addr >= SRAM_BASE_ADDR) &&
4557*4882a593Smuzhiyun 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4558*4882a593Smuzhiyun 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4559*4882a593Smuzhiyun 				(addr - SRAM_BASE_ADDR));
4560*4882a593Smuzhiyun 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4561*4882a593Smuzhiyun 		u64 bar_base_addr = DRAM_PHYS_BASE +
4562*4882a593Smuzhiyun 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4563*4882a593Smuzhiyun 
4564*4882a593Smuzhiyun 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4565*4882a593Smuzhiyun 		if (hbm_bar_addr != U64_MAX) {
4566*4882a593Smuzhiyun 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4567*4882a593Smuzhiyun 						(addr - bar_base_addr));
4568*4882a593Smuzhiyun 
4569*4882a593Smuzhiyun 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4570*4882a593Smuzhiyun 						hbm_bar_addr);
4571*4882a593Smuzhiyun 		}
4572*4882a593Smuzhiyun 		if (hbm_bar_addr == U64_MAX)
4573*4882a593Smuzhiyun 			rc = -EIO;
4574*4882a593Smuzhiyun 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4575*4882a593Smuzhiyun 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4576*4882a593Smuzhiyun 	} else {
4577*4882a593Smuzhiyun 		rc = -EFAULT;
4578*4882a593Smuzhiyun 	}
4579*4882a593Smuzhiyun 
4580*4882a593Smuzhiyun 	return rc;
4581*4882a593Smuzhiyun }
4582*4882a593Smuzhiyun 
4583*4882a593Smuzhiyun static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4584*4882a593Smuzhiyun {
4585*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4586*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4587*4882a593Smuzhiyun 	u64 hbm_bar_addr;
4588*4882a593Smuzhiyun 	int rc = 0;
4589*4882a593Smuzhiyun 
4590*4882a593Smuzhiyun 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4591*4882a593Smuzhiyun 
4592*4882a593Smuzhiyun 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4593*4882a593Smuzhiyun 				(hdev->clock_gating_mask &
4594*4882a593Smuzhiyun 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4595*4882a593Smuzhiyun 
4596*4882a593Smuzhiyun 			dev_err_ratelimited(hdev->dev,
4597*4882a593Smuzhiyun 				"Can't write register - clock gating is enabled!\n");
4598*4882a593Smuzhiyun 			rc = -EFAULT;
4599*4882a593Smuzhiyun 		} else {
4600*4882a593Smuzhiyun 			WREG32(addr - CFG_BASE, val);
4601*4882a593Smuzhiyun 		}
4602*4882a593Smuzhiyun 
4603*4882a593Smuzhiyun 	} else if ((addr >= SRAM_BASE_ADDR) &&
4604*4882a593Smuzhiyun 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4605*4882a593Smuzhiyun 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4606*4882a593Smuzhiyun 					(addr - SRAM_BASE_ADDR));
4607*4882a593Smuzhiyun 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4608*4882a593Smuzhiyun 		u64 bar_base_addr = DRAM_PHYS_BASE +
4609*4882a593Smuzhiyun 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4610*4882a593Smuzhiyun 
4611*4882a593Smuzhiyun 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4612*4882a593Smuzhiyun 		if (hbm_bar_addr != U64_MAX) {
4613*4882a593Smuzhiyun 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4614*4882a593Smuzhiyun 						(addr - bar_base_addr));
4615*4882a593Smuzhiyun 
4616*4882a593Smuzhiyun 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4617*4882a593Smuzhiyun 						hbm_bar_addr);
4618*4882a593Smuzhiyun 		}
4619*4882a593Smuzhiyun 		if (hbm_bar_addr == U64_MAX)
4620*4882a593Smuzhiyun 			rc = -EIO;
4621*4882a593Smuzhiyun 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4622*4882a593Smuzhiyun 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4623*4882a593Smuzhiyun 	} else {
4624*4882a593Smuzhiyun 		rc = -EFAULT;
4625*4882a593Smuzhiyun 	}
4626*4882a593Smuzhiyun 
4627*4882a593Smuzhiyun 	return rc;
4628*4882a593Smuzhiyun }
4629*4882a593Smuzhiyun 
4630*4882a593Smuzhiyun static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4631*4882a593Smuzhiyun {
4632*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4633*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4634*4882a593Smuzhiyun 	u64 hbm_bar_addr;
4635*4882a593Smuzhiyun 	int rc = 0;
4636*4882a593Smuzhiyun 
4637*4882a593Smuzhiyun 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4638*4882a593Smuzhiyun 
4639*4882a593Smuzhiyun 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4640*4882a593Smuzhiyun 				(hdev->clock_gating_mask &
4641*4882a593Smuzhiyun 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4642*4882a593Smuzhiyun 
4643*4882a593Smuzhiyun 			dev_err_ratelimited(hdev->dev,
4644*4882a593Smuzhiyun 				"Can't read register - clock gating is enabled!\n");
4645*4882a593Smuzhiyun 			rc = -EFAULT;
4646*4882a593Smuzhiyun 		} else {
4647*4882a593Smuzhiyun 			u32 val_l = RREG32(addr - CFG_BASE);
4648*4882a593Smuzhiyun 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4649*4882a593Smuzhiyun 
4650*4882a593Smuzhiyun 			*val = (((u64) val_h) << 32) | val_l;
4651*4882a593Smuzhiyun 		}
4652*4882a593Smuzhiyun 
4653*4882a593Smuzhiyun 	} else if ((addr >= SRAM_BASE_ADDR) &&
4654*4882a593Smuzhiyun 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4655*4882a593Smuzhiyun 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4656*4882a593Smuzhiyun 				(addr - SRAM_BASE_ADDR));
4657*4882a593Smuzhiyun 	} else if (addr <=
4658*4882a593Smuzhiyun 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4659*4882a593Smuzhiyun 		u64 bar_base_addr = DRAM_PHYS_BASE +
4660*4882a593Smuzhiyun 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4661*4882a593Smuzhiyun 
4662*4882a593Smuzhiyun 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4663*4882a593Smuzhiyun 		if (hbm_bar_addr != U64_MAX) {
4664*4882a593Smuzhiyun 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4665*4882a593Smuzhiyun 						(addr - bar_base_addr));
4666*4882a593Smuzhiyun 
4667*4882a593Smuzhiyun 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4668*4882a593Smuzhiyun 						hbm_bar_addr);
4669*4882a593Smuzhiyun 		}
4670*4882a593Smuzhiyun 		if (hbm_bar_addr == U64_MAX)
4671*4882a593Smuzhiyun 			rc = -EIO;
4672*4882a593Smuzhiyun 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4673*4882a593Smuzhiyun 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4674*4882a593Smuzhiyun 	} else {
4675*4882a593Smuzhiyun 		rc = -EFAULT;
4676*4882a593Smuzhiyun 	}
4677*4882a593Smuzhiyun 
4678*4882a593Smuzhiyun 	return rc;
4679*4882a593Smuzhiyun }
4680*4882a593Smuzhiyun 
4681*4882a593Smuzhiyun static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4682*4882a593Smuzhiyun {
4683*4882a593Smuzhiyun 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4684*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4685*4882a593Smuzhiyun 	u64 hbm_bar_addr;
4686*4882a593Smuzhiyun 	int rc = 0;
4687*4882a593Smuzhiyun 
4688*4882a593Smuzhiyun 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4689*4882a593Smuzhiyun 
4690*4882a593Smuzhiyun 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4691*4882a593Smuzhiyun 				(hdev->clock_gating_mask &
4692*4882a593Smuzhiyun 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4693*4882a593Smuzhiyun 
4694*4882a593Smuzhiyun 			dev_err_ratelimited(hdev->dev,
4695*4882a593Smuzhiyun 				"Can't write register - clock gating is enabled!\n");
4696*4882a593Smuzhiyun 			rc = -EFAULT;
4697*4882a593Smuzhiyun 		} else {
4698*4882a593Smuzhiyun 			WREG32(addr - CFG_BASE, lower_32_bits(val));
4699*4882a593Smuzhiyun 			WREG32(addr + sizeof(u32) - CFG_BASE,
4700*4882a593Smuzhiyun 				upper_32_bits(val));
4701*4882a593Smuzhiyun 		}
4702*4882a593Smuzhiyun 
4703*4882a593Smuzhiyun 	} else if ((addr >= SRAM_BASE_ADDR) &&
4704*4882a593Smuzhiyun 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4705*4882a593Smuzhiyun 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4706*4882a593Smuzhiyun 					(addr - SRAM_BASE_ADDR));
4707*4882a593Smuzhiyun 	} else if (addr <=
4708*4882a593Smuzhiyun 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4709*4882a593Smuzhiyun 		u64 bar_base_addr = DRAM_PHYS_BASE +
4710*4882a593Smuzhiyun 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4711*4882a593Smuzhiyun 
4712*4882a593Smuzhiyun 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4713*4882a593Smuzhiyun 		if (hbm_bar_addr != U64_MAX) {
4714*4882a593Smuzhiyun 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4715*4882a593Smuzhiyun 						(addr - bar_base_addr));
4716*4882a593Smuzhiyun 
4717*4882a593Smuzhiyun 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4718*4882a593Smuzhiyun 						hbm_bar_addr);
4719*4882a593Smuzhiyun 		}
4720*4882a593Smuzhiyun 		if (hbm_bar_addr == U64_MAX)
4721*4882a593Smuzhiyun 			rc = -EIO;
4722*4882a593Smuzhiyun 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4723*4882a593Smuzhiyun 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4724*4882a593Smuzhiyun 	} else {
4725*4882a593Smuzhiyun 		rc = -EFAULT;
4726*4882a593Smuzhiyun 	}
4727*4882a593Smuzhiyun 
4728*4882a593Smuzhiyun 	return rc;
4729*4882a593Smuzhiyun }
4730*4882a593Smuzhiyun 
4731*4882a593Smuzhiyun static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4732*4882a593Smuzhiyun {
4733*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4734*4882a593Smuzhiyun 
4735*4882a593Smuzhiyun 	if (hdev->hard_reset_pending)
4736*4882a593Smuzhiyun 		return U64_MAX;
4737*4882a593Smuzhiyun 
4738*4882a593Smuzhiyun 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
4739*4882a593Smuzhiyun 			(addr - gaudi->hbm_bar_cur_addr));
4740*4882a593Smuzhiyun }
4741*4882a593Smuzhiyun 
4742*4882a593Smuzhiyun static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4743*4882a593Smuzhiyun {
4744*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4745*4882a593Smuzhiyun 
4746*4882a593Smuzhiyun 	if (hdev->hard_reset_pending)
4747*4882a593Smuzhiyun 		return;
4748*4882a593Smuzhiyun 
4749*4882a593Smuzhiyun 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4750*4882a593Smuzhiyun 			(addr - gaudi->hbm_bar_cur_addr));
4751*4882a593Smuzhiyun }
4752*4882a593Smuzhiyun 
4753*4882a593Smuzhiyun void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4754*4882a593Smuzhiyun {
4755*4882a593Smuzhiyun 	/* mask to zero the MMBP and ASID bits */
4756*4882a593Smuzhiyun 	WREG32_AND(reg, ~0x7FF);
4757*4882a593Smuzhiyun 	WREG32_OR(reg, asid);
4758*4882a593Smuzhiyun }
4759*4882a593Smuzhiyun 
4760*4882a593Smuzhiyun static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4761*4882a593Smuzhiyun {
4762*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
4763*4882a593Smuzhiyun 
4764*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4765*4882a593Smuzhiyun 		return;
4766*4882a593Smuzhiyun 
4767*4882a593Smuzhiyun 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4768*4882a593Smuzhiyun 		WARN(1, "asid %u is too big\n", asid);
4769*4882a593Smuzhiyun 		return;
4770*4882a593Smuzhiyun 	}
4771*4882a593Smuzhiyun 
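	/*
	 * The engine config registers below are not accessible while the
	 * engines are clock gated, so gating is disabled for the update.
	 */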
4772*4882a593Smuzhiyun 	mutex_lock(&gaudi->clk_gate_mutex);
4773*4882a593Smuzhiyun 
4774*4882a593Smuzhiyun 	hdev->asic_funcs->disable_clock_gating(hdev);
4775*4882a593Smuzhiyun 
4776*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781*4882a593Smuzhiyun 
4782*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787*4882a593Smuzhiyun 
4788*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793*4882a593Smuzhiyun 
4794*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799*4882a593Smuzhiyun 
4800*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805*4882a593Smuzhiyun 
4806*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4807*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4808*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4809*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4810*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4811*4882a593Smuzhiyun 
4812*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4813*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4814*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4815*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4816*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4817*4882a593Smuzhiyun 
4818*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823*4882a593Smuzhiyun 
4824*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4825*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4826*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4827*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4828*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4829*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4830*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4831*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4832*4882a593Smuzhiyun 
4833*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4839*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4840*4882a593Smuzhiyun 
4841*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4847*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4848*4882a593Smuzhiyun 
4849*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4855*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4856*4882a593Smuzhiyun 
4857*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4863*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4864*4882a593Smuzhiyun 
4865*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4871*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4872*4882a593Smuzhiyun 
4873*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4879*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4880*4882a593Smuzhiyun 
4881*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4887*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4888*4882a593Smuzhiyun 
4889*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4895*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4896*4882a593Smuzhiyun 
4897*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4898*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4899*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4900*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4901*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4902*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907*4882a593Smuzhiyun 
4908*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4909*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4910*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4911*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4912*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4913*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4914*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4915*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4916*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4917*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4918*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4919*4882a593Smuzhiyun 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4920*4882a593Smuzhiyun 
4921*4882a593Smuzhiyun 	hdev->asic_funcs->set_clock_gating(hdev);
4922*4882a593Smuzhiyun 
4923*4882a593Smuzhiyun 	mutex_unlock(&gaudi->clk_gate_mutex);
4924*4882a593Smuzhiyun }
4925*4882a593Smuzhiyun 
4926*4882a593Smuzhiyun static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4927*4882a593Smuzhiyun 		struct hl_cs_job *job)
4928*4882a593Smuzhiyun {
4929*4882a593Smuzhiyun 	struct packet_msg_prot *fence_pkt;
4930*4882a593Smuzhiyun 	u32 *fence_ptr;
4931*4882a593Smuzhiyun 	dma_addr_t fence_dma_addr;
4932*4882a593Smuzhiyun 	struct hl_cb *cb;
4933*4882a593Smuzhiyun 	u32 tmp, timeout, dma_offset;
4934*4882a593Smuzhiyun 	int rc;
4935*4882a593Smuzhiyun 
4936*4882a593Smuzhiyun 	if (hdev->pldm)
4937*4882a593Smuzhiyun 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4938*4882a593Smuzhiyun 	else
4939*4882a593Smuzhiyun 		timeout = HL_DEVICE_TIMEOUT_USEC;
4940*4882a593Smuzhiyun 
4941*4882a593Smuzhiyun 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4942*4882a593Smuzhiyun 		dev_err_ratelimited(hdev->dev,
4943*4882a593Smuzhiyun 			"Can't send driver job on QMAN0 because the device is not idle\n");
4944*4882a593Smuzhiyun 		return -EBUSY;
4945*4882a593Smuzhiyun 	}
4946*4882a593Smuzhiyun 
4947*4882a593Smuzhiyun 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4948*4882a593Smuzhiyun 							&fence_dma_addr);
4949*4882a593Smuzhiyun 	if (!fence_ptr) {
4950*4882a593Smuzhiyun 		dev_err(hdev->dev,
4951*4882a593Smuzhiyun 			"Failed to allocate fence memory for QMAN0\n");
4952*4882a593Smuzhiyun 		return -ENOMEM;
4953*4882a593Smuzhiyun 	}
4954*4882a593Smuzhiyun 
4955*4882a593Smuzhiyun 	cb = job->patched_cb;
4956*4882a593Smuzhiyun 
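	/*
	 * The patched CB ends with a MSG_PROT packet that writes the fence
	 * value to host memory; completion is detected by polling it below.
	 */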
4957*4882a593Smuzhiyun 	fence_pkt = cb->kernel_address +
4958*4882a593Smuzhiyun 			job->job_cb_size - sizeof(struct packet_msg_prot);
4959*4882a593Smuzhiyun 
4960*4882a593Smuzhiyun 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4961*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4962*4882a593Smuzhiyun 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4963*4882a593Smuzhiyun 
4964*4882a593Smuzhiyun 	fence_pkt->ctl = cpu_to_le32(tmp);
4965*4882a593Smuzhiyun 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4966*4882a593Smuzhiyun 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4967*4882a593Smuzhiyun 
4968*4882a593Smuzhiyun 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4969*4882a593Smuzhiyun 
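	/* Temporarily raise the DMA core protection bit for this job */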
4970*4882a593Smuzhiyun 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4971*4882a593Smuzhiyun 
4972*4882a593Smuzhiyun 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4973*4882a593Smuzhiyun 					job->job_cb_size, cb->bus_address);
4974*4882a593Smuzhiyun 	if (rc) {
4975*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4976*4882a593Smuzhiyun 		goto free_fence_ptr;
4977*4882a593Smuzhiyun 	}
4978*4882a593Smuzhiyun 
4979*4882a593Smuzhiyun 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4980*4882a593Smuzhiyun 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4981*4882a593Smuzhiyun 				timeout, true);
4982*4882a593Smuzhiyun 
4983*4882a593Smuzhiyun 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4984*4882a593Smuzhiyun 
4985*4882a593Smuzhiyun 	if (rc == -ETIMEDOUT)
4986*4882a593Smuzhiyun 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4989*4882a593Smuzhiyun 
4990*4882a593Smuzhiyun free_fence_ptr:
4991*4882a593Smuzhiyun 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4992*4882a593Smuzhiyun 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4993*4882a593Smuzhiyun 
4994*4882a593Smuzhiyun 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4995*4882a593Smuzhiyun 					fence_dma_addr);
4996*4882a593Smuzhiyun 	return rc;
4997*4882a593Smuzhiyun }
4998*4882a593Smuzhiyun 
4999*4882a593Smuzhiyun static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5000*4882a593Smuzhiyun {
5001*4882a593Smuzhiyun 	if (event_type >= GAUDI_EVENT_SIZE)
5002*4882a593Smuzhiyun 		goto event_not_supported;
5003*4882a593Smuzhiyun 
5004*4882a593Smuzhiyun 	if (!gaudi_irq_map_table[event_type].valid)
5005*4882a593Smuzhiyun 		goto event_not_supported;
5006*4882a593Smuzhiyun 
5007*4882a593Smuzhiyun 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5008*4882a593Smuzhiyun 
5009*4882a593Smuzhiyun 	return;
5010*4882a593Smuzhiyun 
5011*4882a593Smuzhiyun event_not_supported:
5012*4882a593Smuzhiyun 	snprintf(desc, size, "N/A");
5013*4882a593Smuzhiyun }
5014*4882a593Smuzhiyun 
5015*4882a593Smuzhiyun static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5016*4882a593Smuzhiyun 							u32 x_y, bool is_write)
5017*4882a593Smuzhiyun {
5018*4882a593Smuzhiyun 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5019*4882a593Smuzhiyun 
5020*4882a593Smuzhiyun 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5021*4882a593Smuzhiyun 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5022*4882a593Smuzhiyun 
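	/*
	 * Each interface routing point is shared by two DMA engines, so
	 * read both engines' ERR_CAUSE registers to tell them apart.
	 */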
5023*4882a593Smuzhiyun 	switch (x_y) {
5024*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5025*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5026*4882a593Smuzhiyun 		dma_id[0] = 0;
5027*4882a593Smuzhiyun 		dma_id[1] = 2;
5028*4882a593Smuzhiyun 		break;
5029*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5030*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5031*4882a593Smuzhiyun 		dma_id[0] = 1;
5032*4882a593Smuzhiyun 		dma_id[1] = 3;
5033*4882a593Smuzhiyun 		break;
5034*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5035*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5036*4882a593Smuzhiyun 		dma_id[0] = 4;
5037*4882a593Smuzhiyun 		dma_id[1] = 6;
5038*4882a593Smuzhiyun 		break;
5039*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5040*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5041*4882a593Smuzhiyun 		dma_id[0] = 5;
5042*4882a593Smuzhiyun 		dma_id[1] = 7;
5043*4882a593Smuzhiyun 		break;
5044*4882a593Smuzhiyun 	default:
5045*4882a593Smuzhiyun 		goto unknown_initiator;
5046*4882a593Smuzhiyun 	}
5047*4882a593Smuzhiyun 
5048*4882a593Smuzhiyun 	for (i = 0 ; i < 2 ; i++) {
5049*4882a593Smuzhiyun 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5050*4882a593Smuzhiyun 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5051*4882a593Smuzhiyun 	}
5052*4882a593Smuzhiyun 
5053*4882a593Smuzhiyun 	switch (x_y) {
5054*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5055*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5056*4882a593Smuzhiyun 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057*4882a593Smuzhiyun 			return "DMA0";
5058*4882a593Smuzhiyun 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059*4882a593Smuzhiyun 			return "DMA2";
5060*4882a593Smuzhiyun 		else
5061*4882a593Smuzhiyun 			return "DMA0 or DMA2";
5062*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5063*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5064*4882a593Smuzhiyun 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065*4882a593Smuzhiyun 			return "DMA1";
5066*4882a593Smuzhiyun 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067*4882a593Smuzhiyun 			return "DMA3";
5068*4882a593Smuzhiyun 		else
5069*4882a593Smuzhiyun 			return "DMA1 or DMA3";
5070*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5071*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5072*4882a593Smuzhiyun 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073*4882a593Smuzhiyun 			return "DMA4";
5074*4882a593Smuzhiyun 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075*4882a593Smuzhiyun 			return "DMA6";
5076*4882a593Smuzhiyun 		else
5077*4882a593Smuzhiyun 			return "DMA4 or DMA6";
5078*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5079*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5080*4882a593Smuzhiyun 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5081*4882a593Smuzhiyun 			return "DMA5";
5082*4882a593Smuzhiyun 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5083*4882a593Smuzhiyun 			return "DMA7";
5084*4882a593Smuzhiyun 		else
5085*4882a593Smuzhiyun 			return "DMA5 or DMA7";
5086*4882a593Smuzhiyun 	}
5087*4882a593Smuzhiyun 
5088*4882a593Smuzhiyun unknown_initiator:
5089*4882a593Smuzhiyun 	return "unknown initiator";
5090*4882a593Smuzhiyun }
5091*4882a593Smuzhiyun 
5092*4882a593Smuzhiyun static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5093*4882a593Smuzhiyun 							bool is_write)
5094*4882a593Smuzhiyun {
5095*4882a593Smuzhiyun 	u32 val, x_y, axi_id;
5096*4882a593Smuzhiyun 
5097*4882a593Smuzhiyun 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5098*4882a593Smuzhiyun 				RREG32(mmMMU_UP_RAZWI_READ_ID);
5099*4882a593Smuzhiyun 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5100*4882a593Smuzhiyun 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5101*4882a593Smuzhiyun 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5102*4882a593Smuzhiyun 			RAZWI_INITIATOR_AXI_ID_SHIFT);
5103*4882a593Smuzhiyun 
5104*4882a593Smuzhiyun 	switch (x_y) {
5105*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5106*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5107*4882a593Smuzhiyun 			return "TPC0";
5108*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5109*4882a593Smuzhiyun 			return "NIC0";
5110*4882a593Smuzhiyun 		break;
5111*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
5112*4882a593Smuzhiyun 		return "TPC1";
5113*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5114*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5115*4882a593Smuzhiyun 		return "MME0";
5116*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5117*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5118*4882a593Smuzhiyun 		return "MME1";
5119*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
5120*4882a593Smuzhiyun 		return "TPC2";
5121*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5122*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5123*4882a593Smuzhiyun 			return "TPC3";
5124*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5125*4882a593Smuzhiyun 			return "PCI";
5126*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5127*4882a593Smuzhiyun 			return "CPU";
5128*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5129*4882a593Smuzhiyun 			return "PSOC";
5130*4882a593Smuzhiyun 		break;
5131*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5132*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5133*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5134*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5135*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5136*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5137*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5138*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5139*4882a593Smuzhiyun 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5140*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5141*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5142*4882a593Smuzhiyun 			return "TPC4";
5143*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5144*4882a593Smuzhiyun 			return "NIC1";
5145*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5146*4882a593Smuzhiyun 			return "NIC2";
5147*4882a593Smuzhiyun 		break;
5148*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
5149*4882a593Smuzhiyun 		return "TPC5";
5150*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5151*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5152*4882a593Smuzhiyun 		return "MME2";
5153*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5154*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5155*4882a593Smuzhiyun 		return "MME3";
5156*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
5157*4882a593Smuzhiyun 		return "TPC6";
5158*4882a593Smuzhiyun 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5159*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5160*4882a593Smuzhiyun 			return "TPC7";
5161*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5162*4882a593Smuzhiyun 			return "NIC4";
5163*4882a593Smuzhiyun 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5164*4882a593Smuzhiyun 			return "NIC5";
5165*4882a593Smuzhiyun 		break;
5166*4882a593Smuzhiyun 	default:
5167*4882a593Smuzhiyun 		break;
5168*4882a593Smuzhiyun 	}
5169*4882a593Smuzhiyun 
5170*4882a593Smuzhiyun 	dev_err(hdev->dev,
5171*4882a593Smuzhiyun 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5172*4882a593Smuzhiyun 		val,
5173*4882a593Smuzhiyun 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5174*4882a593Smuzhiyun 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5175*4882a593Smuzhiyun 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5176*4882a593Smuzhiyun 			RAZWI_INITIATOR_AXI_ID_MASK);
5177*4882a593Smuzhiyun 
5178*4882a593Smuzhiyun 	return "unknown initiator";
5179*4882a593Smuzhiyun }
5180*4882a593Smuzhiyun 
5181*4882a593Smuzhiyun static void gaudi_print_razwi_info(struct hl_device *hdev)
5182*4882a593Smuzhiyun {
5183*4882a593Smuzhiyun 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5184*4882a593Smuzhiyun 		dev_err_ratelimited(hdev->dev,
5185*4882a593Smuzhiyun 			"RAZWI event caused by illegal write of %s\n",
5186*4882a593Smuzhiyun 			gaudi_get_razwi_initiator_name(hdev, true));
5187*4882a593Smuzhiyun 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5188*4882a593Smuzhiyun 	}
5189*4882a593Smuzhiyun 
5190*4882a593Smuzhiyun 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5191*4882a593Smuzhiyun 		dev_err_ratelimited(hdev->dev,
5192*4882a593Smuzhiyun 			"RAZWI event caused by illegal read of %s\n",
5193*4882a593Smuzhiyun 			gaudi_get_razwi_initiator_name(hdev, false));
5194*4882a593Smuzhiyun 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5195*4882a593Smuzhiyun 	}
5196*4882a593Smuzhiyun }
5197*4882a593Smuzhiyun 
5198*4882a593Smuzhiyun static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5199*4882a593Smuzhiyun {
5200*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
5201*4882a593Smuzhiyun 	u64 addr;
5202*4882a593Smuzhiyun 	u32 val;
5203*4882a593Smuzhiyun 
5204*4882a593Smuzhiyun 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5205*4882a593Smuzhiyun 		return;
5206*4882a593Smuzhiyun 
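	/*
	 * The fault VA is split: bits 49:32 live in the capture register,
	 * bits 31:0 in the corresponding _VA register.
	 */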
5207*4882a593Smuzhiyun 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5208*4882a593Smuzhiyun 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5209*4882a593Smuzhiyun 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5210*4882a593Smuzhiyun 		addr <<= 32;
5211*4882a593Smuzhiyun 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5212*4882a593Smuzhiyun 
5213*4882a593Smuzhiyun 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5214*4882a593Smuzhiyun 					addr);
5215*4882a593Smuzhiyun 
5216*4882a593Smuzhiyun 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5217*4882a593Smuzhiyun 	}
5218*4882a593Smuzhiyun 
5219*4882a593Smuzhiyun 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5220*4882a593Smuzhiyun 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5221*4882a593Smuzhiyun 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5222*4882a593Smuzhiyun 		addr <<= 32;
5223*4882a593Smuzhiyun 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5224*4882a593Smuzhiyun 
5225*4882a593Smuzhiyun 		dev_err_ratelimited(hdev->dev,
5226*4882a593Smuzhiyun 				"MMU access error on va 0x%llx\n", addr);
5227*4882a593Smuzhiyun 
5228*4882a593Smuzhiyun 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5229*4882a593Smuzhiyun 	}
5230*4882a593Smuzhiyun }
5231*4882a593Smuzhiyun 
5232*4882a593Smuzhiyun /*
5233*4882a593Smuzhiyun  *  +-------------------+------------------------------------------------------+
5234*4882a593Smuzhiyun  *  | Configuration Reg |                     Description                      |
5235*4882a593Smuzhiyun  *  |      Address      |                                                      |
5236*4882a593Smuzhiyun  *  +-------------------+------------------------------------------------------+
5237*4882a593Smuzhiyun  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5238*4882a593Smuzhiyun  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5239*4882a593Smuzhiyun  *  |                   |0xF34 memory wrappers 63:32                           |
5240*4882a593Smuzhiyun  *  |                   |0xF38 memory wrappers 95:64                           |
5241*4882a593Smuzhiyun  *  |                   |0xF3C memory wrappers 127:96                          |
5242*4882a593Smuzhiyun  *  +-------------------+------------------------------------------------------+
5243*4882a593Smuzhiyun  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5244*4882a593Smuzhiyun  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5245*4882a593Smuzhiyun  *  |                   |0xF44 memory wrappers 63:32                           |
5246*4882a593Smuzhiyun  *  |                   |0xF48 memory wrappers 95:64                           |
5247*4882a593Smuzhiyun  *  |                   |0xF4C memory wrappers 127:96                          |
5248*4882a593Smuzhiyun  *  +-------------------+------------------------------------------------------+
5249*4882a593Smuzhiyun  */
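/*
 * For example, with 70 memories num_mem_regs is 3, and a single-bit error
 * in wrapper 40 would show up as bit 8 of the second register (0xF34).
 */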
5250*4882a593Smuzhiyun static int gaudi_extract_ecc_info(struct hl_device *hdev,
5251*4882a593Smuzhiyun 		struct ecc_info_extract_params *params, u64 *ecc_address,
5252*4882a593Smuzhiyun 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5253*4882a593Smuzhiyun {
5254*4882a593Smuzhiyun 	struct gaudi_device *gaudi = hdev->asic_specific;
5255*4882a593Smuzhiyun 	u32 i, num_mem_regs, reg, err_bit;
5256*4882a593Smuzhiyun 	u64 err_addr, err_word = 0;
5257*4882a593Smuzhiyun 	int rc = 0;
5258*4882a593Smuzhiyun 
5259*4882a593Smuzhiyun 	num_mem_regs = params->num_memories / 32 +
5260*4882a593Smuzhiyun 			((params->num_memories % 32) ? 1 : 0);
5261*4882a593Smuzhiyun 
5262*4882a593Smuzhiyun 	if (params->block_address >= CFG_BASE)
5263*4882a593Smuzhiyun 		params->block_address -= CFG_BASE;
5264*4882a593Smuzhiyun 
5265*4882a593Smuzhiyun 	if (params->derr)
5266*4882a593Smuzhiyun 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5267*4882a593Smuzhiyun 	else
5268*4882a593Smuzhiyun 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5269*4882a593Smuzhiyun 
5270*4882a593Smuzhiyun 	if (params->disable_clock_gating) {
5271*4882a593Smuzhiyun 		mutex_lock(&gaudi->clk_gate_mutex);
5272*4882a593Smuzhiyun 		hdev->asic_funcs->disable_clock_gating(hdev);
5273*4882a593Smuzhiyun 	}
5274*4882a593Smuzhiyun 
5275*4882a593Smuzhiyun 	/* Set invalid wrapper index */
5276*4882a593Smuzhiyun 	*memory_wrapper_idx = 0xFF;
5277*4882a593Smuzhiyun 
5278*4882a593Smuzhiyun 	/* Iterate through memory wrappers, a single bit must be set */
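	/* The status registers are consecutive 32-bit words (0xF30, 0xF34,
	 * ... per the table above), so register i is read at err_addr + i * 4.
	 */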
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
	if (params->disable_clock_gating) {
		hdev->asic_funcs->set_clock_gating(hdev);

		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 glbl_sts_addr,
					  u64 arb_err_addr)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	char reg_desc[32];

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
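	/* QMAN_STREAMS stream CPs are followed by one lower CP; each owns a
	 * consecutive 32-bit GLBL_STS1 word, hence the 4-byte stride below.
	 */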
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

		/* Write 1 to clear errors */
		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}

static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

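	/* ECC details either arrive ready-made in the event queue entry (for
	 * blocks whose errors are collected by the firmware) or must be
	 * extracted from the block's own capture registers by the driver.
	 */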
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}

static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 glbl_sts_addr, arb_err_addr;
	u8 index;
	char desc[32];

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		glbl_sts_addr =
			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
		arb_err_addr =
			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		glbl_sts_addr =
			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
		arb_err_addr =
			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		glbl_sts_addr =
			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
		arb_err_addr =
			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
}

static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}

static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}

static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
{
	int ch, err = 0;
	u32 base, val, val2;

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
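		/* Each HBM channel carries two pseudo channels: the even one
		 * is reported via registers 0x06C/0x060/0x064 and the odd one
		 * via 0x07C/0x070/0x074. Folding the two byte halves of the
		 * interrupt word catches a bit set in either half.
		 */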
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return err;
}

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return soft_reset_required;
}

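/* DEC events of consecutive TPCs are spaced two event IDs apart */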
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

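/* KRN_ERR events of consecutive TPCs are spaced six event IDs apart */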
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	u8 cause;
	bool reset_required;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

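	/* Handling policy, as implemented below: double-bit (DERR) ECC errors
	 * and fatal system events lead to a hard reset when
	 * hard_reset_on_fw_events is set; single-bit (SERR) ECC errors and
	 * most engine/QMAN errors are logged, decoded and the event is
	 * unmasked again in the firmware.
	 */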
	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
					  gaudi_hbm_event_to_dev(event_type));
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
					  gaudi_hbm_event_to_dev(event_type));
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	u32 inv_data;
	u32 pi;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	mutex_lock(&hdev->mmu_cache_lock);

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache
	 * lines with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

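	/* The invalidation request is a producer/consumer handshake: advance
	 * the 8-bit producer index in STLB_CACHE_INV and wait until the
	 * consumer index catches up to it.
	 */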
	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

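	/* The hop0 physical address is programmed in two parts (bits 43:12
	 * and 49:44); writing the MSB of MMU_BUSY arms the update, and the
	 * HW clears that bit on completion.
	 */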
	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	if (hdev->card_type == cpucp_card_type_pci)
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
	else if (hdev->card_type == cpucp_card_type_pmc)
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

	hdev->max_power = prop->max_power_default;

	return 0;
}

static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
					struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

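	/* Each engine contributes one bit to the optional idle mask at
	 * position GAUDI_ENGINE_ID_<engine>; a set bit marks a busy engine.
	 */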
	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_MME_0 + i);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}

static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

/*
 * This function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

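	/* Sequence: point the QM and icache at the kernel binary, invalidate
	 * and prefetch the icache, then trigger EXECUTE and wait for the
	 * vector pipe to empty and the inflight work-queue counter to drain.
	 */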
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
6362*4882a593Smuzhiyun 
6363*4882a593Smuzhiyun static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6364*4882a593Smuzhiyun {
6365*4882a593Smuzhiyun 	return RREG32(mmHW_STATE);
6366*4882a593Smuzhiyun }
6367*4882a593Smuzhiyun 
6368*4882a593Smuzhiyun static int gaudi_ctx_init(struct hl_ctx *ctx)
6369*4882a593Smuzhiyun {
6370*4882a593Smuzhiyun 	return 0;
6371*4882a593Smuzhiyun }
6372*4882a593Smuzhiyun 
6373*4882a593Smuzhiyun static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6374*4882a593Smuzhiyun {
6375*4882a593Smuzhiyun 	return gaudi_cq_assignment[cq_idx];
6376*4882a593Smuzhiyun }
6377*4882a593Smuzhiyun 
6378*4882a593Smuzhiyun static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6379*4882a593Smuzhiyun {
6380*4882a593Smuzhiyun 	return sizeof(struct packet_msg_short) +
6381*4882a593Smuzhiyun 			sizeof(struct packet_msg_prot) * 2;
6382*4882a593Smuzhiyun }
6383*4882a593Smuzhiyun 
6384*4882a593Smuzhiyun static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6385*4882a593Smuzhiyun {
6386*4882a593Smuzhiyun 	return sizeof(struct packet_msg_short) * 4 +
6387*4882a593Smuzhiyun 			sizeof(struct packet_fence) +
6388*4882a593Smuzhiyun 			sizeof(struct packet_msg_prot) * 2;
6389*4882a593Smuzhiyun }
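/*
 * The sizes returned by the two helpers above must match the packets their
 * generators emit below: gaudi_gen_signal_cb() writes a single MSG_SHORT
 * (the SOB increment) and gaudi_gen_wait_cb() writes four MSG_SHORTs
 * (monitor address low/high, payload, arm) followed by a FENCE. The two
 * trailing MSG_PROT packets accounted for here are presumably appended by
 * the common queue code via add_end_of_cb_packets() at submission time.
 */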
6390*4882a593Smuzhiyun 
6391*4882a593Smuzhiyun static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6392*4882a593Smuzhiyun {
6393*4882a593Smuzhiyun 	struct hl_cb *cb = (struct hl_cb *) data;
6394*4882a593Smuzhiyun 	struct packet_msg_short *pkt;
6395*4882a593Smuzhiyun 	u32 value, ctl;
6396*4882a593Smuzhiyun 
6397*4882a593Smuzhiyun 	pkt = cb->kernel_address;
6398*4882a593Smuzhiyun 	memset(pkt, 0, sizeof(*pkt));
6399*4882a593Smuzhiyun 
6400*4882a593Smuzhiyun 	/* Inc by 1, Mode ADD */
6401*4882a593Smuzhiyun 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6402*4882a593Smuzhiyun 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6403*4882a593Smuzhiyun 
6404*4882a593Smuzhiyun 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6405*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6406*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6407*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6408*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6409*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6410*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6411*4882a593Smuzhiyun 
6412*4882a593Smuzhiyun 	pkt->value = cpu_to_le32(value);
6413*4882a593Smuzhiyun 	pkt->ctl = cpu_to_le32(ctl);
6414*4882a593Smuzhiyun }
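/*
 * Worked example for the signal CB above: with sob_id == 5 the ADDR field
 * is 5 * 4 = 0x14, the byte offset of SOB_OBJ_5 from the W_S SOB base
 * (each sync object is one 32-bit register). SYNC_VAL == 1 with MOD == 1
 * (ADD mode) makes the packet atomically increment that sync object by one.
 */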
6415*4882a593Smuzhiyun 
6416*4882a593Smuzhiyun static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6417*4882a593Smuzhiyun 					u16 addr)
6418*4882a593Smuzhiyun {
6419*4882a593Smuzhiyun 	u32 ctl, pkt_size = sizeof(*pkt);
6420*4882a593Smuzhiyun 
6421*4882a593Smuzhiyun 	memset(pkt, 0, pkt_size);
6422*4882a593Smuzhiyun 
6423*4882a593Smuzhiyun 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6425*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6426*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6427*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6428*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* MB is set only on the last pkt */
6429*4882a593Smuzhiyun 
6430*4882a593Smuzhiyun 	pkt->value = cpu_to_le32(value);
6431*4882a593Smuzhiyun 	pkt->ctl = cpu_to_le32(ctl);
6432*4882a593Smuzhiyun 
6433*4882a593Smuzhiyun 	return pkt_size;
6434*4882a593Smuzhiyun }
6435*4882a593Smuzhiyun 
6436*4882a593Smuzhiyun static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6437*4882a593Smuzhiyun 					u16 sob_val, u16 addr)
6438*4882a593Smuzhiyun {
6439*4882a593Smuzhiyun 	u32 ctl, value, pkt_size = sizeof(*pkt);
6440*4882a593Smuzhiyun 	u8 mask = ~(1 << (sob_id & 0x7));
6441*4882a593Smuzhiyun 
6442*4882a593Smuzhiyun 	memset(pkt, 0, pkt_size);
6443*4882a593Smuzhiyun 
6444*4882a593Smuzhiyun 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6445*4882a593Smuzhiyun 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6446*4882a593Smuzhiyun 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6447*4882a593Smuzhiyun 			0); /* GREATER OR EQUAL */
6448*4882a593Smuzhiyun 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6449*4882a593Smuzhiyun 
6450*4882a593Smuzhiyun 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6451*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6452*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6453*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6454*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6455*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6456*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6457*4882a593Smuzhiyun 
6458*4882a593Smuzhiyun 	pkt->value = cpu_to_le32(value);
6459*4882a593Smuzhiyun 	pkt->ctl = cpu_to_le32(ctl);
6460*4882a593Smuzhiyun 
6461*4882a593Smuzhiyun 	return pkt_size;
6462*4882a593Smuzhiyun }
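/*
 * A monitor observes a group of 8 sync objects: SYNC_GID selects the group
 * and the MASK field appears to exclude objects from the comparison (a set
 * bit masks an object out). Example: sob_id == 10 yields SYNC_GID = 10 / 8
 * = 1 and mask = ~(1 << (10 & 0x7)) = 0xfb, so only SOB 10 of group 1 is
 * compared against sob_val, with MODE == 0 meaning "greater or equal".
 */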
6463*4882a593Smuzhiyun 
6464*4882a593Smuzhiyun static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6465*4882a593Smuzhiyun {
6466*4882a593Smuzhiyun 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
6467*4882a593Smuzhiyun 
6468*4882a593Smuzhiyun 	memset(pkt, 0, pkt_size);
6469*4882a593Smuzhiyun 
6470*4882a593Smuzhiyun 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6471*4882a593Smuzhiyun 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6472*4882a593Smuzhiyun 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6473*4882a593Smuzhiyun 
6474*4882a593Smuzhiyun 	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6475*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6476*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6477*4882a593Smuzhiyun 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6478*4882a593Smuzhiyun 
6479*4882a593Smuzhiyun 	pkt->cfg = cpu_to_le32(cfg);
6480*4882a593Smuzhiyun 	pkt->ctl = cpu_to_le32(ctl);
6481*4882a593Smuzhiyun 
6482*4882a593Smuzhiyun 	return pkt_size;
6483*4882a593Smuzhiyun }
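/*
 * The fence packet stalls the queue's CP until fence counter 2 (ID == 2)
 * reaches TARGET_VAL == 1, then decrements it by DEC_VAL == 1. In the wait
 * CB below, the armed monitor writes 1 into the CP's FENCE2_RDATA register
 * once the sync object condition is met, which is what releases this fence.
 */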
6484*4882a593Smuzhiyun 
6485*4882a593Smuzhiyun static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6486*4882a593Smuzhiyun 			u16 sob_val, u16 mon_id, u32 q_idx)
6487*4882a593Smuzhiyun {
6488*4882a593Smuzhiyun 	struct hl_cb *cb = (struct hl_cb *) data;
6489*4882a593Smuzhiyun 	void *buf = cb->kernel_address;
6490*4882a593Smuzhiyun 	u64 monitor_base, fence_addr = 0;
6491*4882a593Smuzhiyun 	u32 size = 0;
6492*4882a593Smuzhiyun 	u16 msg_addr_offset;
6493*4882a593Smuzhiyun 
6494*4882a593Smuzhiyun 	switch (q_idx) {
6495*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_0_0:
6496*4882a593Smuzhiyun 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6497*4882a593Smuzhiyun 		break;
6498*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_0_1:
6499*4882a593Smuzhiyun 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6500*4882a593Smuzhiyun 		break;
6501*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_0_2:
6502*4882a593Smuzhiyun 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6503*4882a593Smuzhiyun 		break;
6504*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_0_3:
6505*4882a593Smuzhiyun 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6506*4882a593Smuzhiyun 		break;
6507*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_1_0:
6508*4882a593Smuzhiyun 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6509*4882a593Smuzhiyun 		break;
6510*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_1_1:
6511*4882a593Smuzhiyun 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6512*4882a593Smuzhiyun 		break;
6513*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_1_2:
6514*4882a593Smuzhiyun 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6515*4882a593Smuzhiyun 		break;
6516*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_1_3:
6517*4882a593Smuzhiyun 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6518*4882a593Smuzhiyun 		break;
6519*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_5_0:
6520*4882a593Smuzhiyun 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6521*4882a593Smuzhiyun 		break;
6522*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_5_1:
6523*4882a593Smuzhiyun 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6524*4882a593Smuzhiyun 		break;
6525*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_5_2:
6526*4882a593Smuzhiyun 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6527*4882a593Smuzhiyun 		break;
6528*4882a593Smuzhiyun 	case GAUDI_QUEUE_ID_DMA_5_3:
6529*4882a593Smuzhiyun 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6530*4882a593Smuzhiyun 		break;
6531*4882a593Smuzhiyun 	default:
6532*4882a593Smuzhiyun 		/* queue index should be valid here */
6533*4882a593Smuzhiyun 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6534*4882a593Smuzhiyun 				q_idx);
6535*4882a593Smuzhiyun 		return;
6536*4882a593Smuzhiyun 	}
6537*4882a593Smuzhiyun 
6538*4882a593Smuzhiyun 	fence_addr += CFG_BASE;
6539*4882a593Smuzhiyun 
6540*4882a593Smuzhiyun 	/*
6541*4882a593Smuzhiyun 	 * monitor_base must match the content of the base0 address registers,
6542*4882a593Smuzhiyun 	 * since the HW adds that base to the msg-short address offsets
6543*4882a593Smuzhiyun 	 */
6544*4882a593Smuzhiyun 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6545*4882a593Smuzhiyun 
6546*4882a593Smuzhiyun 	/* First monitor config packet: low address of the sync */
6547*4882a593Smuzhiyun 	msg_addr_offset =
6548*4882a593Smuzhiyun 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6549*4882a593Smuzhiyun 				monitor_base;
6550*4882a593Smuzhiyun 
6551*4882a593Smuzhiyun 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6552*4882a593Smuzhiyun 					msg_addr_offset);
6553*4882a593Smuzhiyun 
6554*4882a593Smuzhiyun 	/* Second monitor config packet: high address of the sync */
6555*4882a593Smuzhiyun 	msg_addr_offset =
6556*4882a593Smuzhiyun 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6557*4882a593Smuzhiyun 				monitor_base;
6558*4882a593Smuzhiyun 
6559*4882a593Smuzhiyun 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6560*4882a593Smuzhiyun 					msg_addr_offset);
6561*4882a593Smuzhiyun 
6562*4882a593Smuzhiyun 	/*
6563*4882a593Smuzhiyun 	 * Third monitor config packet: the payload, i.e. what to write when the
6564*4882a593Smuzhiyun 	 * sync triggers
6565*4882a593Smuzhiyun 	 */
6566*4882a593Smuzhiyun 	msg_addr_offset =
6567*4882a593Smuzhiyun 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6568*4882a593Smuzhiyun 				monitor_base;
6569*4882a593Smuzhiyun 
6570*4882a593Smuzhiyun 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6571*4882a593Smuzhiyun 
6572*4882a593Smuzhiyun 	/* Fourth monitor config packet: bind the monitor to a sync object */
6573*4882a593Smuzhiyun 	msg_addr_offset =
6574*4882a593Smuzhiyun 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6575*4882a593Smuzhiyun 				monitor_base;
6576*4882a593Smuzhiyun 	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6577*4882a593Smuzhiyun 						msg_addr_offset);
6578*4882a593Smuzhiyun 
6579*4882a593Smuzhiyun 	/* Fence packet */
6580*4882a593Smuzhiyun 	size += gaudi_add_fence_pkt(buf + size);
6581*4882a593Smuzhiyun }
6582*4882a593Smuzhiyun 
6583*4882a593Smuzhiyun static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6584*4882a593Smuzhiyun {
6585*4882a593Smuzhiyun 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6586*4882a593Smuzhiyun 
6587*4882a593Smuzhiyun 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6588*4882a593Smuzhiyun 		hw_sob->sob_id);
6589*4882a593Smuzhiyun 
6590*4882a593Smuzhiyun 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6591*4882a593Smuzhiyun 		0);
6592*4882a593Smuzhiyun 
6593*4882a593Smuzhiyun 	kref_init(&hw_sob->kref);
6594*4882a593Smuzhiyun }
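/*
 * Note: the register write above zeroes the sync object's HW counter, while
 * kref_init() re-arms the driver-side refcount to 1 so the SOB can be
 * handed out to new signal/wait submissions.
 */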
6595*4882a593Smuzhiyun 
6596*4882a593Smuzhiyun static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6597*4882a593Smuzhiyun {
6598*4882a593Smuzhiyun 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6599*4882a593Smuzhiyun 							HL_POWER9_HOST_MAGIC) {
6600*4882a593Smuzhiyun 		hdev->power9_64bit_dma_enable = 1;
6601*4882a593Smuzhiyun 		hdev->dma_mask = 64;
6602*4882a593Smuzhiyun 	} else {
6603*4882a593Smuzhiyun 		hdev->power9_64bit_dma_enable = 0;
6604*4882a593Smuzhiyun 		hdev->dma_mask = 48;
6605*4882a593Smuzhiyun 	}
6606*4882a593Smuzhiyun }
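/*
 * The boot firmware is expected to leave HL_POWER9_HOST_MAGIC in this
 * non-reset scratch register when it detects a POWER9 host that supports
 * full 64-bit DMA addressing; on any other host the device falls back to
 * the default 48-bit DMA mask.
 */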
6607*4882a593Smuzhiyun 
6608*4882a593Smuzhiyun static u64 gaudi_get_device_time(struct hl_device *hdev)
6609*4882a593Smuzhiyun {
6610*4882a593Smuzhiyun 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6611*4882a593Smuzhiyun 
6612*4882a593Smuzhiyun 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6613*4882a593Smuzhiyun }
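/*
 * Note: CNTCVU (high word) is read before CNTCVL (low word) with no retry,
 * so a low-word wraparound between the two reads can produce a timestamp
 * that is off by 2^32 ticks. A wrap-safe variant would use the classic
 * hi-lo-hi sequence (illustrative sketch only):
 *
 *	u32 hi, lo;
 *
 *	do {
 *		hi = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
 *		lo = RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 *	} while (RREG32(mmPSOC_TIMESTAMP_CNTCVU) != hi);
 *
 *	return ((u64) hi << 32) | lo;
 */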
6614*4882a593Smuzhiyun 
6615*4882a593Smuzhiyun static const struct hl_asic_funcs gaudi_funcs = {
6616*4882a593Smuzhiyun 	.early_init = gaudi_early_init,
6617*4882a593Smuzhiyun 	.early_fini = gaudi_early_fini,
6618*4882a593Smuzhiyun 	.late_init = gaudi_late_init,
6619*4882a593Smuzhiyun 	.late_fini = gaudi_late_fini,
6620*4882a593Smuzhiyun 	.sw_init = gaudi_sw_init,
6621*4882a593Smuzhiyun 	.sw_fini = gaudi_sw_fini,
6622*4882a593Smuzhiyun 	.hw_init = gaudi_hw_init,
6623*4882a593Smuzhiyun 	.hw_fini = gaudi_hw_fini,
6624*4882a593Smuzhiyun 	.halt_engines = gaudi_halt_engines,
6625*4882a593Smuzhiyun 	.suspend = gaudi_suspend,
6626*4882a593Smuzhiyun 	.resume = gaudi_resume,
6627*4882a593Smuzhiyun 	.cb_mmap = gaudi_cb_mmap,
6628*4882a593Smuzhiyun 	.ring_doorbell = gaudi_ring_doorbell,
6629*4882a593Smuzhiyun 	.pqe_write = gaudi_pqe_write,
6630*4882a593Smuzhiyun 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6631*4882a593Smuzhiyun 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
6632*4882a593Smuzhiyun 	.get_int_queue_base = gaudi_get_int_queue_base,
6633*4882a593Smuzhiyun 	.test_queues = gaudi_test_queues,
6634*4882a593Smuzhiyun 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6635*4882a593Smuzhiyun 	.asic_dma_pool_free = gaudi_dma_pool_free,
6636*4882a593Smuzhiyun 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6637*4882a593Smuzhiyun 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6638*4882a593Smuzhiyun 	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6639*4882a593Smuzhiyun 	.cs_parser = gaudi_cs_parser,
6640*4882a593Smuzhiyun 	.asic_dma_map_sg = gaudi_dma_map_sg,
6641*4882a593Smuzhiyun 	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6642*4882a593Smuzhiyun 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6643*4882a593Smuzhiyun 	.update_eq_ci = gaudi_update_eq_ci,
6644*4882a593Smuzhiyun 	.context_switch = gaudi_context_switch,
6645*4882a593Smuzhiyun 	.restore_phase_topology = gaudi_restore_phase_topology,
6646*4882a593Smuzhiyun 	.debugfs_read32 = gaudi_debugfs_read32,
6647*4882a593Smuzhiyun 	.debugfs_write32 = gaudi_debugfs_write32,
6648*4882a593Smuzhiyun 	.debugfs_read64 = gaudi_debugfs_read64,
6649*4882a593Smuzhiyun 	.debugfs_write64 = gaudi_debugfs_write64,
6650*4882a593Smuzhiyun 	.add_device_attr = gaudi_add_device_attr,
6651*4882a593Smuzhiyun 	.handle_eqe = gaudi_handle_eqe,
6652*4882a593Smuzhiyun 	.set_pll_profile = gaudi_set_pll_profile,
6653*4882a593Smuzhiyun 	.get_events_stat = gaudi_get_events_stat,
6654*4882a593Smuzhiyun 	.read_pte = gaudi_read_pte,
6655*4882a593Smuzhiyun 	.write_pte = gaudi_write_pte,
6656*4882a593Smuzhiyun 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6657*4882a593Smuzhiyun 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6658*4882a593Smuzhiyun 	.send_heartbeat = gaudi_send_heartbeat,
6659*4882a593Smuzhiyun 	.set_clock_gating = gaudi_set_clock_gating,
6660*4882a593Smuzhiyun 	.disable_clock_gating = gaudi_disable_clock_gating,
6661*4882a593Smuzhiyun 	.debug_coresight = gaudi_debug_coresight,
6662*4882a593Smuzhiyun 	.is_device_idle = gaudi_is_device_idle,
6663*4882a593Smuzhiyun 	.soft_reset_late_init = gaudi_soft_reset_late_init,
6664*4882a593Smuzhiyun 	.hw_queues_lock = gaudi_hw_queues_lock,
6665*4882a593Smuzhiyun 	.hw_queues_unlock = gaudi_hw_queues_unlock,
6666*4882a593Smuzhiyun 	.get_pci_id = gaudi_get_pci_id,
6667*4882a593Smuzhiyun 	.get_eeprom_data = gaudi_get_eeprom_data,
6668*4882a593Smuzhiyun 	.send_cpu_message = gaudi_send_cpu_message,
6669*4882a593Smuzhiyun 	.get_hw_state = gaudi_get_hw_state,
6670*4882a593Smuzhiyun 	.pci_bars_map = gaudi_pci_bars_map,
6671*4882a593Smuzhiyun 	.init_iatu = gaudi_init_iatu,
6672*4882a593Smuzhiyun 	.rreg = hl_rreg,
6673*4882a593Smuzhiyun 	.wreg = hl_wreg,
6674*4882a593Smuzhiyun 	.halt_coresight = gaudi_halt_coresight,
6675*4882a593Smuzhiyun 	.ctx_init = gaudi_ctx_init,
6676*4882a593Smuzhiyun 	.get_clk_rate = gaudi_get_clk_rate,
6677*4882a593Smuzhiyun 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6678*4882a593Smuzhiyun 	.read_device_fw_version = gaudi_read_device_fw_version,
6679*4882a593Smuzhiyun 	.load_firmware_to_device = gaudi_load_firmware_to_device,
6680*4882a593Smuzhiyun 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6681*4882a593Smuzhiyun 	.get_signal_cb_size = gaudi_get_signal_cb_size,
6682*4882a593Smuzhiyun 	.get_wait_cb_size = gaudi_get_wait_cb_size,
6683*4882a593Smuzhiyun 	.gen_signal_cb = gaudi_gen_signal_cb,
6684*4882a593Smuzhiyun 	.gen_wait_cb = gaudi_gen_wait_cb,
6685*4882a593Smuzhiyun 	.reset_sob = gaudi_reset_sob,
6686*4882a593Smuzhiyun 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6687*4882a593Smuzhiyun 	.get_device_time = gaudi_get_device_time
6688*4882a593Smuzhiyun };
6689*4882a593Smuzhiyun 
6690*4882a593Smuzhiyun /**
6691*4882a593Smuzhiyun  * gaudi_set_asic_funcs - set GAUDI function pointers
6692*4882a593Smuzhiyun  *
6693*4882a593Smuzhiyun  * @hdev: pointer to hl_device structure
6694*4882a593Smuzhiyun  *
6695*4882a593Smuzhiyun  */
6696*4882a593Smuzhiyun void gaudi_set_asic_funcs(struct hl_device *hdev)
6697*4882a593Smuzhiyun {
6698*4882a593Smuzhiyun 	hdev->asic_funcs = &gaudi_funcs;
6699*4882a593Smuzhiyun }
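/*
 * The common habanalabs core calls this during early device initialization;
 * afterwards every hdev->asic_funcs->xxx() invocation in the generic code
 * dispatches to the Gaudi implementations collected in gaudi_funcs above.
 */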
6700