xref: /OK3568_Linux_fs/kernel/drivers/misc/habanalabs/gaudi/gaudiP.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2019-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #ifndef GAUDIP_H_
9*4882a593Smuzhiyun #define GAUDIP_H_
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun #include <uapi/misc/habanalabs.h>
12*4882a593Smuzhiyun #include "../common/habanalabs.h"
13*4882a593Smuzhiyun #include "../include/common/hl_boot_if.h"
14*4882a593Smuzhiyun #include "../include/gaudi/gaudi_packets.h"
15*4882a593Smuzhiyun #include "../include/gaudi/gaudi.h"
16*4882a593Smuzhiyun #include "../include/gaudi/gaudi_async_events.h"
17*4882a593Smuzhiyun 
/*
 * H/W queue counts, split into external, CPU and internal queues.
 * There is one completion queue per external queue, and
 * NUMBER_OF_HW_QUEUES is the sum of the three queue kinds.
 */
#define NUMBER_OF_EXT_HW_QUEUES		12
#define NUMBER_OF_CMPLT_QUEUES		NUMBER_OF_EXT_HW_QUEUES
#define NUMBER_OF_CPU_HW_QUEUES		1
#define NUMBER_OF_INT_HW_QUEUES		100
#define NUMBER_OF_HW_QUEUES		(NUMBER_OF_EXT_HW_QUEUES + \
					NUMBER_OF_CPU_HW_QUEUES + \
					NUMBER_OF_INT_HW_QUEUES)
25*4882a593Smuzhiyun 
/*
 * Number of MSI interrupt IDs:
 * Each completion queue has 1 ID
 * The event queue has 1 ID
 */
#define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + \
						NUMBER_OF_CPU_HW_QUEUES)

/* Fail the build if more IDs are needed than GAUDI_MSI_ENTRIES allows. */
#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif
37*4882a593Smuzhiyun 
/* Timeouts, in microseconds */
#define CORESIGHT_TIMEOUT_USEC		100000		/* 100 ms */
#define GAUDI_CPU_TIMEOUT_USEC		30000000	/* 30s */

#define GAUDI_MAX_CLK_FREQ		2200000000ull	/* 2200 MHz */

/* Default power limits; the values are in milliwatts (200000 == 200W) */
#define MAX_POWER_DEFAULT_PCI		200000		/* 200W */
#define MAX_POWER_DEFAULT_PMC		350000		/* 350W */

/* One bit per TPC engine, all eight set */
#define TPC_ENABLED_MASK		0xFF

/* HBM: total size, device/channel counts and configuration-space offsets */
#define GAUDI_HBM_SIZE_32GB		0x800000000ull
#define GAUDI_HBM_DEVICES		4
#define GAUDI_HBM_CHANNELS		8
#define GAUDI_HBM_CFG_BASE		(mmHBM0_BASE - CFG_BASE)
#define GAUDI_HBM_CFG_OFFSET		(mmHBM1_BASE - mmHBM0_BASE)

#define DMA_MAX_TRANSFER_SIZE		U32_MAX

#define GAUDI_DEFAULT_CARD_NAME		"HL2000"
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun #define GAUDI_MAX_PENDING_CS		1024
60*4882a593Smuzhiyun 
61*4882a593Smuzhiyun #if !IS_MAX_PENDING_CS_VALID(GAUDI_MAX_PENDING_CS)
62*4882a593Smuzhiyun #error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"
63*4882a593Smuzhiyun #endif
64*4882a593Smuzhiyun 
/* Number of streams per QMAN; used by MME_NUMBER_OF_QMANS below */
#define QMAN_STREAMS		4

/* DMA channels: PCI channels plus HBM channels */
#define PCI_DMA_NUMBER_OF_CHNLS		3
#define HBM_DMA_NUMBER_OF_CHNLS		5
#define DMA_NUMBER_OF_CHNLS		(PCI_DMA_NUMBER_OF_CHNLS + \
						HBM_DMA_NUMBER_OF_CHNLS)

/*
 * MME engines. MME_NUMBER_OF_MASTER_ENGINES is not defined in this header;
 * it is expected to come from one of the includes.
 */
#define MME_NUMBER_OF_SLAVE_ENGINES	2
#define MME_NUMBER_OF_ENGINES		(MME_NUMBER_OF_MASTER_ENGINES + \
					MME_NUMBER_OF_SLAVE_ENGINES)
#define MME_NUMBER_OF_QMANS		(MME_NUMBER_OF_MASTER_ENGINES * \
					QMAN_STREAMS)
77*4882a593Smuzhiyun 
/*
 * Register-block offsets. Each *_OFFSET below is the address difference
 * between instance 1 and instance 0 of the named block.
 */
#define DMA_QMAN_OFFSET		(mmDMA1_QM_BASE - mmDMA0_QM_BASE)
#define TPC_QMAN_OFFSET		(mmTPC1_QM_BASE - mmTPC0_QM_BASE)
#define MME_QMAN_OFFSET		(mmMME1_QM_BASE - mmMME0_QM_BASE)
#define NIC_MACRO_QMAN_OFFSET	(mmNIC1_QM0_BASE - mmNIC0_QM0_BASE)

#define TPC_CFG_OFFSET		(mmTPC1_CFG_BASE - mmTPC0_CFG_BASE)

#define DMA_CORE_OFFSET		(mmDMA1_CORE_BASE - mmDMA0_CORE_BASE)

/* Offsets of the named registers relative to mmDMA0_CORE_CFG_0 */
#define QMAN_LDMA_SRC_OFFSET	(mmDMA0_CORE_SRC_BASE_LO - mmDMA0_CORE_CFG_0)
#define QMAN_LDMA_DST_OFFSET	(mmDMA0_CORE_DST_BASE_LO - mmDMA0_CORE_CFG_0)
#define QMAN_LDMA_SIZE_OFFSET	(mmDMA0_CORE_DST_TSIZE_0 - mmDMA0_CORE_CFG_0)

#define QMAN_CPDMA_SRC_OFFSET	(mmDMA0_QM_CQ_PTR_LO_4 - mmDMA0_CORE_CFG_0)
#define QMAN_CPDMA_DST_OFFSET	(mmDMA0_CORE_DST_BASE_LO - mmDMA0_CORE_CFG_0)
#define QMAN_CPDMA_SIZE_OFFSET	(mmDMA0_QM_CQ_TSIZE_4 - mmDMA0_CORE_CFG_0)

#define SIF_RTR_CTRL_OFFSET	(mmSIF_RTR_CTRL_1_BASE - mmSIF_RTR_CTRL_0_BASE)

#define NIF_RTR_CTRL_OFFSET	(mmNIF_RTR_CTRL_1_BASE - mmNIF_RTR_CTRL_0_BASE)

#define MME_ACC_OFFSET		(mmMME1_ACC_BASE - mmMME0_ACC_BASE)
#define SRAM_BANK_OFFSET	(mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)

/*
 * Object counts, computed as ((last - first) + 4) >> 2: the number of
 * registers spaced 4 bytes apart from the first to the last, inclusive.
 */
#define NUM_OF_SOB_IN_BLOCK		\
	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)

#define NUM_OF_MONITORS_IN_BLOCK	\
	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 
/*
 * DRAM Memory Map
 *
 * The driver-owned regions are laid out back to back starting at
 * DRAM_PHYS_BASE: CPU F/W image, MMU page tables, MMU cache management,
 * then a reserved area. Their sizes add up to 512MB.
 */

#define CPU_FW_IMAGE_SIZE	0x10000000	/* 256MB */
#define MMU_PAGE_TABLES_SIZE	0x0BF00000	/* 191MB */
#define MMU_CACHE_MNG_SIZE	0x00100000	/* 1MB */
#define RESERVED		0x04000000	/* 64MB */

#define CPU_FW_IMAGE_ADDR	DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR	(CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
#define MMU_CACHE_MNG_ADDR	(MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)

#define DRAM_DRIVER_END_ADDR	(MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE +\
								RESERVED)

#define DRAM_BASE_ADDR_USER	0x20000000	/* 512MB */

/* Fail the build if the driver regions run past DRAM_BASE_ADDR_USER. */
#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER)
#error "Driver must reserve no more than 512MB"
#endif
130*4882a593Smuzhiyun 
/*
 * Internal QMANs PQ sizes
 *
 * Each *_LENGTH is a number of PQ entries; the matching *_SIZE_IN_BYTES
 * multiplies it by QMAN_PQ_ENTRY_SIZE, which is defined outside this header.
 */

#define MME_QMAN_LENGTH			1024
#define MME_QMAN_SIZE_IN_BYTES		(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)

#define HBM_DMA_QMAN_LENGTH		1024
#define HBM_DMA_QMAN_SIZE_IN_BYTES	\
				(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)

#define TPC_QMAN_LENGTH			1024
#define TPC_QMAN_SIZE_IN_BYTES		(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)

#define SRAM_USER_BASE_OFFSET  GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START
144*4882a593Smuzhiyun 
/* Virtual address space */
#define VA_HOST_SPACE_START	0x1000000000000ull	/* 256TB */
#define VA_HOST_SPACE_END	0x3FF8000000000ull	/* 1PB - 512GB */
#define VA_HOST_SPACE_SIZE	(VA_HOST_SPACE_END - \
					VA_HOST_SPACE_START) /* 768TB - 512GB */
150*4882a593Smuzhiyun 
/*
 * H/W capability bits. Bits 0-11 are individual capabilities; bits 24-31
 * hold one bit per TPC (TPC0..TPC7), covered by HW_CAP_TPC_MASK and
 * starting at HW_CAP_TPC_SHIFT.
 */
#define HW_CAP_PLL		BIT(0)
#define HW_CAP_HBM		BIT(1)
#define HW_CAP_MMU		BIT(2)
#define HW_CAP_MME		BIT(3)
#define HW_CAP_CPU		BIT(4)
#define HW_CAP_PCI_DMA		BIT(5)
#define HW_CAP_MSI		BIT(6)
#define HW_CAP_CPU_Q		BIT(7)
#define HW_CAP_HBM_DMA		BIT(8)
#define HW_CAP_CLK_GATE		BIT(9)
#define HW_CAP_SRAM_SCRAMBLER	BIT(10)
#define HW_CAP_HBM_SCRAMBLER	BIT(11)

#define HW_CAP_TPC0		BIT(24)
#define HW_CAP_TPC1		BIT(25)
#define HW_CAP_TPC2		BIT(26)
#define HW_CAP_TPC3		BIT(27)
#define HW_CAP_TPC4		BIT(28)
#define HW_CAP_TPC5		BIT(29)
#define HW_CAP_TPC6		BIT(30)
#define HW_CAP_TPC7		BIT(31)
#define HW_CAP_TPC_MASK		GENMASK(31, 24)
#define HW_CAP_TPC_SHIFT	24
174*4882a593Smuzhiyun 
/* Extract bits 49:39 of @addr, shifted down to bit 0. */
#define GAUDI_CPU_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 39)) >> 39)

/*
 * Rewrite @addr in place: clear bits 49:39, then set bit 39.
 * @addr must be a modifiable lvalue and is evaluated twice.
 */
#define GAUDI_PCI_TO_CPU_ADDR(addr)			\
	do {						\
		(addr) &= ~GENMASK_ULL(49, 39);		\
		(addr) |= BIT_ULL(39);			\
	} while (0)

/*
 * Rewrite @addr in place: clear bits 49:39, then OR in @extension shifted
 * left by 39. @addr must be a modifiable lvalue and is evaluated twice.
 */
#define GAUDI_CPU_TO_PCI_ADDR(addr, extension)		\
	do {						\
		(addr) &= ~GENMASK_ULL(49, 39);		\
		(addr) |= (u64) (extension) << 39;	\
	} while (0)
186*4882a593Smuzhiyun 
/*
 * DMA channel indices: three PCI DMA channels followed by five HBM DMA
 * channels. GAUDI_DMA_MAX is the total number of channels and must stay last.
 */
enum gaudi_dma_channels {
	GAUDI_PCI_DMA_1,
	GAUDI_PCI_DMA_2,
	GAUDI_PCI_DMA_3,
	GAUDI_HBM_DMA_1,
	GAUDI_HBM_DMA_2,
	GAUDI_HBM_DMA_3,
	GAUDI_HBM_DMA_4,
	GAUDI_HBM_DMA_5,
	GAUDI_DMA_MAX
};
198*4882a593Smuzhiyun 
/*
 * One bit per TPC: TPCn is bit n. GAUDI_TPC_MASK_ALL is the OR of all eight
 * single-TPC values.
 */
enum gaudi_tpc_mask {
	GAUDI_TPC_MASK_TPC0 = 0x01,
	GAUDI_TPC_MASK_TPC1 = 0x02,
	GAUDI_TPC_MASK_TPC2 = 0x04,
	GAUDI_TPC_MASK_TPC3 = 0x08,
	GAUDI_TPC_MASK_TPC4 = 0x10,
	GAUDI_TPC_MASK_TPC5 = 0x20,
	GAUDI_TPC_MASK_TPC6 = 0x40,
	GAUDI_TPC_MASK_TPC7 = 0x80,
	GAUDI_TPC_MASK_ALL = 0xFF
};
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun /**
212*4882a593Smuzhiyun  * struct gaudi_internal_qman_info - Internal QMAN information.
213*4882a593Smuzhiyun  * @pq_kernel_addr: Kernel address of the PQ memory area in the host.
214*4882a593Smuzhiyun  * @pq_dma_addr: DMA address of the PQ memory area in the host.
215*4882a593Smuzhiyun  * @pq_size: Size of allocated host memory for PQ.
216*4882a593Smuzhiyun  */
217*4882a593Smuzhiyun struct gaudi_internal_qman_info {
218*4882a593Smuzhiyun 	void		*pq_kernel_addr;
219*4882a593Smuzhiyun 	dma_addr_t	pq_dma_addr;
220*4882a593Smuzhiyun 	size_t		pq_size;
221*4882a593Smuzhiyun };
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun /**
224*4882a593Smuzhiyun  * struct gaudi_device - ASIC specific manage structure.
225*4882a593Smuzhiyun  * @cpucp_info_get: get information on device from CPU-CP
226*4882a593Smuzhiyun  * @hw_queues_lock: protects the H/W queues from concurrent access.
227*4882a593Smuzhiyun  * @clk_gate_mutex: protects code areas that require clock gating to be disabled
228*4882a593Smuzhiyun  *                  temporarily
229*4882a593Smuzhiyun  * @internal_qmans: Internal QMANs information. The array size is larger than
230*4882a593Smuzhiyun  *                  the actual number of internal queues because they are not in
231*4882a593Smuzhiyun  *                  consecutive order.
232*4882a593Smuzhiyun  * @hbm_bar_cur_addr: current address of HBM PCI bar.
233*4882a593Smuzhiyun  * @max_freq_value: current max clk frequency.
234*4882a593Smuzhiyun  * @events: array that holds all event id's
235*4882a593Smuzhiyun  * @events_stat: array that holds histogram of all received events.
236*4882a593Smuzhiyun  * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset
237*4882a593Smuzhiyun  * @hw_cap_initialized: This field contains a bit per H/W engine. When that
238*4882a593Smuzhiyun  *                      engine is initialized, that bit is set by the driver to
239*4882a593Smuzhiyun  *                      signal we can use this engine in later code paths.
240*4882a593Smuzhiyun  *                      Each bit is cleared upon reset of its corresponding H/W
241*4882a593Smuzhiyun  *                      engine.
242*4882a593Smuzhiyun  * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
243*4882a593Smuzhiyun  *                  Multi MSI is possible only with IOMMU enabled.
244*4882a593Smuzhiyun  * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
245*4882a593Smuzhiyun  *                    8-bit value so use u8.
246*4882a593Smuzhiyun  */
247*4882a593Smuzhiyun struct gaudi_device {
248*4882a593Smuzhiyun 	int (*cpucp_info_get)(struct hl_device *hdev);
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	/* TODO: remove hw_queues_lock after moving to scheduler code */
251*4882a593Smuzhiyun 	spinlock_t			hw_queues_lock;
252*4882a593Smuzhiyun 	struct mutex			clk_gate_mutex;
253*4882a593Smuzhiyun 
254*4882a593Smuzhiyun 	struct gaudi_internal_qman_info	internal_qmans[GAUDI_QUEUE_ID_SIZE];
255*4882a593Smuzhiyun 
256*4882a593Smuzhiyun 	u64				hbm_bar_cur_addr;
257*4882a593Smuzhiyun 	u64				max_freq_value;
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	u32				events[GAUDI_EVENT_SIZE];
260*4882a593Smuzhiyun 	u32				events_stat[GAUDI_EVENT_SIZE];
261*4882a593Smuzhiyun 	u32				events_stat_aggregate[GAUDI_EVENT_SIZE];
262*4882a593Smuzhiyun 	u32				hw_cap_initialized;
263*4882a593Smuzhiyun 	u8				multi_msi_mode;
264*4882a593Smuzhiyun 	u8				mmu_cache_inv_pi;
265*4882a593Smuzhiyun };
266*4882a593Smuzhiyun 
267*4882a593Smuzhiyun void gaudi_init_security(struct hl_device *hdev);
268*4882a593Smuzhiyun void gaudi_add_device_attr(struct hl_device *hdev,
269*4882a593Smuzhiyun 			struct attribute_group *dev_attr_grp);
270*4882a593Smuzhiyun void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
271*4882a593Smuzhiyun int gaudi_debug_coresight(struct hl_device *hdev, void *data);
272*4882a593Smuzhiyun void gaudi_halt_coresight(struct hl_device *hdev);
273*4882a593Smuzhiyun int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
274*4882a593Smuzhiyun void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun #endif /* GAUDIP_H_ */
277