// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured, but because CP is secured, the driver still needs to
 * parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled
 * mode).
 *
 * DMA RR does NOT protect the host because DMA is not secured.
 */
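
/*
 * Illustrative sketch only (not the actual parser): per the rules above, a
 * secured QMAN's CB walker would reject privileged opcodes and pass benign
 * messages, along the lines of:
 *
 *	switch (pkt_id) {
 *	case PACKET_WREG_32:
 *	case PACKET_MSG_PROT:
 *		return -EPERM;		(not allowed in a user DMA CB)
 *	case PACKET_MSG_LONG:
 *	case PACKET_MSG_SHORT:
 *		break;			(allowed)
 *	}
 */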

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
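
/*
 * Illustrative (hypothetical) use of the idle macros above: an engine is
 * reported idle only when all of its status views agree, e.g. for DMA
 * channel 0 something like:
 *
 *	IS_DMA_QM_IDLE(RREG32(mmDMA_QM_0_GLBL_STS0)) &&
 *		IS_DMA_IDLE(RREG32(mmDMA_CH_0_STS0))
 */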

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}
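
/*
 * Hypothetical usage sketch: a CB parser would gate on the packet id before
 * indexing goya_packet_sizes[], e.g.
 *
 *	if (!validate_packet_id(pkt_id))
 *		return -EINVAL;
 *	pkt_size = goya_packet_sizes[pkt_id];
 */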

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
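
/*
 * Note (inferred from the declarations below): goya_mmu_prepare() walks this
 * array and programs each engine's SECURE_PROPS/ARUSER/AWUSER register with
 * the target ASID, binding every engine to that address space.
 */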

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].requires_kernel_cb = 1;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].requires_kernel_cb = 0;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].requires_kernel_cb = 0;
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->dmmu.hop0_shift = HOP0_SHIFT;
	prop->dmmu.hop1_shift = HOP1_SHIFT;
	prop->dmmu.hop2_shift = HOP2_SHIFT;
	prop->dmmu.hop3_shift = HOP3_SHIFT;
	prop->dmmu.hop4_shift = HOP4_SHIFT;
	prop->dmmu.hop0_mask = HOP0_MASK;
	prop->dmmu.hop1_mask = HOP1_MASK;
	prop->dmmu.hop2_mask = HOP2_MASK;
	prop->dmmu.hop3_mask = HOP3_MASK;
	prop->dmmu.hop4_mask = HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	return 0;
}

/*
 * goya_pci_bars_map - Map PCI BARs of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
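
/*
 * Design note (inferred from the code above): because inbound region 1 is in
 * BAR match mode, moving the DDR window is just a matter of re-programming
 * the region's target address; the previous base is returned so a caller can
 * restore the window after a temporary move.
 */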

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	rc = goya_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc)
		goto free_queue_props;

	/* Goya Firmware does not support security */
	prop->fw_security_disabled = true;
	dev_info(hdev->dev, "firmware-level security is disabled\n");

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
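
/*
 * Assumption, for illustration: the low bits cleared by ~0x7FF hold the
 * engine's ASID field plus its MMU-bypass (MMBP) bit, so zeroing them and
 * OR-ing in the asid routes that engine's transactions through the MMU under
 * the given address space.
 */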

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 trace_freq = 0;
	u32 pll_clk = 0;
	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
	u32 od = RREG32(mmPSOC_PCI_PLL_OD);

	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
		if (div_sel == DIV_SEL_REF_CLK)
			trace_freq = PLL_REF_CLK;
		else
			trace_freq = PLL_REF_CLK / (div_fctr + 1);
	} else if (div_sel == DIV_SEL_PLL_CLK ||
					div_sel == DIV_SEL_DIVIDED_PLL) {
		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
		if (div_sel == DIV_SEL_PLL_CLK)
			trace_freq = pll_clk;
		else
			trace_freq = pll_clk / (div_fctr + 1);
	} else {
		dev_warn(hdev->dev,
			"Received invalid div select value: %d", div_sel);
	}

	prop->psoc_timestamp_frequency = trace_freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
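
/*
 * Worked example with illustrative values (assuming a 50 MHz reference
 * clock): nf = 39, nr = 0, od = 1 gives
 *
 *	pll_clk = 50 * (39 + 1) / ((0 + 1) * (1 + 1)) = 1000 MHz
 *
 * and DIV_SEL_DIVIDED_PLL with div_fctr = 3 would then report
 * 1000 / (3 + 1) = 250 MHz.
 */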

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
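
/*
 * Assumption about the WR_COMP programming above: 0x80000001 looks like
 * payload 1 with the top bit set, i.e. an "add 1" write to the sync object
 * selected in sob_addr, so each DMA completion increments that sync object.
 */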

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
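/*
 * Handshake summary (as implemented below): the driver publishes the PQ, EQ
 * and CQ base addresses and lengths, marks PQ_INIT_STATUS_READY_FOR_CP,
 * kicks the device CPU through the GIC, then polls until the firmware flips
 * the status to PQ_INIT_STATUS_READY_FOR_HOST.
 */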
goya_init_cpu_queues(struct hl_device * hdev)1147*4882a593Smuzhiyun int goya_init_cpu_queues(struct hl_device *hdev)
1148*4882a593Smuzhiyun {
1149*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1150*4882a593Smuzhiyun struct hl_eq *eq;
1151*4882a593Smuzhiyun u32 status;
1152*4882a593Smuzhiyun struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
1153*4882a593Smuzhiyun int err;
1154*4882a593Smuzhiyun
1155*4882a593Smuzhiyun if (!hdev->cpu_queues_enable)
1156*4882a593Smuzhiyun return 0;
1157*4882a593Smuzhiyun
1158*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
1159*4882a593Smuzhiyun return 0;
1160*4882a593Smuzhiyun
1161*4882a593Smuzhiyun eq = &hdev->event_queue;
1162*4882a593Smuzhiyun
1163*4882a593Smuzhiyun WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
1164*4882a593Smuzhiyun WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
1165*4882a593Smuzhiyun
1166*4882a593Smuzhiyun WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
1167*4882a593Smuzhiyun WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
1168*4882a593Smuzhiyun
1169*4882a593Smuzhiyun WREG32(mmCPU_CQ_BASE_ADDR_LOW,
1170*4882a593Smuzhiyun lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1171*4882a593Smuzhiyun WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
1172*4882a593Smuzhiyun upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1173*4882a593Smuzhiyun
1174*4882a593Smuzhiyun WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
1175*4882a593Smuzhiyun WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
1176*4882a593Smuzhiyun WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun /* Used for EQ CI */
1179*4882a593Smuzhiyun WREG32(mmCPU_EQ_CI, 0);
1180*4882a593Smuzhiyun
1181*4882a593Smuzhiyun WREG32(mmCPU_IF_PF_PQ_PI, 0);
1182*4882a593Smuzhiyun
1183*4882a593Smuzhiyun WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
1184*4882a593Smuzhiyun
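	/*
	 * Writing the PI_UPDATE event ID to the GIC distributor raises an
	 * interrupt towards the device CPU, telling it to pick up the queue
	 * configuration that was just programmed.
	 */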
1185*4882a593Smuzhiyun WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
1186*4882a593Smuzhiyun GOYA_ASYNC_EVENT_ID_PI_UPDATE);
1187*4882a593Smuzhiyun
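	/*
	 * The device CPU acknowledges by flipping PQ_INIT_STATUS from
	 * READY_FOR_CP to READY_FOR_HOST; poll for that every 1000 us, up
	 * to GOYA_CPU_TIMEOUT_USEC.
	 */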
1188*4882a593Smuzhiyun err = hl_poll_timeout(
1189*4882a593Smuzhiyun hdev,
1190*4882a593Smuzhiyun mmCPU_PQ_INIT_STATUS,
1191*4882a593Smuzhiyun status,
1192*4882a593Smuzhiyun (status == PQ_INIT_STATUS_READY_FOR_HOST),
1193*4882a593Smuzhiyun 1000,
1194*4882a593Smuzhiyun GOYA_CPU_TIMEOUT_USEC);
1195*4882a593Smuzhiyun
1196*4882a593Smuzhiyun if (err) {
1197*4882a593Smuzhiyun dev_err(hdev->dev,
1198*4882a593Smuzhiyun "Failed to setup communication with device CPU\n");
1199*4882a593Smuzhiyun return -EIO;
1200*4882a593Smuzhiyun }
1201*4882a593Smuzhiyun
1202*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_CPU_Q;
1203*4882a593Smuzhiyun return 0;
1204*4882a593Smuzhiyun }
1205*4882a593Smuzhiyun
1206*4882a593Smuzhiyun static void goya_set_pll_refclk(struct hl_device *hdev)
1207*4882a593Smuzhiyun {
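	/* Writing 0 to every DIV_SEL output selects the PLL reference clock */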
1208*4882a593Smuzhiyun WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
1209*4882a593Smuzhiyun WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
1210*4882a593Smuzhiyun WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
1211*4882a593Smuzhiyun WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
1212*4882a593Smuzhiyun
1213*4882a593Smuzhiyun WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
1214*4882a593Smuzhiyun WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
1215*4882a593Smuzhiyun WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
1216*4882a593Smuzhiyun WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
1217*4882a593Smuzhiyun
1218*4882a593Smuzhiyun WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
1219*4882a593Smuzhiyun WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
1220*4882a593Smuzhiyun WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
1221*4882a593Smuzhiyun WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
1224*4882a593Smuzhiyun WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
1225*4882a593Smuzhiyun WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
1226*4882a593Smuzhiyun WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
1227*4882a593Smuzhiyun
1228*4882a593Smuzhiyun WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
1229*4882a593Smuzhiyun WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
1230*4882a593Smuzhiyun WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
1231*4882a593Smuzhiyun WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
1232*4882a593Smuzhiyun
1233*4882a593Smuzhiyun WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
1234*4882a593Smuzhiyun WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
1235*4882a593Smuzhiyun WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
1236*4882a593Smuzhiyun WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
1237*4882a593Smuzhiyun
1238*4882a593Smuzhiyun WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
1239*4882a593Smuzhiyun WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
1240*4882a593Smuzhiyun WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
1241*4882a593Smuzhiyun WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
1242*4882a593Smuzhiyun }
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun static void goya_disable_clk_rlx(struct hl_device *hdev)
1245*4882a593Smuzhiyun {
1246*4882a593Smuzhiyun WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
1247*4882a593Smuzhiyun WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
1248*4882a593Smuzhiyun }
1249*4882a593Smuzhiyun
1250*4882a593Smuzhiyun static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
1251*4882a593Smuzhiyun {
1252*4882a593Smuzhiyun u64 tpc_eml_address;
1253*4882a593Smuzhiyun u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
1254*4882a593Smuzhiyun int err, slm_index;
1255*4882a593Smuzhiyun
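	/*
	 * Per-TPC register blocks are evenly strided: configuration blocks
	 * are 0x40000 apart, EML blocks 0x200000 apart, and each TPC's SLM
	 * starts 0x100000 into its EML block.
	 */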
1256*4882a593Smuzhiyun tpc_offset = tpc_id * 0x40000;
1257*4882a593Smuzhiyun tpc_eml_offset = tpc_id * 0x200000;
1258*4882a593Smuzhiyun tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
1259*4882a593Smuzhiyun tpc_slm_offset = tpc_eml_address + 0x100000;
1260*4882a593Smuzhiyun
1261*4882a593Smuzhiyun /*
1262*4882a593Smuzhiyun  * Workaround for bug H2 #2443:
1263*4882a593Smuzhiyun * "TPC SB is not initialized on chip reset"
1264*4882a593Smuzhiyun */
1265*4882a593Smuzhiyun
1266*4882a593Smuzhiyun val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
1267*4882a593Smuzhiyun if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
1268*4882a593Smuzhiyun dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
1269*4882a593Smuzhiyun tpc_id);
1270*4882a593Smuzhiyun
1271*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);
1272*4882a593Smuzhiyun
1273*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
1274*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
1275*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
1276*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
1277*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
1278*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
1279*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
1280*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
1281*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
1282*4882a593Smuzhiyun WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);
1283*4882a593Smuzhiyun
1284*4882a593Smuzhiyun WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1285*4882a593Smuzhiyun 1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);
1286*4882a593Smuzhiyun
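	/* Wait for MBIST to complete before pulsing the core reset */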
1287*4882a593Smuzhiyun err = hl_poll_timeout(
1288*4882a593Smuzhiyun hdev,
1289*4882a593Smuzhiyun mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1290*4882a593Smuzhiyun val,
1291*4882a593Smuzhiyun (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
1292*4882a593Smuzhiyun 1000,
1293*4882a593Smuzhiyun HL_DEVICE_TIMEOUT_USEC);
1294*4882a593Smuzhiyun
1295*4882a593Smuzhiyun if (err)
1296*4882a593Smuzhiyun dev_err(hdev->dev,
1297*4882a593Smuzhiyun "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);
1298*4882a593Smuzhiyun
1299*4882a593Smuzhiyun WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1300*4882a593Smuzhiyun 1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);
1301*4882a593Smuzhiyun
1302*4882a593Smuzhiyun msleep(GOYA_RESET_WAIT_MSEC);
1303*4882a593Smuzhiyun
1304*4882a593Smuzhiyun WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1305*4882a593Smuzhiyun ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));
1306*4882a593Smuzhiyun
1307*4882a593Smuzhiyun msleep(GOYA_RESET_WAIT_MSEC);
1308*4882a593Smuzhiyun
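	/* Zero the first 256 words (1KB) of the TPC's SLM */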
1309*4882a593Smuzhiyun for (slm_index = 0 ; slm_index < 256 ; slm_index++)
1310*4882a593Smuzhiyun WREG32(tpc_slm_offset + (slm_index << 2), 0);
1311*4882a593Smuzhiyun
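	/* Read back to ensure the SLM writes have reached the device */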
1312*4882a593Smuzhiyun val = RREG32(tpc_slm_offset);
1313*4882a593Smuzhiyun }
1314*4882a593Smuzhiyun
1315*4882a593Smuzhiyun static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1318*4882a593Smuzhiyun int i;
1319*4882a593Smuzhiyun
1320*4882a593Smuzhiyun if (hdev->pldm)
1321*4882a593Smuzhiyun return;
1322*4882a593Smuzhiyun
1323*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1324*4882a593Smuzhiyun return;
1325*4882a593Smuzhiyun
1326*4882a593Smuzhiyun /* Workaround for H2 #2443 */
1327*4882a593Smuzhiyun
1328*4882a593Smuzhiyun for (i = 0 ; i < TPC_MAX_NUM ; i++)
1329*4882a593Smuzhiyun _goya_tpc_mbist_workaround(hdev, i);
1330*4882a593Smuzhiyun
1331*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1332*4882a593Smuzhiyun }
1333*4882a593Smuzhiyun
1334*4882a593Smuzhiyun /*
1335*4882a593Smuzhiyun * goya_init_golden_registers - Initialize golden registers
1336*4882a593Smuzhiyun *
1337*4882a593Smuzhiyun * @hdev: pointer to hl_device structure
1338*4882a593Smuzhiyun *
1339*4882a593Smuzhiyun * Initialize the H/W registers of the device
1340*4882a593Smuzhiyun *
1341*4882a593Smuzhiyun */
1342*4882a593Smuzhiyun static void goya_init_golden_registers(struct hl_device *hdev)
1343*4882a593Smuzhiyun {
1344*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1345*4882a593Smuzhiyun u32 polynom[10], tpc_intr_mask, offset;
1346*4882a593Smuzhiyun int i;
1347*4882a593Smuzhiyun
1348*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1349*4882a593Smuzhiyun return;
1350*4882a593Smuzhiyun
1351*4882a593Smuzhiyun polynom[0] = 0x00020080;
1352*4882a593Smuzhiyun polynom[1] = 0x00401000;
1353*4882a593Smuzhiyun polynom[2] = 0x00200800;
1354*4882a593Smuzhiyun polynom[3] = 0x00002000;
1355*4882a593Smuzhiyun polynom[4] = 0x00080200;
1356*4882a593Smuzhiyun polynom[5] = 0x00040100;
1357*4882a593Smuzhiyun polynom[6] = 0x00100400;
1358*4882a593Smuzhiyun polynom[7] = 0x00004000;
1359*4882a593Smuzhiyun polynom[8] = 0x00010000;
1360*4882a593Smuzhiyun polynom[9] = 0x00008000;
1361*4882a593Smuzhiyun
1362*4882a593Smuzhiyun /* Mask all arithmetic interrupts from TPC */
1363*4882a593Smuzhiyun tpc_intr_mask = 0x7FFF;
1364*4882a593Smuzhiyun
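	/*
	 * Program arbitration credits for the 5 columns (X0-X4) in each of
	 * the 6 SRAM router rows (one row per 0x20000 of offset).
	 */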
1365*4882a593Smuzhiyun for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1366*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1367*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1368*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1369*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1370*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1371*4882a593Smuzhiyun
1372*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1373*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1374*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1375*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1376*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1377*4882a593Smuzhiyun
1379*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1380*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1381*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1382*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1383*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1384*4882a593Smuzhiyun
1385*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1386*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1387*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1388*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1389*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1390*4882a593Smuzhiyun
1391*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1392*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1393*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1394*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1395*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1396*4882a593Smuzhiyun
1397*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1398*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1399*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1400*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1401*4882a593Smuzhiyun WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1402*4882a593Smuzhiyun }
1403*4882a593Smuzhiyun
1404*4882a593Smuzhiyun WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1405*4882a593Smuzhiyun WREG32(mmMME_AGU, 0x0f0f0f10);
1406*4882a593Smuzhiyun WREG32(mmMME_SEI_MASK, ~0x0);
1407*4882a593Smuzhiyun
1408*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1409*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1410*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1411*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1412*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1413*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1414*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1415*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1416*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1417*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1418*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1419*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1420*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1421*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1422*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1423*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1424*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1425*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1426*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1427*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1428*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1429*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1430*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1431*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1432*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1433*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1434*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1435*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1436*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1437*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1438*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1439*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1440*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1441*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1442*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1443*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1444*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1445*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1446*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1447*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1448*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1449*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1450*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1451*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1452*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1453*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1454*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1455*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1456*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1457*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1458*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1459*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1460*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1461*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1462*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1463*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1464*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1465*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1466*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1467*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1468*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1469*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1470*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1471*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1472*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1473*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1474*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1475*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1476*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1477*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1478*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1479*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1480*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1481*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1482*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1483*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1484*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1485*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1486*4882a593Smuzhiyun WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1487*4882a593Smuzhiyun WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1488*4882a593Smuzhiyun WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1489*4882a593Smuzhiyun WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1490*4882a593Smuzhiyun WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1491*4882a593Smuzhiyun WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1492*4882a593Smuzhiyun
1493*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1494*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1495*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1496*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1497*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1498*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1499*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1500*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1501*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1502*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1503*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1504*4882a593Smuzhiyun WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1505*4882a593Smuzhiyun
1506*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1507*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1508*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1509*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1510*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1511*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1512*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1513*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1514*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1515*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1516*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1517*4882a593Smuzhiyun WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1518*4882a593Smuzhiyun
1519*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1520*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1521*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1522*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1523*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1524*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1525*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1526*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1527*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1528*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1529*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1530*4882a593Smuzhiyun WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1531*4882a593Smuzhiyun
1532*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1533*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1534*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1535*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1536*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1537*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1538*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1539*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1540*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1541*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1542*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1543*4882a593Smuzhiyun WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1544*4882a593Smuzhiyun
1545*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1546*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1547*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1548*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1549*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1550*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1551*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1552*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1553*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1554*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1555*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1556*4882a593Smuzhiyun WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1557*4882a593Smuzhiyun
1558*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1559*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1560*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1561*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1562*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1563*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1564*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1565*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1566*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1567*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1568*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1569*4882a593Smuzhiyun WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1570*4882a593Smuzhiyun
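	/*
	 * Load the 10 split polynomial coefficients (scaled down by 2^7)
	 * into consecutive 32-bit registers of every router.
	 */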
1571*4882a593Smuzhiyun for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1572*4882a593Smuzhiyun WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1573*4882a593Smuzhiyun WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1574*4882a593Smuzhiyun WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1575*4882a593Smuzhiyun WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1576*4882a593Smuzhiyun WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1577*4882a593Smuzhiyun WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1578*4882a593Smuzhiyun
1579*4882a593Smuzhiyun WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1580*4882a593Smuzhiyun WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1581*4882a593Smuzhiyun WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1582*4882a593Smuzhiyun WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1583*4882a593Smuzhiyun WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1584*4882a593Smuzhiyun WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1585*4882a593Smuzhiyun WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1586*4882a593Smuzhiyun WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1587*4882a593Smuzhiyun
1588*4882a593Smuzhiyun WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1589*4882a593Smuzhiyun WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1590*4882a593Smuzhiyun }
1591*4882a593Smuzhiyun
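	/* Enable linear and non-linear address scrambling on the MME routers */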
1592*4882a593Smuzhiyun for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1593*4882a593Smuzhiyun WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1594*4882a593Smuzhiyun 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1595*4882a593Smuzhiyun WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1596*4882a593Smuzhiyun 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1597*4882a593Smuzhiyun }
1598*4882a593Smuzhiyun
1599*4882a593Smuzhiyun for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1600*4882a593Smuzhiyun /*
1601*4882a593Smuzhiyun 		 * Workaround for bug H2 #2441:
1602*4882a593Smuzhiyun * "ST.NOP set trace event illegal opcode"
1603*4882a593Smuzhiyun */
1604*4882a593Smuzhiyun WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1605*4882a593Smuzhiyun
1606*4882a593Smuzhiyun WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1607*4882a593Smuzhiyun 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1608*4882a593Smuzhiyun WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1609*4882a593Smuzhiyun 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1610*4882a593Smuzhiyun
1611*4882a593Smuzhiyun WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1612*4882a593Smuzhiyun ICACHE_FETCH_LINE_NUM, 2);
1613*4882a593Smuzhiyun }
1614*4882a593Smuzhiyun
1615*4882a593Smuzhiyun WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1616*4882a593Smuzhiyun WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1617*4882a593Smuzhiyun 1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1618*4882a593Smuzhiyun
1619*4882a593Smuzhiyun WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1620*4882a593Smuzhiyun WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1621*4882a593Smuzhiyun 1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1622*4882a593Smuzhiyun
1623*4882a593Smuzhiyun /*
1624*4882a593Smuzhiyun * Workaround for H2 #HW-23 bug
1625*4882a593Smuzhiyun * Set DMA max outstanding read requests to 240 on DMA CH 1.
1626*4882a593Smuzhiyun  * This limit is still large enough not to affect PCIe Gen4 bandwidth.
1627*4882a593Smuzhiyun  * Only this DMA channel needs limiting, because the user can only read
1628*4882a593Smuzhiyun  * from host memory using DMA CH 1.
1629*4882a593Smuzhiyun */
1630*4882a593Smuzhiyun WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1631*4882a593Smuzhiyun
1632*4882a593Smuzhiyun WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1633*4882a593Smuzhiyun
1634*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1635*4882a593Smuzhiyun }
1636*4882a593Smuzhiyun
1637*4882a593Smuzhiyun static void goya_init_mme_qman(struct hl_device *hdev)
1638*4882a593Smuzhiyun {
1639*4882a593Smuzhiyun u32 mtr_base_lo, mtr_base_hi;
1640*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1641*4882a593Smuzhiyun u32 gic_base_lo, gic_base_hi;
1642*4882a593Smuzhiyun u64 qman_base_addr;
1643*4882a593Smuzhiyun
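	/*
	 * The monitor payload and sync object addresses below become the
	 * CP's MSG_BASE0/1, i.e. the bases for its sync-manager messages.
	 */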
1644*4882a593Smuzhiyun mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1645*4882a593Smuzhiyun mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1646*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1647*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1648*4882a593Smuzhiyun
1649*4882a593Smuzhiyun gic_base_lo =
1650*4882a593Smuzhiyun lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1651*4882a593Smuzhiyun gic_base_hi =
1652*4882a593Smuzhiyun upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1653*4882a593Smuzhiyun
1654*4882a593Smuzhiyun qman_base_addr = hdev->asic_prop.sram_base_address +
1655*4882a593Smuzhiyun MME_QMAN_BASE_OFFSET;
1656*4882a593Smuzhiyun
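	/*
	 * The MME queue itself lives in SRAM; QM_PQ_SIZE takes the log2 of
	 * the number of queue entries.
	 */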
1657*4882a593Smuzhiyun WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1658*4882a593Smuzhiyun WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1659*4882a593Smuzhiyun WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1660*4882a593Smuzhiyun WREG32(mmMME_QM_PQ_PI, 0);
1661*4882a593Smuzhiyun WREG32(mmMME_QM_PQ_CI, 0);
1662*4882a593Smuzhiyun WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1663*4882a593Smuzhiyun WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1664*4882a593Smuzhiyun WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1665*4882a593Smuzhiyun WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1666*4882a593Smuzhiyun
1667*4882a593Smuzhiyun WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1668*4882a593Smuzhiyun WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1669*4882a593Smuzhiyun WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1670*4882a593Smuzhiyun WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1671*4882a593Smuzhiyun
1672*4882a593Smuzhiyun /* QMAN CQ has 8 cache lines */
1673*4882a593Smuzhiyun WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1674*4882a593Smuzhiyun
1675*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1676*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1679*4882a593Smuzhiyun
1680*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1681*4882a593Smuzhiyun
1682*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1685*4882a593Smuzhiyun }
1686*4882a593Smuzhiyun
1687*4882a593Smuzhiyun static void goya_init_mme_cmdq(struct hl_device *hdev)
1688*4882a593Smuzhiyun {
1689*4882a593Smuzhiyun u32 mtr_base_lo, mtr_base_hi;
1690*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1691*4882a593Smuzhiyun u32 gic_base_lo, gic_base_hi;
1692*4882a593Smuzhiyun
1693*4882a593Smuzhiyun mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1694*4882a593Smuzhiyun mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1695*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1696*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1697*4882a593Smuzhiyun
1698*4882a593Smuzhiyun gic_base_lo =
1699*4882a593Smuzhiyun lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1700*4882a593Smuzhiyun gic_base_hi =
1701*4882a593Smuzhiyun upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1702*4882a593Smuzhiyun
1703*4882a593Smuzhiyun WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1704*4882a593Smuzhiyun WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1705*4882a593Smuzhiyun WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1706*4882a593Smuzhiyun WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1707*4882a593Smuzhiyun
1708*4882a593Smuzhiyun /* CMDQ CQ has 20 cache lines */
1709*4882a593Smuzhiyun WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1710*4882a593Smuzhiyun
1711*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1712*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1713*4882a593Smuzhiyun
1714*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1715*4882a593Smuzhiyun
1716*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1717*4882a593Smuzhiyun
1718*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1719*4882a593Smuzhiyun
1720*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1721*4882a593Smuzhiyun }
1722*4882a593Smuzhiyun
1723*4882a593Smuzhiyun void goya_init_mme_qmans(struct hl_device *hdev)
1724*4882a593Smuzhiyun {
1725*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1726*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1727*4882a593Smuzhiyun
1728*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_MME)
1729*4882a593Smuzhiyun return;
1730*4882a593Smuzhiyun
1731*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1732*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1733*4882a593Smuzhiyun
1734*4882a593Smuzhiyun WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1735*4882a593Smuzhiyun WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1736*4882a593Smuzhiyun
1737*4882a593Smuzhiyun goya_init_mme_qman(hdev);
1738*4882a593Smuzhiyun goya_init_mme_cmdq(hdev);
1739*4882a593Smuzhiyun
1740*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_MME;
1741*4882a593Smuzhiyun }
1742*4882a593Smuzhiyun
1743*4882a593Smuzhiyun static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1744*4882a593Smuzhiyun {
1745*4882a593Smuzhiyun u32 mtr_base_lo, mtr_base_hi;
1746*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1747*4882a593Smuzhiyun u32 gic_base_lo, gic_base_hi;
1748*4882a593Smuzhiyun u64 qman_base_addr;
1749*4882a593Smuzhiyun u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
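	/*
	 * TPC QMAN register blocks are evenly strided, so the TPC0->TPC1
	 * register distance scales to any TPC ID.
	 */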
1750*4882a593Smuzhiyun
1751*4882a593Smuzhiyun mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1752*4882a593Smuzhiyun mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1753*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1754*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1755*4882a593Smuzhiyun
1756*4882a593Smuzhiyun gic_base_lo =
1757*4882a593Smuzhiyun lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1758*4882a593Smuzhiyun gic_base_hi =
1759*4882a593Smuzhiyun upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1760*4882a593Smuzhiyun
1761*4882a593Smuzhiyun qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1762*4882a593Smuzhiyun
1763*4882a593Smuzhiyun WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1764*4882a593Smuzhiyun WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1765*4882a593Smuzhiyun WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1766*4882a593Smuzhiyun WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1767*4882a593Smuzhiyun WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1768*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1769*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1770*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1771*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1772*4882a593Smuzhiyun
1773*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1774*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1775*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1776*4882a593Smuzhiyun WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1777*4882a593Smuzhiyun
1778*4882a593Smuzhiyun WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1779*4882a593Smuzhiyun
1780*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1781*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1782*4882a593Smuzhiyun
1783*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1784*4882a593Smuzhiyun GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1785*4882a593Smuzhiyun
1786*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1787*4882a593Smuzhiyun
1788*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1789*4882a593Smuzhiyun
1790*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1791*4882a593Smuzhiyun }
1792*4882a593Smuzhiyun
1793*4882a593Smuzhiyun static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1794*4882a593Smuzhiyun {
1795*4882a593Smuzhiyun u32 mtr_base_lo, mtr_base_hi;
1796*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1797*4882a593Smuzhiyun u32 gic_base_lo, gic_base_hi;
1798*4882a593Smuzhiyun u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1799*4882a593Smuzhiyun
1800*4882a593Smuzhiyun mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1801*4882a593Smuzhiyun mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1802*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1803*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1804*4882a593Smuzhiyun
1805*4882a593Smuzhiyun gic_base_lo =
1806*4882a593Smuzhiyun lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1807*4882a593Smuzhiyun gic_base_hi =
1808*4882a593Smuzhiyun upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1809*4882a593Smuzhiyun
1810*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1811*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1812*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1813*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1814*4882a593Smuzhiyun
1815*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1818*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1819*4882a593Smuzhiyun
1820*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1821*4882a593Smuzhiyun GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1822*4882a593Smuzhiyun
1823*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1824*4882a593Smuzhiyun
1825*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1826*4882a593Smuzhiyun
1827*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1828*4882a593Smuzhiyun }
1829*4882a593Smuzhiyun
1830*4882a593Smuzhiyun void goya_init_tpc_qmans(struct hl_device *hdev)
1831*4882a593Smuzhiyun {
1832*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1833*4882a593Smuzhiyun u32 so_base_lo, so_base_hi;
1834*4882a593Smuzhiyun u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1835*4882a593Smuzhiyun mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1836*4882a593Smuzhiyun int i;
1837*4882a593Smuzhiyun
1838*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_TPC)
1839*4882a593Smuzhiyun return;
1840*4882a593Smuzhiyun
1841*4882a593Smuzhiyun so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1842*4882a593Smuzhiyun so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1843*4882a593Smuzhiyun
1844*4882a593Smuzhiyun for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1845*4882a593Smuzhiyun WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1846*4882a593Smuzhiyun so_base_lo);
1847*4882a593Smuzhiyun WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1848*4882a593Smuzhiyun so_base_hi);
1849*4882a593Smuzhiyun }
1850*4882a593Smuzhiyun
1851*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1852*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1853*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1854*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1855*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1856*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1857*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1858*4882a593Smuzhiyun goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1859*4882a593Smuzhiyun
1860*4882a593Smuzhiyun for (i = 0 ; i < TPC_MAX_NUM ; i++)
1861*4882a593Smuzhiyun goya_init_tpc_cmdq(hdev, i);
1862*4882a593Smuzhiyun
1863*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_TPC;
1864*4882a593Smuzhiyun }
1865*4882a593Smuzhiyun
1866*4882a593Smuzhiyun /*
1867*4882a593Smuzhiyun * goya_disable_internal_queues - Disable internal queues
1868*4882a593Smuzhiyun *
1869*4882a593Smuzhiyun * @hdev: pointer to hl_device structure
1870*4882a593Smuzhiyun *
1871*4882a593Smuzhiyun */
1872*4882a593Smuzhiyun static void goya_disable_internal_queues(struct hl_device *hdev)
1873*4882a593Smuzhiyun {
1874*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1875*4882a593Smuzhiyun
1876*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_MME))
1877*4882a593Smuzhiyun goto disable_tpc;
1878*4882a593Smuzhiyun
1879*4882a593Smuzhiyun WREG32(mmMME_QM_GLBL_CFG0, 0);
1880*4882a593Smuzhiyun WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1881*4882a593Smuzhiyun
1882*4882a593Smuzhiyun disable_tpc:
1883*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1884*4882a593Smuzhiyun return;
1885*4882a593Smuzhiyun
1886*4882a593Smuzhiyun WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1887*4882a593Smuzhiyun WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1888*4882a593Smuzhiyun
1889*4882a593Smuzhiyun WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1890*4882a593Smuzhiyun WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1891*4882a593Smuzhiyun
1892*4882a593Smuzhiyun WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1893*4882a593Smuzhiyun WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1896*4882a593Smuzhiyun WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1899*4882a593Smuzhiyun WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1900*4882a593Smuzhiyun
1901*4882a593Smuzhiyun WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1902*4882a593Smuzhiyun WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1903*4882a593Smuzhiyun
1904*4882a593Smuzhiyun WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1905*4882a593Smuzhiyun WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1906*4882a593Smuzhiyun
1907*4882a593Smuzhiyun WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1908*4882a593Smuzhiyun WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1909*4882a593Smuzhiyun }
1910*4882a593Smuzhiyun
1911*4882a593Smuzhiyun /*
1912*4882a593Smuzhiyun * goya_stop_internal_queues - Stop internal queues
1913*4882a593Smuzhiyun *
1914*4882a593Smuzhiyun * @hdev: pointer to hl_device structure
1915*4882a593Smuzhiyun *
1916*4882a593Smuzhiyun  * Returns 0 on success, -EIO if one or more queues failed to stop
1917*4882a593Smuzhiyun *
1918*4882a593Smuzhiyun */
1919*4882a593Smuzhiyun static int goya_stop_internal_queues(struct hl_device *hdev)
1920*4882a593Smuzhiyun {
1921*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
1922*4882a593Smuzhiyun int rc, retval = 0;
1923*4882a593Smuzhiyun
1924*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_MME))
1925*4882a593Smuzhiyun goto stop_tpc;
1926*4882a593Smuzhiyun
1927*4882a593Smuzhiyun /*
1928*4882a593Smuzhiyun 	 * Each queue (QMAN) is a separate H/W logic, so each QMAN can be
1929*4882a593Smuzhiyun 	 * stopped independently; failing to stop one is no reason not to
1930*4882a593Smuzhiyun 	 * try to stop the other QMANs
1931*4882a593Smuzhiyun */
1932*4882a593Smuzhiyun
1933*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1934*4882a593Smuzhiyun mmMME_QM_GLBL_CFG1,
1935*4882a593Smuzhiyun mmMME_QM_CP_STS,
1936*4882a593Smuzhiyun mmMME_QM_GLBL_STS0);
1937*4882a593Smuzhiyun
1938*4882a593Smuzhiyun if (rc) {
1939*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop MME QMAN\n");
1940*4882a593Smuzhiyun retval = -EIO;
1941*4882a593Smuzhiyun }
1942*4882a593Smuzhiyun
1943*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1944*4882a593Smuzhiyun mmMME_CMDQ_GLBL_CFG1,
1945*4882a593Smuzhiyun mmMME_CMDQ_CP_STS,
1946*4882a593Smuzhiyun mmMME_CMDQ_GLBL_STS0);
1947*4882a593Smuzhiyun
1948*4882a593Smuzhiyun if (rc) {
1949*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1950*4882a593Smuzhiyun retval = -EIO;
1951*4882a593Smuzhiyun }
1952*4882a593Smuzhiyun
1953*4882a593Smuzhiyun stop_tpc:
1954*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1955*4882a593Smuzhiyun return retval;
1956*4882a593Smuzhiyun
1957*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1958*4882a593Smuzhiyun mmTPC0_QM_GLBL_CFG1,
1959*4882a593Smuzhiyun mmTPC0_QM_CP_STS,
1960*4882a593Smuzhiyun mmTPC0_QM_GLBL_STS0);
1961*4882a593Smuzhiyun
1962*4882a593Smuzhiyun if (rc) {
1963*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1964*4882a593Smuzhiyun retval = -EIO;
1965*4882a593Smuzhiyun }
1966*4882a593Smuzhiyun
1967*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1968*4882a593Smuzhiyun mmTPC0_CMDQ_GLBL_CFG1,
1969*4882a593Smuzhiyun mmTPC0_CMDQ_CP_STS,
1970*4882a593Smuzhiyun mmTPC0_CMDQ_GLBL_STS0);
1971*4882a593Smuzhiyun
1972*4882a593Smuzhiyun if (rc) {
1973*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1974*4882a593Smuzhiyun retval = -EIO;
1975*4882a593Smuzhiyun }
1976*4882a593Smuzhiyun
1977*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1978*4882a593Smuzhiyun mmTPC1_QM_GLBL_CFG1,
1979*4882a593Smuzhiyun mmTPC1_QM_CP_STS,
1980*4882a593Smuzhiyun mmTPC1_QM_GLBL_STS0);
1981*4882a593Smuzhiyun
1982*4882a593Smuzhiyun if (rc) {
1983*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1984*4882a593Smuzhiyun retval = -EIO;
1985*4882a593Smuzhiyun }
1986*4882a593Smuzhiyun
1987*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1988*4882a593Smuzhiyun mmTPC1_CMDQ_GLBL_CFG1,
1989*4882a593Smuzhiyun mmTPC1_CMDQ_CP_STS,
1990*4882a593Smuzhiyun mmTPC1_CMDQ_GLBL_STS0);
1991*4882a593Smuzhiyun
1992*4882a593Smuzhiyun if (rc) {
1993*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1994*4882a593Smuzhiyun retval = -EIO;
1995*4882a593Smuzhiyun }
1996*4882a593Smuzhiyun
1997*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
1998*4882a593Smuzhiyun mmTPC2_QM_GLBL_CFG1,
1999*4882a593Smuzhiyun mmTPC2_QM_CP_STS,
2000*4882a593Smuzhiyun mmTPC2_QM_GLBL_STS0);
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun if (rc) {
2003*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2004*4882a593Smuzhiyun retval = -EIO;
2005*4882a593Smuzhiyun }
2006*4882a593Smuzhiyun
2007*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2008*4882a593Smuzhiyun mmTPC2_CMDQ_GLBL_CFG1,
2009*4882a593Smuzhiyun mmTPC2_CMDQ_CP_STS,
2010*4882a593Smuzhiyun mmTPC2_CMDQ_GLBL_STS0);
2011*4882a593Smuzhiyun
2012*4882a593Smuzhiyun if (rc) {
2013*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2014*4882a593Smuzhiyun retval = -EIO;
2015*4882a593Smuzhiyun }
2016*4882a593Smuzhiyun
2017*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2018*4882a593Smuzhiyun mmTPC3_QM_GLBL_CFG1,
2019*4882a593Smuzhiyun mmTPC3_QM_CP_STS,
2020*4882a593Smuzhiyun mmTPC3_QM_GLBL_STS0);
2021*4882a593Smuzhiyun
2022*4882a593Smuzhiyun if (rc) {
2023*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2024*4882a593Smuzhiyun retval = -EIO;
2025*4882a593Smuzhiyun }
2026*4882a593Smuzhiyun
2027*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2028*4882a593Smuzhiyun mmTPC3_CMDQ_GLBL_CFG1,
2029*4882a593Smuzhiyun mmTPC3_CMDQ_CP_STS,
2030*4882a593Smuzhiyun mmTPC3_CMDQ_GLBL_STS0);
2031*4882a593Smuzhiyun
2032*4882a593Smuzhiyun if (rc) {
2033*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2034*4882a593Smuzhiyun retval = -EIO;
2035*4882a593Smuzhiyun }
2036*4882a593Smuzhiyun
2037*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2038*4882a593Smuzhiyun mmTPC4_QM_GLBL_CFG1,
2039*4882a593Smuzhiyun mmTPC4_QM_CP_STS,
2040*4882a593Smuzhiyun mmTPC4_QM_GLBL_STS0);
2041*4882a593Smuzhiyun
2042*4882a593Smuzhiyun if (rc) {
2043*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2044*4882a593Smuzhiyun retval = -EIO;
2045*4882a593Smuzhiyun }
2046*4882a593Smuzhiyun
2047*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2048*4882a593Smuzhiyun mmTPC4_CMDQ_GLBL_CFG1,
2049*4882a593Smuzhiyun mmTPC4_CMDQ_CP_STS,
2050*4882a593Smuzhiyun mmTPC4_CMDQ_GLBL_STS0);
2051*4882a593Smuzhiyun
2052*4882a593Smuzhiyun if (rc) {
2053*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2054*4882a593Smuzhiyun retval = -EIO;
2055*4882a593Smuzhiyun }
2056*4882a593Smuzhiyun
2057*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2058*4882a593Smuzhiyun mmTPC5_QM_GLBL_CFG1,
2059*4882a593Smuzhiyun mmTPC5_QM_CP_STS,
2060*4882a593Smuzhiyun mmTPC5_QM_GLBL_STS0);
2061*4882a593Smuzhiyun
2062*4882a593Smuzhiyun if (rc) {
2063*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2064*4882a593Smuzhiyun retval = -EIO;
2065*4882a593Smuzhiyun }
2066*4882a593Smuzhiyun
2067*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2068*4882a593Smuzhiyun mmTPC5_CMDQ_GLBL_CFG1,
2069*4882a593Smuzhiyun mmTPC5_CMDQ_CP_STS,
2070*4882a593Smuzhiyun mmTPC5_CMDQ_GLBL_STS0);
2071*4882a593Smuzhiyun
2072*4882a593Smuzhiyun if (rc) {
2073*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2074*4882a593Smuzhiyun retval = -EIO;
2075*4882a593Smuzhiyun }
2076*4882a593Smuzhiyun
2077*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2078*4882a593Smuzhiyun mmTPC6_QM_GLBL_CFG1,
2079*4882a593Smuzhiyun mmTPC6_QM_CP_STS,
2080*4882a593Smuzhiyun mmTPC6_QM_GLBL_STS0);
2081*4882a593Smuzhiyun
2082*4882a593Smuzhiyun if (rc) {
2083*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2084*4882a593Smuzhiyun retval = -EIO;
2085*4882a593Smuzhiyun }
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2088*4882a593Smuzhiyun mmTPC6_CMDQ_GLBL_CFG1,
2089*4882a593Smuzhiyun mmTPC6_CMDQ_CP_STS,
2090*4882a593Smuzhiyun mmTPC6_CMDQ_GLBL_STS0);
2091*4882a593Smuzhiyun
2092*4882a593Smuzhiyun if (rc) {
2093*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2094*4882a593Smuzhiyun retval = -EIO;
2095*4882a593Smuzhiyun }
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2098*4882a593Smuzhiyun mmTPC7_QM_GLBL_CFG1,
2099*4882a593Smuzhiyun mmTPC7_QM_CP_STS,
2100*4882a593Smuzhiyun mmTPC7_QM_GLBL_STS0);
2101*4882a593Smuzhiyun
2102*4882a593Smuzhiyun if (rc) {
2103*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2104*4882a593Smuzhiyun retval = -EIO;
2105*4882a593Smuzhiyun }
2106*4882a593Smuzhiyun
2107*4882a593Smuzhiyun rc = goya_stop_queue(hdev,
2108*4882a593Smuzhiyun mmTPC7_CMDQ_GLBL_CFG1,
2109*4882a593Smuzhiyun mmTPC7_CMDQ_CP_STS,
2110*4882a593Smuzhiyun mmTPC7_CMDQ_GLBL_STS0);
2111*4882a593Smuzhiyun
2112*4882a593Smuzhiyun if (rc) {
2113*4882a593Smuzhiyun dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2114*4882a593Smuzhiyun retval = -EIO;
2115*4882a593Smuzhiyun }
2116*4882a593Smuzhiyun
2117*4882a593Smuzhiyun return retval;
2118*4882a593Smuzhiyun }
2119*4882a593Smuzhiyun
2120*4882a593Smuzhiyun static void goya_dma_stall(struct hl_device *hdev)
2121*4882a593Smuzhiyun {
2122*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2123*4882a593Smuzhiyun
2124*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2125*4882a593Smuzhiyun return;
2126*4882a593Smuzhiyun
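	/* Set the STOP bit in each DMA QMAN's CFG1 to stall all five channels */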
2127*4882a593Smuzhiyun WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2128*4882a593Smuzhiyun WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2129*4882a593Smuzhiyun WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2130*4882a593Smuzhiyun WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2131*4882a593Smuzhiyun WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2132*4882a593Smuzhiyun }
2133*4882a593Smuzhiyun
2134*4882a593Smuzhiyun static void goya_tpc_stall(struct hl_device *hdev)
2135*4882a593Smuzhiyun {
2136*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2137*4882a593Smuzhiyun
2138*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2139*4882a593Smuzhiyun return;
2140*4882a593Smuzhiyun
2141*4882a593Smuzhiyun WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2142*4882a593Smuzhiyun WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2143*4882a593Smuzhiyun WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2144*4882a593Smuzhiyun WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2145*4882a593Smuzhiyun WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2146*4882a593Smuzhiyun WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2147*4882a593Smuzhiyun WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2148*4882a593Smuzhiyun WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2149*4882a593Smuzhiyun }
2150*4882a593Smuzhiyun
2151*4882a593Smuzhiyun static void goya_mme_stall(struct hl_device *hdev)
2152*4882a593Smuzhiyun {
2153*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2154*4882a593Smuzhiyun
2155*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_MME))
2156*4882a593Smuzhiyun return;
2157*4882a593Smuzhiyun
2158*4882a593Smuzhiyun WREG32(mmMME_STALL, 0xFFFFFFFF);
2159*4882a593Smuzhiyun }
2160*4882a593Smuzhiyun
2161*4882a593Smuzhiyun static int goya_enable_msix(struct hl_device *hdev)
2162*4882a593Smuzhiyun {
2163*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2164*4882a593Smuzhiyun int cq_cnt = hdev->asic_prop.completion_queues_count;
2165*4882a593Smuzhiyun int rc, i, irq_cnt_init, irq;
2166*4882a593Smuzhiyun
2167*4882a593Smuzhiyun if (goya->hw_cap_initialized & HW_CAP_MSIX)
2168*4882a593Smuzhiyun return 0;
2169*4882a593Smuzhiyun
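	/*
	 * Request exactly GOYA_MSIX_ENTRIES vectors (min == max) so that
	 * completion queue i is always serviced by MSI-X vector i.
	 */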
2170*4882a593Smuzhiyun rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2171*4882a593Smuzhiyun GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2172*4882a593Smuzhiyun if (rc < 0) {
2173*4882a593Smuzhiyun dev_err(hdev->dev,
2174*4882a593Smuzhiyun "MSI-X: Failed to enable support -- %d/%d\n",
2175*4882a593Smuzhiyun GOYA_MSIX_ENTRIES, rc);
2176*4882a593Smuzhiyun return rc;
2177*4882a593Smuzhiyun }
2178*4882a593Smuzhiyun
2179*4882a593Smuzhiyun for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2180*4882a593Smuzhiyun irq = pci_irq_vector(hdev->pdev, i);
2181*4882a593Smuzhiyun rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2182*4882a593Smuzhiyun &hdev->completion_queue[i]);
2183*4882a593Smuzhiyun if (rc) {
2184*4882a593Smuzhiyun 			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2185*4882a593Smuzhiyun goto free_irqs;
2186*4882a593Smuzhiyun }
2187*4882a593Smuzhiyun }
2188*4882a593Smuzhiyun
2189*4882a593Smuzhiyun irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2190*4882a593Smuzhiyun
2191*4882a593Smuzhiyun rc = request_irq(irq, hl_irq_handler_eq, 0,
2192*4882a593Smuzhiyun goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2193*4882a593Smuzhiyun &hdev->event_queue);
2194*4882a593Smuzhiyun if (rc) {
2195*4882a593Smuzhiyun 		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2196*4882a593Smuzhiyun goto free_irqs;
2197*4882a593Smuzhiyun }
2198*4882a593Smuzhiyun
2199*4882a593Smuzhiyun goya->hw_cap_initialized |= HW_CAP_MSIX;
2200*4882a593Smuzhiyun return 0;
2201*4882a593Smuzhiyun
2202*4882a593Smuzhiyun free_irqs:
2203*4882a593Smuzhiyun for (i = 0 ; i < irq_cnt_init ; i++)
2204*4882a593Smuzhiyun free_irq(pci_irq_vector(hdev->pdev, i),
2205*4882a593Smuzhiyun &hdev->completion_queue[i]);
2206*4882a593Smuzhiyun
2207*4882a593Smuzhiyun pci_free_irq_vectors(hdev->pdev);
2208*4882a593Smuzhiyun return rc;
2209*4882a593Smuzhiyun }
2210*4882a593Smuzhiyun
2211*4882a593Smuzhiyun static void goya_sync_irqs(struct hl_device *hdev)
2212*4882a593Smuzhiyun {
2213*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2214*4882a593Smuzhiyun int i;
2215*4882a593Smuzhiyun
2216*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2217*4882a593Smuzhiyun return;
2218*4882a593Smuzhiyun
2219*4882a593Smuzhiyun /* Wait for all pending IRQs to be finished */
2220*4882a593Smuzhiyun for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2221*4882a593Smuzhiyun synchronize_irq(pci_irq_vector(hdev->pdev, i));
2222*4882a593Smuzhiyun
2223*4882a593Smuzhiyun synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2224*4882a593Smuzhiyun }
2225*4882a593Smuzhiyun
goya_disable_msix(struct hl_device * hdev)2226*4882a593Smuzhiyun static void goya_disable_msix(struct hl_device *hdev)
2227*4882a593Smuzhiyun {
2228*4882a593Smuzhiyun struct goya_device *goya = hdev->asic_specific;
2229*4882a593Smuzhiyun int i, irq;
2230*4882a593Smuzhiyun
2231*4882a593Smuzhiyun if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2232*4882a593Smuzhiyun return;
2233*4882a593Smuzhiyun
2234*4882a593Smuzhiyun goya_sync_irqs(hdev);
2235*4882a593Smuzhiyun
2236*4882a593Smuzhiyun irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2237*4882a593Smuzhiyun free_irq(irq, &hdev->event_queue);
2238*4882a593Smuzhiyun
2239*4882a593Smuzhiyun for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2240*4882a593Smuzhiyun irq = pci_irq_vector(hdev->pdev, i);
2241*4882a593Smuzhiyun free_irq(irq, &hdev->completion_queue[i]);
2242*4882a593Smuzhiyun }
2243*4882a593Smuzhiyun
2244*4882a593Smuzhiyun pci_free_irq_vectors(hdev->pdev);
2245*4882a593Smuzhiyun
2246*4882a593Smuzhiyun goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2247*4882a593Smuzhiyun }
2248*4882a593Smuzhiyun
goya_enable_timestamp(struct hl_device * hdev)2249*4882a593Smuzhiyun static void goya_enable_timestamp(struct hl_device *hdev)
2250*4882a593Smuzhiyun {
2251*4882a593Smuzhiyun /* Disable the timestamp counter */
2252*4882a593Smuzhiyun WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2253*4882a593Smuzhiyun
2254*4882a593Smuzhiyun /* Zero the lower/upper parts of the 64-bit counter */
2255*4882a593Smuzhiyun WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2256*4882a593Smuzhiyun WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2257*4882a593Smuzhiyun
2258*4882a593Smuzhiyun /* Enable the counter */
2259*4882a593Smuzhiyun WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2260*4882a593Smuzhiyun }
2261*4882a593Smuzhiyun
goya_disable_timestamp(struct hl_device * hdev)2262*4882a593Smuzhiyun static void goya_disable_timestamp(struct hl_device *hdev)
2263*4882a593Smuzhiyun {
2264*4882a593Smuzhiyun /* Disable the timestamp counter */
2265*4882a593Smuzhiyun WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2266*4882a593Smuzhiyun }
2267*4882a593Smuzhiyun
goya_halt_engines(struct hl_device * hdev,bool hard_reset)2268*4882a593Smuzhiyun static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2269*4882a593Smuzhiyun {
2270*4882a593Smuzhiyun u32 wait_timeout_ms;
2271*4882a593Smuzhiyun
2272*4882a593Smuzhiyun dev_info(hdev->dev,
2273*4882a593Smuzhiyun "Halting compute engines and disabling interrupts\n");
2274*4882a593Smuzhiyun
2275*4882a593Smuzhiyun if (hdev->pldm)
2276*4882a593Smuzhiyun wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2277*4882a593Smuzhiyun else
2278*4882a593Smuzhiyun wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2279*4882a593Smuzhiyun
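	/*
	 * Stop the queues first so no new workloads are fetched, then stall
	 * the engines themselves, and only then disable the queues.
	 */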
	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}

/*
 * goya_load_firmware_to_device() - Load LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX FW code from the firmware file to the DDR BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
}

/*
 * goya_load_boot_fit_to_device() - Load boot fit to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy boot fit file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
}

/*
 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string is located at that offset.
 */
static void goya_read_device_fw_version(struct hl_device *hdev,
					enum hl_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmUBOOT_VER_OFFSET);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return;
	}

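	/*
	 * The scratchpad may hold an absolute SRAM address; mask off the
	 * SRAM base so ver_off is a plain offset into the SRAM BAR.
	 */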
	ver_off &= ~((u32)SRAM_BASE_ADDR);

	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
				VERSION_MAX_LEN);
	} else {
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
			name, ver_off);
		strcpy(dest, "unavailable");
	}
}

static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to the device, we need to set the DDR
	 * BAR to the base address of DRAM
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
			mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
			false, GOYA_CPU_TIMEOUT_USEC,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

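	/*
	 * Write the hop0 physical address for this ASID and set bit 31 of
	 * MMU_ASID_BUSY to trigger the update, then poll until the H/W
	 * clears the busy bit.
	 */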
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

int goya_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	hdev->dram_supports_virtual_memory = true;
	hdev->dram_default_page_mapping = true;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	goya->hw_cap_initialized |= HW_CAP_MMU;

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
			lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	/* Remove follower feature due to performance bug */
	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
					VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);

	WREG32(mmMMU_MMU_ENABLE, 1);
	WREG32(mmMMU_SPI_MASK, 0xF);

	return 0;

err:
	return rc;
}

/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	dev_info(hdev->dev, "Starting initialization of H/W\n");

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}

/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/* We don't know the state of the CPU, so make sure it is
		 * stopped by any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		msleep(cpu_timeout_ms);

		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_info(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. In either reset we need to wait until the reset
	 * is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset) {
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
					HW_CAP_DDR_0 | HW_CAP_DDR_1 |
					HW_CAP_DMA | HW_CAP_MME |
					HW_CAP_MMU | HW_CAP_TPC_MBIST |
					HW_CAP_GOLDEN | HW_CAP_TPC);
	memset(goya->events_stat, 0, sizeof(goya->events_stat));
}

int goya_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}

static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

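	/*
	 * Cancel the device's base physical address of host memory, as
	 * dma_mmap_coherent() expects the original host DMA address.
	 */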
	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

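	/*
	 * The device CPU is notified of the new PQ entry via a GIC event,
	 * in addition to the doorbell write.
	 */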
	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}

void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMAN PQs reside on the SRAM, so we need to copy to I/O space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}

static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	base += offset;
	*dma_handle += offset;

	return base;
}

static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

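	/*
	 * The fence packet is placed at the end of the patched CB. When the
	 * QMAN executes it, it writes GOYA_QMAN0_FENCE_VAL to the fence
	 * address, which is polled below to detect job completion.
	 */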
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	goya_qman0_set_security(hdev, false);

	return rc;
}

int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
				u32 timeout, long *result)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
					timeout, result);
}

int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

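	/*
	 * Zero the fence location before sending the packet, so the poll
	 * below only succeeds once the H/W actually writes the fence value.
	 */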
	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

int goya_test_cpu_queue(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/*
	 * Check the capability here because send_cpu_message() won't update
	 * the result value if the capability is not initialized
	 */
	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}

int goya_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		rc = goya_test_queue(hdev, i);
		if (rc)
			ret_val = -EINVAL;
	}

	return ret_val;
}

static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

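	/*
	 * The DMA pool serves fixed-size blocks, so requests larger than
	 * GOYA_DMA_POOL_BLK_SIZE cannot be satisfied here.
	 */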
	if (size > GOYA_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle)
{
	void *vaddr;

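	/*
	 * Rebase the returned handle from the host DMA address to the
	 * device virtual address where the CPU-accessible memory is mapped.
	 */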
	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
			VA_CPU_ACCESSIBLE_MEM_ADDR;

	return vaddr;
}

void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}

static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
		return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address += HOST_PHYS_BASE;

	return 0;
}

static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address -= HOST_PHYS_BASE;

	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}

u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

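	/*
	 * Walk the scatter-gather list and merge physically contiguous
	 * entries, as long as the merged size doesn't exceed
	 * DMA_MAX_TRANSFER_SIZE. Each resulting chunk needs one LIN_DMA
	 * packet in the patched CB.
	 */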
	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}

static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

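	/*
	 * If this host memory range was already pinned for the job, only
	 * account for its DMA descriptors in the patched CB size.
	 */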
	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				userptr->sgt->nents, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}

static int goya_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	enum goya_dma_direction user_dir;
	bool sram_addr = true;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 ctl;
	int rc = 0;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

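	/*
	 * Extract the host-side address (candidate for pinning) and the
	 * device-side address (range-checked below) according to the DMA
	 * direction encoded in the packet.
	 */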
	switch (user_dir) {
	case DMA_HOST_TO_DRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
		dir = DMA_TO_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case DMA_DRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;

	case DMA_HOST_TO_SRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case DMA_SRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;
	default:
		dev_err(hdev->dev, "DMA direction is undefined\n");
		return -EFAULT;
	}

	if (sram_addr) {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.sram_user_base_address,
				hdev->asic_prop.sram_end_address)) {

			dev_err(hdev->dev,
				"SRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize));
			return -EFAULT;
		}
	} else {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.dram_user_base_address,
				hdev->asic_prop.dram_end_address)) {

			dev_err(hdev->dev,
				"DRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize));
			return -EFAULT;
		}
	}

	if (skip_host_mem_pin)
		parser->patched_cb_size += sizeof(*user_dma_pkt);
	else {
		if ((dir == DMA_TO_DEVICE) &&
			(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
			dev_err(hdev->dev,
				"Can't DMA from host on queue other than 1\n");
			return -EFAULT;
		}

		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
	}

	return rc;
}

static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 sram_memory_addr, dram_memory_addr;
	enum goya_dma_direction user_dir;
	u32 ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	if (user_dir == DMA_DRAM_TO_SRAM) {
		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	}

	if (!hl_mem_area_inside_range(sram_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.sram_user_base_address,
				hdev->asic_prop.sram_end_address)) {
		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
			sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	if (!hl_mem_area_inside_range(dram_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.dram_user_base_address,
				hdev->asic_prop.dram_end_address)) {
		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
			dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Special handling for DMA with size 0. The H/W has a bug where
	 * this can cause the QMAN DMA to get stuck, so block it here.
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}

static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * WA for HW-23.
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other than 1\n");
		return -EFAULT;
	}

	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

static int goya_validate_wreg32(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_wreg32 *wreg_pkt)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 sob_start_addr, sob_end_addr;
	u16 reg_offset;

	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;

	dev_dbg(hdev->dev, "WREG32 packet details:\n");
	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
	dev_dbg(hdev->dev, "value == 0x%x\n",
		le32_to_cpu(wreg_pkt->value));

	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
			reg_offset);
		return -EPERM;
	}

	/*
	 * With MMU, DMA channels are not secured, so it doesn't matter where
	 * the WR COMP will be written to because it will go out with
	 * non-secured property
	 */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);

	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {

		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
			le32_to_cpu(wreg_pkt->value));
		return -EPERM;
	}

	return 0;
}
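
/*
 * Illustrative sketch of the non-MMU WREG32 value check above: the value a
 * user WREG32 may write is assumed to be restricted to the low 32 bits of a
 * sync-object address, i.e. within [SOB_OBJ_0, SOB_OBJ_1023] of the sync
 * manager. The helper name is hypothetical.
 */
static inline bool __maybe_unused goya_example_sob_value_ok(u32 value)
{
	u32 sob_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	u32 sob_hi = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);

	return (value >= sob_lo) && (value <= sob_hi);
}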

static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* user_cb_size is more than 0 so the loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}
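
/*
 * A self-contained sketch of the packet-header decode used by the CB walker
 * above and by goya_patch_cb() below; pkt_header is assumed to hold the
 * first 8 bytes of a packet in little-endian layout. The helper name is
 * illustrative only.
 */
static inline enum packet_id __maybe_unused
goya_example_packet_id(__le64 pkt_header)
{
	return (enum packet_id) ((le64_to_cpu(pkt_header) &
			PACKET_HEADER_PACKET_ID_MASK) >>
				PACKET_HEADER_PACKET_ID_SHIFT);
}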

static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
			addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
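
/*
 * The scatter-gather walk above coalesces physically contiguous entries as
 * long as the merged transfer stays within DMA_MAX_TRANSFER_SIZE. A minimal
 * sketch of that merge decision, given two adjacent descriptors (helper
 * name is illustrative):
 */
static inline bool __maybe_unused
goya_example_can_merge_sg(dma_addr_t addr, u64 len, dma_addr_t next_addr,
				u64 next_len)
{
	/* Merge only if the next entry starts exactly where this one ends */
	return (addr + len == next_addr) &&
		(len + next_len <= DMA_MAX_TRANSFER_SIZE);
}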

static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* user_cb_size is more than 0 so the loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}

static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was
	 * done in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to the CB from the earlier cb_get. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}
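
/*
 * CB handles returned by hl_cb_create() are assumed to be the CB's idr
 * index shifted left by PAGE_SHIFT, so they can double as mmap offsets.
 * The shifts around hl_cb_get()/hl_cb_destroy() above undo and redo that
 * encoding; illustrative helpers (names are hypothetical):
 */
static inline u32 __maybe_unused goya_example_cb_handle_to_id(u64 handle)
{
	return (u32) (handle >> PAGE_SHIFT);
}

static inline u64 __maybe_unused goya_example_cb_id_to_handle(u32 id)
{
	return (u64) id << PAGE_SHIFT;
}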

static int goya_parse_cb_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still hold one reference
	 * to the CB from the earlier cb_get. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}

static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* For internal queue jobs, just check if CB address is valid */
	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->sram_user_base_address,
			asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->dram_user_base_address,
			asic_prop->dram_end_address))
		return 0;

	dev_err(hdev->dev,
		"Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}

int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct goya_device *goya = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return goya_parse_cb_no_ext_queue(hdev, parser);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return goya_parse_cb_mmu(hdev, parser);
	else
		return goya_parse_cb_no_mmu(hdev, parser);
}

void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
				bool eb)
{
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}
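
/*
 * Usage sketch for the helper above: callers first reserve room for the two
 * trailing MSG_PROT packets when sizing the patched CB (as goya_validate_cb()
 * and goya_parse_cb_mmu() do), then stamp them onto the final buffer, e.g.:
 *
 *	goya_add_end_of_cb_packets(hdev, cb->kernel_address, cb->size,
 *					cq_addr, cq_val, msix_vec, true);
 *
 * The cq_addr/cq_val/msix_vec values in this call are placeholders.
 */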

void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Flush all WREG to prevent race */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}

/*
 * goya_debugfs_read32 - read a 32bit value from a given device or a host
 *                       mapped address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: returned value
 *
 * In case of a DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows reading from the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore should be done with extreme care.
 */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
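
/*
 * Worked example of the DDR-BAR window arithmetic above, assuming a
 * hypothetical power-of-two BAR size of 512MB and DRAM_PHYS_BASE of 0:
 * for addr = 0x21000000 the window base is 0x20000000 and the access
 * offset inside the BAR is addr - bar_base = 0x1000000. Illustrative
 * helper only; the read/write paths above inline the same expression.
 */
static inline u64 __maybe_unused
goya_example_ddr_bar_base(u64 addr, u64 bar_size)
{
	/* bar_size is assumed to be a power of two */
	return DRAM_PHYS_BASE + (addr & ~(bar_size - 0x1ull));
}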

/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host
 *                        mapped address.
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: value to write
 *
 * In case of a DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore should be done with extreme care.
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		u32 val_l = RREG32(addr - CFG_BASE);
		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) val_h) << 32) | val_l;

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr <=
			DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		WREG32(addr - CFG_BASE, lower_32_bits(val));
		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr <=
			DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
					(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
}

static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	u8 index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}
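
/*
 * Worked example of the index math above, under the spacing the divisors
 * imply (e.g. consecutive TPCn_ECC event IDs being 3 apart): for
 * GOYA_ASYNC_EVENT_ID_TPC3_ECC the index evaluates to 3, so the
 * "TPC%d_ecc" template from _goya_get_event_desc() formats to "TPC3_ecc".
 * Illustrative helper only.
 */
static inline u8 __maybe_unused goya_example_tpc_ecc_index(u16 event_type)
{
	return (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
}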

static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}

static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}
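
/*
 * Sketch of the faulting-VA reconstruction above: bits 49:32 of the virtual
 * address come from the MMU_PAGE_ERROR_CAPTURE register and bits 31:0 from
 * MMU_PAGE_ERROR_CAPTURE_VA. Illustrative helper only.
 */
static inline u64 __maybe_unused
goya_example_fault_va(u32 capture, u32 va_lo)
{
	return (((u64) (capture & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK))
			<< 32) | va_lo;
}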

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
				bool razwi)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		goya_print_razwi_info(hdev);
		goya_print_mmu_error_info(hdev);
	}
}
4505*4882a593Smuzhiyun
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
				size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	long result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes in order for the CPU-CP to copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
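	/* e.g. a 27-byte packet is padded to 32 bytes: (27 + 7) & ~7 == 32 */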

	/* total_pkt_size is cast to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}

static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}

static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt;
	long result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
			0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n",
			event_type);

	return rc;
}

static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
		return;
	}

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

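	/*
	 * Three classes of events are handled below: fatal ECC/HW errors
	 * that may trigger a hard reset, RAZWI-capable errors that dump
	 * RAZWI/MMU info before the IRQ is re-armed in the firmware, and
	 * clock-change notifications that are only logged.
	 */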
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(goya->events_stat_aggregate);
		return goya->events_stat_aggregate;
	}

	*size = (u32) sizeof(goya->events_stat);
	return goya->events_stat;
}

static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
					u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = cb->kernel_address;

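	/*
	 * A single LIN_DMA packet covers at most 2GB, so the memset is
	 * split into 2GB chunks plus a final remainder; e.g. a 5GB range
	 * becomes three packets of 2GB, 2GB and 1GB.
	 */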
	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

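	/*
	 * DMA channel 0 completes on sync object 1007 (set above);
	 * channels 1-4 get consecutive sync objects starting at 1000.
	 */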
	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
			lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}

static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}

static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}

static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

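	/*
	 * Identity-map the CPU FW image in DRAM with 2MB pages, then map
	 * the 2MB CPU-accessible host buffer at VA_CPU_ACCESSIBLE_MEM_ADDR:
	 * with a single 2MB page when its DMA address is 2MB-aligned,
	 * otherwise with 4KB pages.
	 */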
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

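	/*
	 * Unwind in reverse order: drop any 4KB CPU-accessible pages
	 * first, then the 2MB FW-image pages mapped above.
	 */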
unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}

void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}

static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		WARN(1, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}

static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need for an L1-only invalidation on Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need for an L1-only invalidation on Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines
	 * with a mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
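	/* e.g. a PI of 0xFF wraps to 0 instead of carrying into the mask bits */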
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

int goya_send_heartbeat(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

int goya_cpucp_info_get(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 dram_size;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		if ((!is_power_of_2(dram_size)) ||
				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
				dram_size);
			dram_size = DRAM_PHYS_DEFAULT_SIZE;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	return 0;
}

static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

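	/*
	 * Each engine contributes one bit to *mask (1 = busy), indexed by
	 * its GOYA_ENGINE_ID_*, so callers can tell which engine kept the
	 * device from going idle.
	 */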
	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
			"---  -------  ------------  -------------\n");

	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}

static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}

static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int goya_ctx_init(struct hl_ctx *ctx)
{
	return 0;
}

u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}

static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}

static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
{
}

static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u16 sob_val, u16 mon_id, u32 q_idx)
{
}

static void goya_reset_sob(struct hl_device *hdev, void *data)
{
}

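/*
 * HL_POWER9_HOST_MAGIC in the PSOC scratchpad register (presumably left
 * there by the boot firmware) indicates a host that supports a full
 * 64-bit DMA mask; otherwise the device falls back to 48-bit DMA
 * addressing.
 */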
static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

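/*
 * The 64-bit PSOC timestamp is composed from its two 32-bit halves. The
 * upper half is sampled first with no rollover retry, so a read that
 * races a lower-half wraparound may be off by one upper-half increment.
 */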
u64 goya_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.send_heartbeat = goya_send_heartbeat,
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.get_hw_state = goya_get_hw_state,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}