xref: /OK3568_Linux_fs/kernel/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 NVIDIA CORPORATION.  All rights reserved.

#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include "arm-smmu.h"

/*
 * Tegra194 has three ARM MMU-500 instances.
 * Two of them are used together and must be programmed identically to
 * interleave IOVA accesses across them; they translate accesses from
 * non-isochronous HW devices.
 * The third instance translates accesses from isochronous HW devices.
 * This implementation supports programming of the two instances that must
 * be programmed identically.
 * The third instance is used through the standard arm-smmu driver itself
 * and is out of scope of this implementation.
 */
#define NUM_SMMU_INSTANCES 2

struct nvidia_smmu {
	struct arm_smmu_device	smmu;
	void __iomem		*bases[NUM_SMMU_INSTANCES];
};

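/* Return the I/O base of register page @page within SMMU instance @inst. */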
static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
					     unsigned int inst, int page)
{
	struct nvidia_smmu *nvidia_smmu;

	nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
	return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
}

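/*
 * Register reads are served from instance 0 only; both instances are
 * programmed identically, so instance 0 is representative of both.
 */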
static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
				int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readl_relaxed(reg);
}

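/* Mirror every register write to both instances to keep them programmed identically. */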
static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
				  int page, int offset, u32 val)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writel_relaxed(val, reg);
	}
}

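/* 64-bit variants: read from instance 0 only, mirror writes to both instances. */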
static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
				  int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readq_relaxed(reg);
}

static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
				    int page, int offset, u64 val)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writeq_relaxed(val, reg);
	}
}

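/*
 * Issue the TLB sync (through the mirrored write path) and poll the status
 * register of every instance with exponential back-off; the sync is complete
 * only once all instances report GSACTIVE as clear.
 */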
static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				 int sync, int status)
{
	unsigned int delay;

	arm_smmu_writel(smmu, page, sync, 0);

	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		unsigned int spin_cnt;

		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			u32 val = 0;
			unsigned int i;

			for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
				void __iomem *reg;

				reg = nvidia_smmu_page(smmu, i, page) + status;
				val |= readl_relaxed(reg);
			}

			if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;

			cpu_relax();
		}

		udelay(delay);
	}

	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

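/* Clear any stale global fault status on every instance at reset time. */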
static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
{
	unsigned int i;

	for (i = 0; i < NUM_SMMU_INSTANCES; i++) {
		u32 val;
		void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
				    ARM_SMMU_GR0_sGFSR;

		/* clear global FSR */
		val = readl_relaxed(reg);
		writel_relaxed(val, reg);
	}

	return 0;
}

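/* Report and clear a global fault on a single SMMU instance. */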
static irqreturn_t nvidia_smmu_global_fault_inst(int irq,
						 struct arm_smmu_device *smmu,
						 int inst)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	if (!gfsr)
		return IRQ_NONE;

	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	dev_err_ratelimited(smmu->dev,
			    "Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
			    "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			    gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}

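/*
 * Global fault IRQ handler: check each instance and report IRQ_HANDLED if
 * any of them had a pending global fault.
 */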
static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev)
{
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu = dev;

	for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) {
		irqreturn_t irq_ret;

		irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst);
		if (irq_ret == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}

	return ret;
}

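/* Report and clear a context fault on one context bank of one instance. */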
static irqreturn_t nvidia_smmu_context_fault_bank(int irq,
						  struct arm_smmu_device *smmu,
						  int idx, int inst)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1);
	void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx);

	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
			    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

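/*
 * Context fault IRQ handler: @dev is the iommu_domain whose IRQ fired;
 * scan its SMMU's context banks on both instances for pending faults.
 */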
static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev)
{
	int idx;
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain;

	smmu_domain = container_of(domain, struct arm_smmu_domain, domain);
	smmu = smmu_domain->smmu;

	for (inst = 0; inst < NUM_SMMU_INSTANCES; inst++) {
		irqreturn_t irq_ret;

		/*
		 * The interrupt line is shared between all contexts.
		 * Check for faults across all contexts.
		 */
		for (idx = 0; idx < smmu->num_context_banks; idx++) {
			irq_ret = nvidia_smmu_context_fault_bank(irq, smmu,
								 idx, inst);
			if (irq_ret == IRQ_HANDLED)
				ret = IRQ_HANDLED;
		}
	}

	return ret;
}

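/* Implementation hooks plugged into the core arm-smmu driver. */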
static const struct arm_smmu_impl nvidia_smmu_impl = {
	.read_reg = nvidia_smmu_read_reg,
	.write_reg = nvidia_smmu_write_reg,
	.read_reg64 = nvidia_smmu_read_reg64,
	.write_reg64 = nvidia_smmu_write_reg64,
	.reset = nvidia_smmu_reset,
	.tlb_sync = nvidia_smmu_tlb_sync,
	.global_fault = nvidia_smmu_global_fault,
	.context_fault = nvidia_smmu_context_fault,
};

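/*
 * Probe-time hook: wrap the core arm_smmu_device in a nvidia_smmu, map the
 * second MMU-500 instance (memory resource 1) and install the NVIDIA
 * implementation hooks.
 */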
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
{
	struct resource *res;
	struct device *dev = smmu->dev;
	struct nvidia_smmu *nvidia_smmu;
	struct platform_device *pdev = to_platform_device(dev);

	nvidia_smmu = devm_kzalloc(dev, sizeof(*nvidia_smmu), GFP_KERNEL);
	if (!nvidia_smmu)
		return ERR_PTR(-ENOMEM);

	/*
	 * Copy the data from struct arm_smmu_device *smmu allocated in
	 * arm-smmu.c. The smmu from struct nvidia_smmu replaces the smmu
	 * pointer used in arm-smmu.c once this function returns.
	 * This is necessary to derive nvidia_smmu from the smmu pointer passed
	 * through arm_smmu_impl function calls subsequently.
	 */
	nvidia_smmu->smmu = *smmu;
	/* Instance 0 is ioremapped by arm-smmu.c. */
	nvidia_smmu->bases[0] = smmu->base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!res)
		return ERR_PTR(-ENODEV);

	nvidia_smmu->bases[1] = devm_ioremap_resource(dev, res);
	if (IS_ERR(nvidia_smmu->bases[1]))
		return ERR_CAST(nvidia_smmu->bases[1]);

	nvidia_smmu->smmu.impl = &nvidia_smmu_impl;

	/*
	 * Free the struct arm_smmu_device *smmu allocated in arm-smmu.c.
	 * Once this function returns, arm-smmu.c will use the arm_smmu_device
	 * allocated as part of struct nvidia_smmu.
	 */
	devm_kfree(dev, smmu);

	return &nvidia_smmu->smmu;
}