// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include <linux/amba/bus.h>

#include "arm-smmu-v3.h"

static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

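/*
 * Register offsets above 64K live in the second 64K page of the SMMU's MMIO
 * region. smmu->page1 points at that page, or back at page 0 for
 * implementations that only expose a single register page (see the
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY workaround above).
 */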
static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
						 struct arm_smmu_device *smmu)
{
	if (offset > SZ_64K)
		return smmu->page1 + offset - SZ_64K;

	return smmu->base + offset;
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
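/*
 * The prod/cons values below pack three fields into one 32-bit word: the
 * entry index in the low bits, a wrap bit just above the index (Q_WRP) and
 * an overflow flag in the top bit (Q_OVF). Comparing the indices together
 * with the wrap bits distinguishes an empty queue from a full one.
 */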
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

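/*
 * A command at index @prod has been consumed once the cons pointer, taking
 * the wrap bit into account, has moved to or beyond it.
 */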
static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

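/*
 * Queue polling: use WFE when the SMMU can send an event on queue updates
 * (ARM_SMMU_FEAT_SEV), otherwise spin with cpu_relax() a bounded number of
 * times before backing off with an exponentially increasing udelay().
 */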
static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
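/*
 * Translate a driver-internal arm_smmu_cmdq_ent into the pair of 64-bit
 * words that make up a hardware command queue entry.
 */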
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 u32 prod)
{
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	static const char *cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		fallthrough;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
		return;
	}

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})


/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system.  If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 * 	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
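/*
 * Set (or poll) the validity bitmap for all entries in [sprod, eprod),
 * processing up to BITS_PER_LONG entries per iteration. The "valid" polarity
 * alternates with the wrap bit, as described in the comment below.
 */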
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 * 	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 * 			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

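/* Copy @n commands into their reserved queue slots, starting at @prod. */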
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
}

static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}

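/*
 * Command batching: commands are staged in a fixed-size buffer and flushed
 * (without a sync) whenever it fills up; the final submit issues the whole
 * batch followed by a CMD_SYNC.
 */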
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}
	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

/* Context descriptor manipulation functions */
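/* Invalidate TLB entries tagged with @asid and wait for completion. */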
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode = CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
}

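/*
 * Issue a CFGI_CD for @ssid to every stream ID of every master attached to
 * the domain, so that the SMMU re-fetches the context descriptor.
 */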
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_sids; i++) {
			cmd.cfgi.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

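/*
 * Return a pointer to the context descriptor for @ssid, allocating and
 * installing an L2 leaf table on demand when the two-level format is in use.
 */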
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}

int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (4) */
		val = 0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}

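/*
 * Allocate the context descriptor table(s): a single linear table when the
 * SSID space is small or two-level tables are unsupported, otherwise an L1
 * table whose leaf tables are allocated lazily in arm_smmu_get_cd_ptr().
 */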
arm_smmu_alloc_cd_tables(struct arm_smmu_domain * smmu_domain)1053*4882a593Smuzhiyun static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054*4882a593Smuzhiyun {
1055*4882a593Smuzhiyun 	int ret;
1056*4882a593Smuzhiyun 	size_t l1size;
1057*4882a593Smuzhiyun 	size_t max_contexts;
1058*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1059*4882a593Smuzhiyun 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060*4882a593Smuzhiyun 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061*4882a593Smuzhiyun 
1062*4882a593Smuzhiyun 	max_contexts = 1 << cfg->s1cdmax;
1063*4882a593Smuzhiyun 
1064*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065*4882a593Smuzhiyun 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1066*4882a593Smuzhiyun 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067*4882a593Smuzhiyun 		cdcfg->num_l1_ents = max_contexts;
1068*4882a593Smuzhiyun 
1069*4882a593Smuzhiyun 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070*4882a593Smuzhiyun 	} else {
1071*4882a593Smuzhiyun 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072*4882a593Smuzhiyun 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073*4882a593Smuzhiyun 						  CTXDESC_L2_ENTRIES);
1074*4882a593Smuzhiyun 
1075*4882a593Smuzhiyun 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076*4882a593Smuzhiyun 					      sizeof(*cdcfg->l1_desc),
1077*4882a593Smuzhiyun 					      GFP_KERNEL);
1078*4882a593Smuzhiyun 		if (!cdcfg->l1_desc)
1079*4882a593Smuzhiyun 			return -ENOMEM;
1080*4882a593Smuzhiyun 
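		/*
		 * Each L1 descriptor is a single 8-byte dword
		 * (CTXDESC_L1_DESC_DWORDS == 1 in arm-smmu-v3.h); the L2
		 * tables of CTXDESC_L2_ENTRIES CDs that they point to are
		 * allocated on demand when a CD is first installed.
		 */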
1081*4882a593Smuzhiyun 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082*4882a593Smuzhiyun 	}
1083*4882a593Smuzhiyun 
1084*4882a593Smuzhiyun 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085*4882a593Smuzhiyun 					   GFP_KERNEL);
1086*4882a593Smuzhiyun 	if (!cdcfg->cdtab) {
1087*4882a593Smuzhiyun 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088*4882a593Smuzhiyun 		ret = -ENOMEM;
1089*4882a593Smuzhiyun 		goto err_free_l1;
1090*4882a593Smuzhiyun 	}
1091*4882a593Smuzhiyun 
1092*4882a593Smuzhiyun 	return 0;
1093*4882a593Smuzhiyun 
1094*4882a593Smuzhiyun err_free_l1:
1095*4882a593Smuzhiyun 	if (cdcfg->l1_desc) {
1096*4882a593Smuzhiyun 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1097*4882a593Smuzhiyun 		cdcfg->l1_desc = NULL;
1098*4882a593Smuzhiyun 	}
1099*4882a593Smuzhiyun 	return ret;
1100*4882a593Smuzhiyun }
1101*4882a593Smuzhiyun 
1102*4882a593Smuzhiyun static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103*4882a593Smuzhiyun {
1104*4882a593Smuzhiyun 	int i;
1105*4882a593Smuzhiyun 	size_t size, l1size;
1106*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1107*4882a593Smuzhiyun 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108*4882a593Smuzhiyun 
1109*4882a593Smuzhiyun 	if (cdcfg->l1_desc) {
1110*4882a593Smuzhiyun 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111*4882a593Smuzhiyun 
1112*4882a593Smuzhiyun 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113*4882a593Smuzhiyun 			if (!cdcfg->l1_desc[i].l2ptr)
1114*4882a593Smuzhiyun 				continue;
1115*4882a593Smuzhiyun 
1116*4882a593Smuzhiyun 			dmam_free_coherent(smmu->dev, size,
1117*4882a593Smuzhiyun 					   cdcfg->l1_desc[i].l2ptr,
1118*4882a593Smuzhiyun 					   cdcfg->l1_desc[i].l2ptr_dma);
1119*4882a593Smuzhiyun 		}
1120*4882a593Smuzhiyun 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1121*4882a593Smuzhiyun 		cdcfg->l1_desc = NULL;
1122*4882a593Smuzhiyun 
1123*4882a593Smuzhiyun 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124*4882a593Smuzhiyun 	} else {
1125*4882a593Smuzhiyun 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126*4882a593Smuzhiyun 	}
1127*4882a593Smuzhiyun 
1128*4882a593Smuzhiyun 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129*4882a593Smuzhiyun 	cdcfg->cdtab_dma = 0;
1130*4882a593Smuzhiyun 	cdcfg->cdtab = NULL;
1131*4882a593Smuzhiyun }
1132*4882a593Smuzhiyun 
1133*4882a593Smuzhiyun bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134*4882a593Smuzhiyun {
1135*4882a593Smuzhiyun 	bool free;
1136*4882a593Smuzhiyun 	struct arm_smmu_ctx_desc *old_cd;
1137*4882a593Smuzhiyun 
1138*4882a593Smuzhiyun 	if (!cd->asid)
1139*4882a593Smuzhiyun 		return false;
1140*4882a593Smuzhiyun 
1141*4882a593Smuzhiyun 	free = refcount_dec_and_test(&cd->refs);
1142*4882a593Smuzhiyun 	if (free) {
1143*4882a593Smuzhiyun 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144*4882a593Smuzhiyun 		WARN_ON(old_cd != cd);
1145*4882a593Smuzhiyun 	}
1146*4882a593Smuzhiyun 	return free;
1147*4882a593Smuzhiyun }
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun /* Stream table manipulation functions */
1150*4882a593Smuzhiyun static void
1151*4882a593Smuzhiyun arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152*4882a593Smuzhiyun {
1153*4882a593Smuzhiyun 	u64 val = 0;
1154*4882a593Smuzhiyun 
1155*4882a593Smuzhiyun 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156*4882a593Smuzhiyun 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157*4882a593Smuzhiyun 
1158*4882a593Smuzhiyun 	/* See comment in arm_smmu_write_ctx_desc() */
1159*4882a593Smuzhiyun 	WRITE_ONCE(*dst, cpu_to_le64(val));
1160*4882a593Smuzhiyun }
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163*4882a593Smuzhiyun {
1164*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd = {
1165*4882a593Smuzhiyun 		.opcode	= CMDQ_OP_CFGI_STE,
1166*4882a593Smuzhiyun 		.cfgi	= {
1167*4882a593Smuzhiyun 			.sid	= sid,
1168*4882a593Smuzhiyun 			.leaf	= true,
1169*4882a593Smuzhiyun 		},
1170*4882a593Smuzhiyun 	};
1171*4882a593Smuzhiyun 
1172*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_sync(smmu);
1174*4882a593Smuzhiyun }
1175*4882a593Smuzhiyun 
1176*4882a593Smuzhiyun static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177*4882a593Smuzhiyun 				      __le64 *dst)
1178*4882a593Smuzhiyun {
1179*4882a593Smuzhiyun 	/*
1180*4882a593Smuzhiyun 	 * This is hideously complicated, but we only really care about
1181*4882a593Smuzhiyun 	 * three cases at the moment:
1182*4882a593Smuzhiyun 	 *
1183*4882a593Smuzhiyun 	 * 1. Invalid (all zero) -> bypass/fault (init)
1184*4882a593Smuzhiyun 	 * 2. Bypass/fault -> translation/bypass (attach)
1185*4882a593Smuzhiyun 	 * 3. Translation/bypass -> bypass/fault (detach)
1186*4882a593Smuzhiyun 	 *
1187*4882a593Smuzhiyun 	 * Given that we can't update the STE atomically and the SMMU
1188*4882a593Smuzhiyun 	 * doesn't read the thing in a defined order, that leaves us
1189*4882a593Smuzhiyun 	 * with the following maintenance requirements:
1190*4882a593Smuzhiyun 	 *
1191*4882a593Smuzhiyun 	 * 1. Update Config, return (init time STEs aren't live)
1192*4882a593Smuzhiyun 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193*4882a593Smuzhiyun 	 * 3. Update Config, sync
1194*4882a593Smuzhiyun 	 */
1195*4882a593Smuzhiyun 	u64 val = le64_to_cpu(dst[0]);
1196*4882a593Smuzhiyun 	bool ste_live = false;
1197*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = NULL;
1198*4882a593Smuzhiyun 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199*4882a593Smuzhiyun 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = NULL;
1201*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1202*4882a593Smuzhiyun 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1203*4882a593Smuzhiyun 		.prefetch	= {
1204*4882a593Smuzhiyun 			.sid	= sid,
1205*4882a593Smuzhiyun 		},
1206*4882a593Smuzhiyun 	};
1207*4882a593Smuzhiyun 
1208*4882a593Smuzhiyun 	if (master) {
1209*4882a593Smuzhiyun 		smmu_domain = master->domain;
1210*4882a593Smuzhiyun 		smmu = master->smmu;
1211*4882a593Smuzhiyun 	}
1212*4882a593Smuzhiyun 
1213*4882a593Smuzhiyun 	if (smmu_domain) {
1214*4882a593Smuzhiyun 		switch (smmu_domain->stage) {
1215*4882a593Smuzhiyun 		case ARM_SMMU_DOMAIN_S1:
1216*4882a593Smuzhiyun 			s1_cfg = &smmu_domain->s1_cfg;
1217*4882a593Smuzhiyun 			break;
1218*4882a593Smuzhiyun 		case ARM_SMMU_DOMAIN_S2:
1219*4882a593Smuzhiyun 		case ARM_SMMU_DOMAIN_NESTED:
1220*4882a593Smuzhiyun 			s2_cfg = &smmu_domain->s2_cfg;
1221*4882a593Smuzhiyun 			break;
1222*4882a593Smuzhiyun 		default:
1223*4882a593Smuzhiyun 			break;
1224*4882a593Smuzhiyun 		}
1225*4882a593Smuzhiyun 	}
1226*4882a593Smuzhiyun 
1227*4882a593Smuzhiyun 	if (val & STRTAB_STE_0_V) {
1228*4882a593Smuzhiyun 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229*4882a593Smuzhiyun 		case STRTAB_STE_0_CFG_BYPASS:
1230*4882a593Smuzhiyun 			break;
1231*4882a593Smuzhiyun 		case STRTAB_STE_0_CFG_S1_TRANS:
1232*4882a593Smuzhiyun 		case STRTAB_STE_0_CFG_S2_TRANS:
1233*4882a593Smuzhiyun 			ste_live = true;
1234*4882a593Smuzhiyun 			break;
1235*4882a593Smuzhiyun 		case STRTAB_STE_0_CFG_ABORT:
1236*4882a593Smuzhiyun 			BUG_ON(!disable_bypass);
1237*4882a593Smuzhiyun 			break;
1238*4882a593Smuzhiyun 		default:
1239*4882a593Smuzhiyun 			BUG(); /* STE corruption */
1240*4882a593Smuzhiyun 		}
1241*4882a593Smuzhiyun 	}
1242*4882a593Smuzhiyun 
1243*4882a593Smuzhiyun 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1244*4882a593Smuzhiyun 	val = STRTAB_STE_0_V;
1245*4882a593Smuzhiyun 
1246*4882a593Smuzhiyun 	/* Bypass/fault */
1247*4882a593Smuzhiyun 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248*4882a593Smuzhiyun 		if (!smmu_domain && disable_bypass)
1249*4882a593Smuzhiyun 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250*4882a593Smuzhiyun 		else
1251*4882a593Smuzhiyun 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 		dst[0] = cpu_to_le64(val);
1254*4882a593Smuzhiyun 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255*4882a593Smuzhiyun 						STRTAB_STE_1_SHCFG_INCOMING));
1256*4882a593Smuzhiyun 		dst[2] = 0; /* Nuke the VMID */
1257*4882a593Smuzhiyun 		/*
1258*4882a593Smuzhiyun 		 * The SMMU can perform negative caching, so we must sync
1259*4882a593Smuzhiyun 		 * the STE regardless of whether the old value was live.
1260*4882a593Smuzhiyun 		 */
1261*4882a593Smuzhiyun 		if (smmu)
1262*4882a593Smuzhiyun 			arm_smmu_sync_ste_for_sid(smmu, sid);
1263*4882a593Smuzhiyun 		return;
1264*4882a593Smuzhiyun 	}
1265*4882a593Smuzhiyun 
1266*4882a593Smuzhiyun 	if (s1_cfg) {
1267*4882a593Smuzhiyun 		BUG_ON(ste_live);
1268*4882a593Smuzhiyun 		dst[1] = cpu_to_le64(
1269*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274*4882a593Smuzhiyun 
1275*4882a593Smuzhiyun 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276*4882a593Smuzhiyun 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277*4882a593Smuzhiyun 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278*4882a593Smuzhiyun 
1279*4882a593Smuzhiyun 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280*4882a593Smuzhiyun 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281*4882a593Smuzhiyun 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282*4882a593Smuzhiyun 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283*4882a593Smuzhiyun 	}
1284*4882a593Smuzhiyun 
1285*4882a593Smuzhiyun 	if (s2_cfg) {
1286*4882a593Smuzhiyun 		BUG_ON(ste_live);
1287*4882a593Smuzhiyun 		dst[2] = cpu_to_le64(
1288*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289*4882a593Smuzhiyun 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290*4882a593Smuzhiyun #ifdef __BIG_ENDIAN
1291*4882a593Smuzhiyun 			 STRTAB_STE_2_S2ENDI |
1292*4882a593Smuzhiyun #endif
1293*4882a593Smuzhiyun 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294*4882a593Smuzhiyun 			 STRTAB_STE_2_S2R);
1295*4882a593Smuzhiyun 
1296*4882a593Smuzhiyun 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297*4882a593Smuzhiyun 
1298*4882a593Smuzhiyun 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299*4882a593Smuzhiyun 	}
1300*4882a593Smuzhiyun 
1301*4882a593Smuzhiyun 	if (master->ats_enabled)
1302*4882a593Smuzhiyun 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303*4882a593Smuzhiyun 						 STRTAB_STE_1_EATS_TRANS));
1304*4882a593Smuzhiyun 
1305*4882a593Smuzhiyun 	arm_smmu_sync_ste_for_sid(smmu, sid);
1306*4882a593Smuzhiyun 	/* See comment in arm_smmu_write_ctx_desc() */
1307*4882a593Smuzhiyun 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1308*4882a593Smuzhiyun 	arm_smmu_sync_ste_for_sid(smmu, sid);
1309*4882a593Smuzhiyun 
1310*4882a593Smuzhiyun 	/* It's likely that we'll want to use the new STE soon */
1311*4882a593Smuzhiyun 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313*4882a593Smuzhiyun }
1314*4882a593Smuzhiyun 
1315*4882a593Smuzhiyun static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun 	unsigned int i;
1318*4882a593Smuzhiyun 
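	/*
	 * With a NULL master, arm_smmu_write_strtab_ent() sets each STE to
	 * abort or bypass according to disable_bypass; no CFGI sync is issued
	 * here since smmu is NULL and the table is not yet live.
	 */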
1319*4882a593Smuzhiyun 	for (i = 0; i < nent; ++i) {
1320*4882a593Smuzhiyun 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321*4882a593Smuzhiyun 		strtab += STRTAB_STE_DWORDS;
1322*4882a593Smuzhiyun 	}
1323*4882a593Smuzhiyun }
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326*4882a593Smuzhiyun {
1327*4882a593Smuzhiyun 	size_t size;
1328*4882a593Smuzhiyun 	void *strtab;
1329*4882a593Smuzhiyun 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330*4882a593Smuzhiyun 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331*4882a593Smuzhiyun 
1332*4882a593Smuzhiyun 	if (desc->l2ptr)
1333*4882a593Smuzhiyun 		return 0;
1334*4882a593Smuzhiyun 
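	/*
	 * Assuming STRTAB_SPLIT == 8 and STRTAB_STE_DWORDS == 8, this is
	 * 1 << (8 + 3 + 3) = 16KB: 256 STEs of 64 bytes each per L2 table.
	 */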
1335*4882a593Smuzhiyun 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336*4882a593Smuzhiyun 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337*4882a593Smuzhiyun 
1338*4882a593Smuzhiyun 	desc->span = STRTAB_SPLIT + 1;
1339*4882a593Smuzhiyun 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340*4882a593Smuzhiyun 					  GFP_KERNEL);
1341*4882a593Smuzhiyun 	if (!desc->l2ptr) {
1342*4882a593Smuzhiyun 		dev_err(smmu->dev,
1343*4882a593Smuzhiyun 			"failed to allocate l2 stream table for SID %u\n",
1344*4882a593Smuzhiyun 			sid);
1345*4882a593Smuzhiyun 		return -ENOMEM;
1346*4882a593Smuzhiyun 	}
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349*4882a593Smuzhiyun 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1350*4882a593Smuzhiyun 	return 0;
1351*4882a593Smuzhiyun }
1352*4882a593Smuzhiyun 
1353*4882a593Smuzhiyun /* IRQ and event handlers */
1354*4882a593Smuzhiyun static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355*4882a593Smuzhiyun {
1356*4882a593Smuzhiyun 	int i;
1357*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = dev;
1358*4882a593Smuzhiyun 	struct arm_smmu_queue *q = &smmu->evtq.q;
1359*4882a593Smuzhiyun 	struct arm_smmu_ll_queue *llq = &q->llq;
1360*4882a593Smuzhiyun 	u64 evt[EVTQ_ENT_DWORDS];
1361*4882a593Smuzhiyun 
1362*4882a593Smuzhiyun 	do {
1363*4882a593Smuzhiyun 		while (!queue_remove_raw(q, evt)) {
1364*4882a593Smuzhiyun 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365*4882a593Smuzhiyun 
1366*4882a593Smuzhiyun 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367*4882a593Smuzhiyun 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368*4882a593Smuzhiyun 				dev_info(smmu->dev, "\t0x%016llx\n",
1369*4882a593Smuzhiyun 					 (unsigned long long)evt[i]);
1370*4882a593Smuzhiyun 
1371*4882a593Smuzhiyun 			cond_resched();
1372*4882a593Smuzhiyun 		}
1373*4882a593Smuzhiyun 
1374*4882a593Smuzhiyun 		/*
1375*4882a593Smuzhiyun 		 * Not much we can do on overflow, so scream and pretend we're
1376*4882a593Smuzhiyun 		 * trying harder.
1377*4882a593Smuzhiyun 		 */
1378*4882a593Smuzhiyun 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1379*4882a593Smuzhiyun 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1380*4882a593Smuzhiyun 	} while (!queue_empty(llq));
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	/* Sync our overflow flag, as we believe we're up to speed */
1383*4882a593Smuzhiyun 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1384*4882a593Smuzhiyun 		    Q_IDX(llq, llq->cons);
1385*4882a593Smuzhiyun 	return IRQ_HANDLED;
1386*4882a593Smuzhiyun }
1387*4882a593Smuzhiyun 
1388*4882a593Smuzhiyun static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1389*4882a593Smuzhiyun {
1390*4882a593Smuzhiyun 	u32 sid, ssid;
1391*4882a593Smuzhiyun 	u16 grpid;
1392*4882a593Smuzhiyun 	bool ssv, last;
1393*4882a593Smuzhiyun 
1394*4882a593Smuzhiyun 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1395*4882a593Smuzhiyun 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1396*4882a593Smuzhiyun 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1397*4882a593Smuzhiyun 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1398*4882a593Smuzhiyun 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1399*4882a593Smuzhiyun 
1400*4882a593Smuzhiyun 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1401*4882a593Smuzhiyun 	dev_info(smmu->dev,
1402*4882a593Smuzhiyun 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1403*4882a593Smuzhiyun 		 sid, ssid, grpid, last ? "L" : "",
1404*4882a593Smuzhiyun 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1405*4882a593Smuzhiyun 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1406*4882a593Smuzhiyun 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1407*4882a593Smuzhiyun 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1408*4882a593Smuzhiyun 		 evt[1] & PRIQ_1_ADDR_MASK);
1409*4882a593Smuzhiyun 
1410*4882a593Smuzhiyun 	if (last) {
1411*4882a593Smuzhiyun 		struct arm_smmu_cmdq_ent cmd = {
1412*4882a593Smuzhiyun 			.opcode			= CMDQ_OP_PRI_RESP,
1413*4882a593Smuzhiyun 			.substream_valid	= ssv,
1414*4882a593Smuzhiyun 			.pri			= {
1415*4882a593Smuzhiyun 				.sid	= sid,
1416*4882a593Smuzhiyun 				.ssid	= ssid,
1417*4882a593Smuzhiyun 				.grpid	= grpid,
1418*4882a593Smuzhiyun 				.resp	= PRI_RESP_DENY,
1419*4882a593Smuzhiyun 			},
1420*4882a593Smuzhiyun 		};
1421*4882a593Smuzhiyun 
1422*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423*4882a593Smuzhiyun 	}
1424*4882a593Smuzhiyun }
1425*4882a593Smuzhiyun 
1426*4882a593Smuzhiyun static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1427*4882a593Smuzhiyun {
1428*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = dev;
1429*4882a593Smuzhiyun 	struct arm_smmu_queue *q = &smmu->priq.q;
1430*4882a593Smuzhiyun 	struct arm_smmu_ll_queue *llq = &q->llq;
1431*4882a593Smuzhiyun 	u64 evt[PRIQ_ENT_DWORDS];
1432*4882a593Smuzhiyun 
1433*4882a593Smuzhiyun 	do {
1434*4882a593Smuzhiyun 		while (!queue_remove_raw(q, evt))
1435*4882a593Smuzhiyun 			arm_smmu_handle_ppr(smmu, evt);
1436*4882a593Smuzhiyun 
1437*4882a593Smuzhiyun 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1438*4882a593Smuzhiyun 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1439*4882a593Smuzhiyun 	} while (!queue_empty(llq));
1440*4882a593Smuzhiyun 
1441*4882a593Smuzhiyun 	/* Sync our overflow flag, as we believe we're up to speed */
1442*4882a593Smuzhiyun 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1443*4882a593Smuzhiyun 		    Q_IDX(llq, llq->cons);
1444*4882a593Smuzhiyun 	queue_sync_cons_out(q);
1445*4882a593Smuzhiyun 	return IRQ_HANDLED;
1446*4882a593Smuzhiyun }
1447*4882a593Smuzhiyun 
1448*4882a593Smuzhiyun static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1451*4882a593Smuzhiyun {
1452*4882a593Smuzhiyun 	u32 gerror, gerrorn, active;
1453*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = dev;
1454*4882a593Smuzhiyun 
1455*4882a593Smuzhiyun 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1456*4882a593Smuzhiyun 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1457*4882a593Smuzhiyun 
1458*4882a593Smuzhiyun 	active = gerror ^ gerrorn;
1459*4882a593Smuzhiyun 	if (!(active & GERROR_ERR_MASK))
1460*4882a593Smuzhiyun 		return IRQ_NONE; /* No errors pending */
1461*4882a593Smuzhiyun 
1462*4882a593Smuzhiyun 	dev_warn(smmu->dev,
1463*4882a593Smuzhiyun 		 "unexpected global error reported (0x%08x), this could be serious\n",
1464*4882a593Smuzhiyun 		 active);
1465*4882a593Smuzhiyun 
1466*4882a593Smuzhiyun 	if (active & GERROR_SFM_ERR) {
1467*4882a593Smuzhiyun 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1468*4882a593Smuzhiyun 		arm_smmu_device_disable(smmu);
1469*4882a593Smuzhiyun 	}
1470*4882a593Smuzhiyun 
1471*4882a593Smuzhiyun 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1472*4882a593Smuzhiyun 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1473*4882a593Smuzhiyun 
1474*4882a593Smuzhiyun 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1475*4882a593Smuzhiyun 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1476*4882a593Smuzhiyun 
1477*4882a593Smuzhiyun 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1478*4882a593Smuzhiyun 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1479*4882a593Smuzhiyun 
1480*4882a593Smuzhiyun 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1481*4882a593Smuzhiyun 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1482*4882a593Smuzhiyun 
1483*4882a593Smuzhiyun 	if (active & GERROR_PRIQ_ABT_ERR)
1484*4882a593Smuzhiyun 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1485*4882a593Smuzhiyun 
1486*4882a593Smuzhiyun 	if (active & GERROR_EVTQ_ABT_ERR)
1487*4882a593Smuzhiyun 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1488*4882a593Smuzhiyun 
1489*4882a593Smuzhiyun 	if (active & GERROR_CMDQ_ERR)
1490*4882a593Smuzhiyun 		arm_smmu_cmdq_skip_err(smmu);
1491*4882a593Smuzhiyun 
1492*4882a593Smuzhiyun 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1493*4882a593Smuzhiyun 	return IRQ_HANDLED;
1494*4882a593Smuzhiyun }
1495*4882a593Smuzhiyun 
1496*4882a593Smuzhiyun static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1497*4882a593Smuzhiyun {
1498*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = dev;
1499*4882a593Smuzhiyun 
1500*4882a593Smuzhiyun 	arm_smmu_evtq_thread(irq, dev);
1501*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1502*4882a593Smuzhiyun 		arm_smmu_priq_thread(irq, dev);
1503*4882a593Smuzhiyun 
1504*4882a593Smuzhiyun 	return IRQ_HANDLED;
1505*4882a593Smuzhiyun }
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1508*4882a593Smuzhiyun {
1509*4882a593Smuzhiyun 	arm_smmu_gerror_handler(irq, dev);
1510*4882a593Smuzhiyun 	return IRQ_WAKE_THREAD;
1511*4882a593Smuzhiyun }
1512*4882a593Smuzhiyun 
1513*4882a593Smuzhiyun static void
1514*4882a593Smuzhiyun arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1515*4882a593Smuzhiyun 			struct arm_smmu_cmdq_ent *cmd)
1516*4882a593Smuzhiyun {
1517*4882a593Smuzhiyun 	size_t log2_span;
1518*4882a593Smuzhiyun 	size_t span_mask;
1519*4882a593Smuzhiyun 	/* ATC invalidates are always on 4096-byte pages */
1520*4882a593Smuzhiyun 	size_t inval_grain_shift = 12;
1521*4882a593Smuzhiyun 	unsigned long page_start, page_end;
1522*4882a593Smuzhiyun 
1523*4882a593Smuzhiyun 	*cmd = (struct arm_smmu_cmdq_ent) {
1524*4882a593Smuzhiyun 		.opcode			= CMDQ_OP_ATC_INV,
1525*4882a593Smuzhiyun 		.substream_valid	= !!ssid,
1526*4882a593Smuzhiyun 		.atc.ssid		= ssid,
1527*4882a593Smuzhiyun 	};
1528*4882a593Smuzhiyun 
1529*4882a593Smuzhiyun 	if (!size) {
1530*4882a593Smuzhiyun 		cmd->atc.size = ATC_INV_SIZE_ALL;
1531*4882a593Smuzhiyun 		return;
1532*4882a593Smuzhiyun 	}
1533*4882a593Smuzhiyun 
1534*4882a593Smuzhiyun 	page_start	= iova >> inval_grain_shift;
1535*4882a593Smuzhiyun 	page_end	= (iova + size - 1) >> inval_grain_shift;
1536*4882a593Smuzhiyun 
1537*4882a593Smuzhiyun 	/*
1538*4882a593Smuzhiyun 	 * In an ATS Invalidate Request, the address must be aligned on the
1539*4882a593Smuzhiyun 	 * range size, which must be a power of two number of page sizes. We
1540*4882a593Smuzhiyun 	 * thus have to choose between grossly over-invalidating the region, or
1541*4882a593Smuzhiyun 	 * splitting the invalidation into multiple commands. For simplicity
1542*4882a593Smuzhiyun 	 * we'll go with the first solution, but should refine it in the future
1543*4882a593Smuzhiyun 	 * if multiple commands are shown to be more efficient.
1544*4882a593Smuzhiyun 	 *
1545*4882a593Smuzhiyun 	 * Find the smallest power of two that covers the range. The most
1546*4882a593Smuzhiyun 	 * significant differing bit between the start and end addresses,
1547*4882a593Smuzhiyun 	 * fls(start ^ end), indicates the required span. For example:
1548*4882a593Smuzhiyun 	 *
1549*4882a593Smuzhiyun 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1550*4882a593Smuzhiyun 	 *		x = 0b1000 ^ 0b1011 = 0b11
1551*4882a593Smuzhiyun 	 *		span = 1 << fls(x) = 4
1552*4882a593Smuzhiyun 	 *
1553*4882a593Smuzhiyun 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1554*4882a593Smuzhiyun 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1555*4882a593Smuzhiyun 	 *		span = 1 << fls(x) = 16
1556*4882a593Smuzhiyun 	 */
1557*4882a593Smuzhiyun 	log2_span	= fls_long(page_start ^ page_end);
1558*4882a593Smuzhiyun 	span_mask	= (1ULL << log2_span) - 1;
1559*4882a593Smuzhiyun 
1560*4882a593Smuzhiyun 	page_start	&= ~span_mask;
1561*4882a593Smuzhiyun 
1562*4882a593Smuzhiyun 	cmd->atc.addr	= page_start << inval_grain_shift;
1563*4882a593Smuzhiyun 	cmd->atc.size	= log2_span;
1564*4882a593Smuzhiyun }
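/*
 * Illustration of the above: invalidating iova 0x7000 with size 0x4000
 * (pages [7; 10] at the 4KB ATC grain) produces cmd->atc.addr = 0x0 and
 * cmd->atc.size = 4, i.e. the 16-page span from the second example.
 */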
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1567*4882a593Smuzhiyun {
1568*4882a593Smuzhiyun 	int i;
1569*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd;
1570*4882a593Smuzhiyun 
1571*4882a593Smuzhiyun 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1572*4882a593Smuzhiyun 
1573*4882a593Smuzhiyun 	for (i = 0; i < master->num_sids; i++) {
1574*4882a593Smuzhiyun 		cmd.atc.sid = master->sids[i];
1575*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1576*4882a593Smuzhiyun 	}
1577*4882a593Smuzhiyun 
1578*4882a593Smuzhiyun 	return arm_smmu_cmdq_issue_sync(master->smmu);
1579*4882a593Smuzhiyun }
1580*4882a593Smuzhiyun 
1581*4882a593Smuzhiyun static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1582*4882a593Smuzhiyun 				   int ssid, unsigned long iova, size_t size)
1583*4882a593Smuzhiyun {
1584*4882a593Smuzhiyun 	int i;
1585*4882a593Smuzhiyun 	unsigned long flags;
1586*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd;
1587*4882a593Smuzhiyun 	struct arm_smmu_master *master;
1588*4882a593Smuzhiyun 	struct arm_smmu_cmdq_batch cmds = {};
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1591*4882a593Smuzhiyun 		return 0;
1592*4882a593Smuzhiyun 
1593*4882a593Smuzhiyun 	/*
1594*4882a593Smuzhiyun 	 * Ensure that we've completed prior invalidation of the main TLBs
1595*4882a593Smuzhiyun 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1596*4882a593Smuzhiyun 	 * arm_smmu_enable_ats():
1597*4882a593Smuzhiyun 	 *
1598*4882a593Smuzhiyun 	 *	// unmap()			// arm_smmu_enable_ats()
1599*4882a593Smuzhiyun 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1600*4882a593Smuzhiyun 	 *	smp_mb();			[...]
1601*4882a593Smuzhiyun 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1602*4882a593Smuzhiyun 	 *
1603*4882a593Smuzhiyun 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1604*4882a593Smuzhiyun 	 * ATS was enabled at the PCI device before completion of the TLBI.
1605*4882a593Smuzhiyun 	 */
1606*4882a593Smuzhiyun 	smp_mb();
1607*4882a593Smuzhiyun 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1608*4882a593Smuzhiyun 		return 0;
1609*4882a593Smuzhiyun 
1610*4882a593Smuzhiyun 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1611*4882a593Smuzhiyun 
1612*4882a593Smuzhiyun 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1613*4882a593Smuzhiyun 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1614*4882a593Smuzhiyun 		if (!master->ats_enabled)
1615*4882a593Smuzhiyun 			continue;
1616*4882a593Smuzhiyun 
1617*4882a593Smuzhiyun 		for (i = 0; i < master->num_sids; i++) {
1618*4882a593Smuzhiyun 			cmd.atc.sid = master->sids[i];
1619*4882a593Smuzhiyun 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1620*4882a593Smuzhiyun 		}
1621*4882a593Smuzhiyun 	}
1622*4882a593Smuzhiyun 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1623*4882a593Smuzhiyun 
1624*4882a593Smuzhiyun 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1625*4882a593Smuzhiyun }
1626*4882a593Smuzhiyun 
1627*4882a593Smuzhiyun /* IO_PGTABLE API */
1628*4882a593Smuzhiyun static void arm_smmu_tlb_inv_context(void *cookie)
1629*4882a593Smuzhiyun {
1630*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = cookie;
1631*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1632*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd;
1633*4882a593Smuzhiyun 
1634*4882a593Smuzhiyun 	/*
1635*4882a593Smuzhiyun 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1636*4882a593Smuzhiyun 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1637*4882a593Smuzhiyun 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1638*4882a593Smuzhiyun 	 * insertion to guarantee those are observed before the TLBI. Do be
1639*4882a593Smuzhiyun 	 * careful, 007.
1640*4882a593Smuzhiyun 	 */
1641*4882a593Smuzhiyun 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1642*4882a593Smuzhiyun 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1643*4882a593Smuzhiyun 	} else {
1644*4882a593Smuzhiyun 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1645*4882a593Smuzhiyun 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1646*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1647*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_sync(smmu);
1648*4882a593Smuzhiyun 	}
1649*4882a593Smuzhiyun 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1650*4882a593Smuzhiyun }
1651*4882a593Smuzhiyun 
1652*4882a593Smuzhiyun static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1653*4882a593Smuzhiyun 				   size_t granule, bool leaf,
1654*4882a593Smuzhiyun 				   struct arm_smmu_domain *smmu_domain)
1655*4882a593Smuzhiyun {
1656*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1657*4882a593Smuzhiyun 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1658*4882a593Smuzhiyun 	size_t inv_range = granule;
1659*4882a593Smuzhiyun 	struct arm_smmu_cmdq_batch cmds = {};
1660*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd = {
1661*4882a593Smuzhiyun 		.tlbi = {
1662*4882a593Smuzhiyun 			.leaf	= leaf,
1663*4882a593Smuzhiyun 		},
1664*4882a593Smuzhiyun 	};
1665*4882a593Smuzhiyun 
1666*4882a593Smuzhiyun 	if (!size)
1667*4882a593Smuzhiyun 		return;
1668*4882a593Smuzhiyun 
1669*4882a593Smuzhiyun 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1670*4882a593Smuzhiyun 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1671*4882a593Smuzhiyun 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1672*4882a593Smuzhiyun 	} else {
1673*4882a593Smuzhiyun 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1674*4882a593Smuzhiyun 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1675*4882a593Smuzhiyun 	}
1676*4882a593Smuzhiyun 
1677*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1678*4882a593Smuzhiyun 		/* Get the leaf page size */
1679*4882a593Smuzhiyun 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1680*4882a593Smuzhiyun 
1681*4882a593Smuzhiyun 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1682*4882a593Smuzhiyun 		cmd.tlbi.tg = (tg - 10) / 2;
1683*4882a593Smuzhiyun 
1684*4882a593Smuzhiyun 		/* Determine what level the granule is at */
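		/*
		 * e.g. a 2MB block invalidation with 4KB leaf pages (tg == 12)
		 * gives ttl = 4 - ((21 - 3) / (12 - 3)) = 2, a level-2 entry.
		 */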
1685*4882a593Smuzhiyun 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1686*4882a593Smuzhiyun 
1687*4882a593Smuzhiyun 		num_pages = size >> tg;
1688*4882a593Smuzhiyun 	}
1689*4882a593Smuzhiyun 
1690*4882a593Smuzhiyun 	while (iova < end) {
1691*4882a593Smuzhiyun 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1692*4882a593Smuzhiyun 			/*
1693*4882a593Smuzhiyun 			 * On each iteration of the loop, the range is 5 bits
1694*4882a593Smuzhiyun 			 * worth of the aligned size remaining.
1695*4882a593Smuzhiyun 			 * The range in pages is:
1696*4882a593Smuzhiyun 			 *
1697*4882a593Smuzhiyun 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1698*4882a593Smuzhiyun 			 */
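			/*
			 * Worked example, assuming CMDQ_TLBI_RANGE_NUM_MAX ==
			 * 31: num_pages == 35 (0b100011) is covered by two
			 * commands, first scale=0/num=3 (3 pages), then
			 * scale=5/num=1 (32 pages).
			 */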
1699*4882a593Smuzhiyun 			unsigned long scale, num;
1700*4882a593Smuzhiyun 
1701*4882a593Smuzhiyun 			/* Determine the power of 2 multiple number of pages */
1702*4882a593Smuzhiyun 			scale = __ffs(num_pages);
1703*4882a593Smuzhiyun 			cmd.tlbi.scale = scale;
1704*4882a593Smuzhiyun 
1705*4882a593Smuzhiyun 			/* Determine how many chunks of 2^scale size we have */
1706*4882a593Smuzhiyun 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1707*4882a593Smuzhiyun 			cmd.tlbi.num = num - 1;
1708*4882a593Smuzhiyun 
1709*4882a593Smuzhiyun 			/* range is num * 2^scale * pgsize */
1710*4882a593Smuzhiyun 			inv_range = num << (scale + tg);
1711*4882a593Smuzhiyun 
1712*4882a593Smuzhiyun 			/* Clear out the lower order bits for the next iteration */
1713*4882a593Smuzhiyun 			num_pages -= num << scale;
1714*4882a593Smuzhiyun 		}
1715*4882a593Smuzhiyun 
1716*4882a593Smuzhiyun 		cmd.tlbi.addr = iova;
1717*4882a593Smuzhiyun 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1718*4882a593Smuzhiyun 		iova += inv_range;
1719*4882a593Smuzhiyun 	}
1720*4882a593Smuzhiyun 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1721*4882a593Smuzhiyun 
1722*4882a593Smuzhiyun 	/*
1723*4882a593Smuzhiyun 	 * Unfortunately, this can't be leaf-only since we may have
1724*4882a593Smuzhiyun 	 * zapped an entire table.
1725*4882a593Smuzhiyun 	 */
1726*4882a593Smuzhiyun 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1727*4882a593Smuzhiyun }
1728*4882a593Smuzhiyun 
1729*4882a593Smuzhiyun static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1730*4882a593Smuzhiyun 					 unsigned long iova, size_t granule,
1731*4882a593Smuzhiyun 					 void *cookie)
1732*4882a593Smuzhiyun {
1733*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = cookie;
1734*4882a593Smuzhiyun 	struct iommu_domain *domain = &smmu_domain->domain;
1735*4882a593Smuzhiyun 
1736*4882a593Smuzhiyun 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1737*4882a593Smuzhiyun }
1738*4882a593Smuzhiyun 
1739*4882a593Smuzhiyun static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1740*4882a593Smuzhiyun 				  size_t granule, void *cookie)
1741*4882a593Smuzhiyun {
1742*4882a593Smuzhiyun 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1743*4882a593Smuzhiyun }
1744*4882a593Smuzhiyun 
1745*4882a593Smuzhiyun static const struct iommu_flush_ops arm_smmu_flush_ops = {
1746*4882a593Smuzhiyun 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1747*4882a593Smuzhiyun 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1748*4882a593Smuzhiyun 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1749*4882a593Smuzhiyun };
1750*4882a593Smuzhiyun 
1751*4882a593Smuzhiyun /* IOMMU API */
1752*4882a593Smuzhiyun static bool arm_smmu_capable(enum iommu_cap cap)
1753*4882a593Smuzhiyun {
1754*4882a593Smuzhiyun 	switch (cap) {
1755*4882a593Smuzhiyun 	case IOMMU_CAP_CACHE_COHERENCY:
1756*4882a593Smuzhiyun 		return true;
1757*4882a593Smuzhiyun 	case IOMMU_CAP_NOEXEC:
1758*4882a593Smuzhiyun 		return true;
1759*4882a593Smuzhiyun 	default:
1760*4882a593Smuzhiyun 		return false;
1761*4882a593Smuzhiyun 	}
1762*4882a593Smuzhiyun }
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1765*4882a593Smuzhiyun {
1766*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain;
1767*4882a593Smuzhiyun 
1768*4882a593Smuzhiyun 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1769*4882a593Smuzhiyun 	    type != IOMMU_DOMAIN_DMA &&
1770*4882a593Smuzhiyun 	    type != IOMMU_DOMAIN_IDENTITY)
1771*4882a593Smuzhiyun 		return NULL;
1772*4882a593Smuzhiyun 
1773*4882a593Smuzhiyun 	/*
1774*4882a593Smuzhiyun 	 * Allocate the domain and initialise some of its data structures.
1775*4882a593Smuzhiyun 	 * We can't really do anything meaningful until we've added a
1776*4882a593Smuzhiyun 	 * master.
1777*4882a593Smuzhiyun 	 */
1778*4882a593Smuzhiyun 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1779*4882a593Smuzhiyun 	if (!smmu_domain)
1780*4882a593Smuzhiyun 		return NULL;
1781*4882a593Smuzhiyun 
1782*4882a593Smuzhiyun 	if (type == IOMMU_DOMAIN_DMA &&
1783*4882a593Smuzhiyun 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1784*4882a593Smuzhiyun 		kfree(smmu_domain);
1785*4882a593Smuzhiyun 		return NULL;
1786*4882a593Smuzhiyun 	}
1787*4882a593Smuzhiyun 
1788*4882a593Smuzhiyun 	mutex_init(&smmu_domain->init_mutex);
1789*4882a593Smuzhiyun 	INIT_LIST_HEAD(&smmu_domain->devices);
1790*4882a593Smuzhiyun 	spin_lock_init(&smmu_domain->devices_lock);
1791*4882a593Smuzhiyun 
1792*4882a593Smuzhiyun 	return &smmu_domain->domain;
1793*4882a593Smuzhiyun }
1794*4882a593Smuzhiyun 
1795*4882a593Smuzhiyun static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1796*4882a593Smuzhiyun {
1797*4882a593Smuzhiyun 	int idx, size = 1 << span;
1798*4882a593Smuzhiyun 
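	/*
	 * find_first_zero_bit() isn't atomic, so retry if another caller
	 * claimed the same index before our test_and_set_bit().
	 */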
1799*4882a593Smuzhiyun 	do {
1800*4882a593Smuzhiyun 		idx = find_first_zero_bit(map, size);
1801*4882a593Smuzhiyun 		if (idx == size)
1802*4882a593Smuzhiyun 			return -ENOSPC;
1803*4882a593Smuzhiyun 	} while (test_and_set_bit(idx, map));
1804*4882a593Smuzhiyun 
1805*4882a593Smuzhiyun 	return idx;
1806*4882a593Smuzhiyun }
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1809*4882a593Smuzhiyun {
1810*4882a593Smuzhiyun 	clear_bit(idx, map);
1811*4882a593Smuzhiyun }
1812*4882a593Smuzhiyun 
1813*4882a593Smuzhiyun static void arm_smmu_domain_free(struct iommu_domain *domain)
1814*4882a593Smuzhiyun {
1815*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1816*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1817*4882a593Smuzhiyun 
1818*4882a593Smuzhiyun 	iommu_put_dma_cookie(domain);
1819*4882a593Smuzhiyun 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1820*4882a593Smuzhiyun 
1821*4882a593Smuzhiyun 	/* Free the CD and ASID, if we allocated them */
1822*4882a593Smuzhiyun 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1823*4882a593Smuzhiyun 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1824*4882a593Smuzhiyun 
1825*4882a593Smuzhiyun 		/* Prevent SVA from touching the CD while we're freeing it */
1826*4882a593Smuzhiyun 		mutex_lock(&arm_smmu_asid_lock);
1827*4882a593Smuzhiyun 		if (cfg->cdcfg.cdtab)
1828*4882a593Smuzhiyun 			arm_smmu_free_cd_tables(smmu_domain);
1829*4882a593Smuzhiyun 		arm_smmu_free_asid(&cfg->cd);
1830*4882a593Smuzhiyun 		mutex_unlock(&arm_smmu_asid_lock);
1831*4882a593Smuzhiyun 	} else {
1832*4882a593Smuzhiyun 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1833*4882a593Smuzhiyun 		if (cfg->vmid)
1834*4882a593Smuzhiyun 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1835*4882a593Smuzhiyun 	}
1836*4882a593Smuzhiyun 
1837*4882a593Smuzhiyun 	kfree(smmu_domain);
1838*4882a593Smuzhiyun }
1839*4882a593Smuzhiyun 
1840*4882a593Smuzhiyun static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1841*4882a593Smuzhiyun 				       struct arm_smmu_master *master,
1842*4882a593Smuzhiyun 				       struct io_pgtable_cfg *pgtbl_cfg)
1843*4882a593Smuzhiyun {
1844*4882a593Smuzhiyun 	int ret;
1845*4882a593Smuzhiyun 	u32 asid;
1846*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1847*4882a593Smuzhiyun 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1848*4882a593Smuzhiyun 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1849*4882a593Smuzhiyun 
1850*4882a593Smuzhiyun 	refcount_set(&cfg->cd.refs, 1);
1851*4882a593Smuzhiyun 
1852*4882a593Smuzhiyun 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1853*4882a593Smuzhiyun 	mutex_lock(&arm_smmu_asid_lock);
1854*4882a593Smuzhiyun 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1855*4882a593Smuzhiyun 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1856*4882a593Smuzhiyun 	if (ret)
1857*4882a593Smuzhiyun 		goto out_unlock;
1858*4882a593Smuzhiyun 
1859*4882a593Smuzhiyun 	cfg->s1cdmax = master->ssid_bits;
1860*4882a593Smuzhiyun 
1861*4882a593Smuzhiyun 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1862*4882a593Smuzhiyun 	if (ret)
1863*4882a593Smuzhiyun 		goto out_free_asid;
1864*4882a593Smuzhiyun 
1865*4882a593Smuzhiyun 	cfg->cd.asid	= (u16)asid;
1866*4882a593Smuzhiyun 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1867*4882a593Smuzhiyun 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1868*4882a593Smuzhiyun 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1869*4882a593Smuzhiyun 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1870*4882a593Smuzhiyun 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1871*4882a593Smuzhiyun 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1872*4882a593Smuzhiyun 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1873*4882a593Smuzhiyun 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
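	/*
	 * EPD1 disables walks via TTBR1 (only TTBR0 is used for this CD);
	 * AA64 selects the AArch64 translation table format.
	 */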
1874*4882a593Smuzhiyun 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1875*4882a593Smuzhiyun 
1876*4882a593Smuzhiyun 	/*
1877*4882a593Smuzhiyun 	 * Note that this will end up calling arm_smmu_sync_cd() before
1878*4882a593Smuzhiyun 	 * the master has been added to the devices list for this domain.
1879*4882a593Smuzhiyun 	 * This isn't an issue because the STE hasn't been installed yet.
1880*4882a593Smuzhiyun 	 */
1881*4882a593Smuzhiyun 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1882*4882a593Smuzhiyun 	if (ret)
1883*4882a593Smuzhiyun 		goto out_free_cd_tables;
1884*4882a593Smuzhiyun 
1885*4882a593Smuzhiyun 	mutex_unlock(&arm_smmu_asid_lock);
1886*4882a593Smuzhiyun 	return 0;
1887*4882a593Smuzhiyun 
1888*4882a593Smuzhiyun out_free_cd_tables:
1889*4882a593Smuzhiyun 	arm_smmu_free_cd_tables(smmu_domain);
1890*4882a593Smuzhiyun out_free_asid:
1891*4882a593Smuzhiyun 	arm_smmu_free_asid(&cfg->cd);
1892*4882a593Smuzhiyun out_unlock:
1893*4882a593Smuzhiyun 	mutex_unlock(&arm_smmu_asid_lock);
1894*4882a593Smuzhiyun 	return ret;
1895*4882a593Smuzhiyun }
1896*4882a593Smuzhiyun 
1897*4882a593Smuzhiyun static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1898*4882a593Smuzhiyun 				       struct arm_smmu_master *master,
1899*4882a593Smuzhiyun 				       struct io_pgtable_cfg *pgtbl_cfg)
1900*4882a593Smuzhiyun {
1901*4882a593Smuzhiyun 	int vmid;
1902*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1903*4882a593Smuzhiyun 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1904*4882a593Smuzhiyun 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1905*4882a593Smuzhiyun 
1906*4882a593Smuzhiyun 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1907*4882a593Smuzhiyun 	if (vmid < 0)
1908*4882a593Smuzhiyun 		return vmid;
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1911*4882a593Smuzhiyun 	cfg->vmid	= (u16)vmid;
1912*4882a593Smuzhiyun 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1913*4882a593Smuzhiyun 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1914*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1915*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1916*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1917*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1918*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1919*4882a593Smuzhiyun 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1920*4882a593Smuzhiyun 	return 0;
1921*4882a593Smuzhiyun }
1922*4882a593Smuzhiyun 
1923*4882a593Smuzhiyun static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1924*4882a593Smuzhiyun 				    struct arm_smmu_master *master)
1925*4882a593Smuzhiyun {
1926*4882a593Smuzhiyun 	int ret;
1927*4882a593Smuzhiyun 	unsigned long ias, oas;
1928*4882a593Smuzhiyun 	enum io_pgtable_fmt fmt;
1929*4882a593Smuzhiyun 	struct io_pgtable_cfg pgtbl_cfg;
1930*4882a593Smuzhiyun 	struct io_pgtable_ops *pgtbl_ops;
1931*4882a593Smuzhiyun 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1932*4882a593Smuzhiyun 				 struct arm_smmu_master *,
1933*4882a593Smuzhiyun 				 struct io_pgtable_cfg *);
1934*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1935*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1936*4882a593Smuzhiyun 
1937*4882a593Smuzhiyun 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1938*4882a593Smuzhiyun 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1939*4882a593Smuzhiyun 		return 0;
1940*4882a593Smuzhiyun 	}
1941*4882a593Smuzhiyun 
1942*4882a593Smuzhiyun 	/* Restrict the stage to what we can actually support */
1943*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1944*4882a593Smuzhiyun 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1945*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1946*4882a593Smuzhiyun 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1947*4882a593Smuzhiyun 
1948*4882a593Smuzhiyun 	switch (smmu_domain->stage) {
1949*4882a593Smuzhiyun 	case ARM_SMMU_DOMAIN_S1:
1950*4882a593Smuzhiyun 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1951*4882a593Smuzhiyun 		ias = min_t(unsigned long, ias, VA_BITS);
1952*4882a593Smuzhiyun 		oas = smmu->ias;
1953*4882a593Smuzhiyun 		fmt = ARM_64_LPAE_S1;
1954*4882a593Smuzhiyun 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1955*4882a593Smuzhiyun 		break;
1956*4882a593Smuzhiyun 	case ARM_SMMU_DOMAIN_NESTED:
1957*4882a593Smuzhiyun 	case ARM_SMMU_DOMAIN_S2:
1958*4882a593Smuzhiyun 		ias = smmu->ias;
1959*4882a593Smuzhiyun 		oas = smmu->oas;
1960*4882a593Smuzhiyun 		fmt = ARM_64_LPAE_S2;
1961*4882a593Smuzhiyun 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1962*4882a593Smuzhiyun 		break;
1963*4882a593Smuzhiyun 	default:
1964*4882a593Smuzhiyun 		return -EINVAL;
1965*4882a593Smuzhiyun 	}
1966*4882a593Smuzhiyun 
1967*4882a593Smuzhiyun 	pgtbl_cfg = (struct io_pgtable_cfg) {
1968*4882a593Smuzhiyun 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1969*4882a593Smuzhiyun 		.ias		= ias,
1970*4882a593Smuzhiyun 		.oas		= oas,
1971*4882a593Smuzhiyun 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1972*4882a593Smuzhiyun 		.tlb		= &arm_smmu_flush_ops,
1973*4882a593Smuzhiyun 		.iommu_dev	= smmu->dev,
1974*4882a593Smuzhiyun 	};
1975*4882a593Smuzhiyun 
1976*4882a593Smuzhiyun 	if (smmu_domain->non_strict)
1977*4882a593Smuzhiyun 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1978*4882a593Smuzhiyun 
1979*4882a593Smuzhiyun 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1980*4882a593Smuzhiyun 	if (!pgtbl_ops)
1981*4882a593Smuzhiyun 		return -ENOMEM;
1982*4882a593Smuzhiyun 
1983*4882a593Smuzhiyun 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1984*4882a593Smuzhiyun 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1985*4882a593Smuzhiyun 	domain->geometry.force_aperture = true;
1986*4882a593Smuzhiyun 
1987*4882a593Smuzhiyun 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1988*4882a593Smuzhiyun 	if (ret < 0) {
1989*4882a593Smuzhiyun 		free_io_pgtable_ops(pgtbl_ops);
1990*4882a593Smuzhiyun 		return ret;
1991*4882a593Smuzhiyun 	}
1992*4882a593Smuzhiyun 
1993*4882a593Smuzhiyun 	smmu_domain->pgtbl_ops = pgtbl_ops;
1994*4882a593Smuzhiyun 	return 0;
1995*4882a593Smuzhiyun }
1996*4882a593Smuzhiyun 
1997*4882a593Smuzhiyun static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1998*4882a593Smuzhiyun {
1999*4882a593Smuzhiyun 	__le64 *step;
2000*4882a593Smuzhiyun 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2001*4882a593Smuzhiyun 
2002*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2003*4882a593Smuzhiyun 		struct arm_smmu_strtab_l1_desc *l1_desc;
2004*4882a593Smuzhiyun 		int idx;
2005*4882a593Smuzhiyun 
2006*4882a593Smuzhiyun 		/* Two-level walk */
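		/*
		 * SID bits above STRTAB_SPLIT index the L1 table; the low
		 * STRTAB_SPLIT bits pick one of the STEs in the L2 table that
		 * the descriptor points at.
		 */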
2007*4882a593Smuzhiyun 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2008*4882a593Smuzhiyun 		l1_desc = &cfg->l1_desc[idx];
2009*4882a593Smuzhiyun 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2010*4882a593Smuzhiyun 		step = &l1_desc->l2ptr[idx];
2011*4882a593Smuzhiyun 	} else {
2012*4882a593Smuzhiyun 		/* Simple linear lookup */
2013*4882a593Smuzhiyun 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2014*4882a593Smuzhiyun 	}
2015*4882a593Smuzhiyun 
2016*4882a593Smuzhiyun 	return step;
2017*4882a593Smuzhiyun }
2018*4882a593Smuzhiyun 
2019*4882a593Smuzhiyun static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2020*4882a593Smuzhiyun {
2021*4882a593Smuzhiyun 	int i, j;
2022*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = master->smmu;
2023*4882a593Smuzhiyun 
2024*4882a593Smuzhiyun 	for (i = 0; i < master->num_sids; ++i) {
2025*4882a593Smuzhiyun 		u32 sid = master->sids[i];
2026*4882a593Smuzhiyun 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2027*4882a593Smuzhiyun 
2028*4882a593Smuzhiyun 		/* Bridged PCI devices may end up with duplicated IDs */
2029*4882a593Smuzhiyun 		for (j = 0; j < i; j++)
2030*4882a593Smuzhiyun 			if (master->sids[j] == sid)
2031*4882a593Smuzhiyun 				break;
2032*4882a593Smuzhiyun 		if (j < i)
2033*4882a593Smuzhiyun 			continue;
2034*4882a593Smuzhiyun 
2035*4882a593Smuzhiyun 		arm_smmu_write_strtab_ent(master, sid, step);
2036*4882a593Smuzhiyun 	}
2037*4882a593Smuzhiyun }
2038*4882a593Smuzhiyun 
2039*4882a593Smuzhiyun static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2040*4882a593Smuzhiyun {
2041*4882a593Smuzhiyun 	struct device *dev = master->dev;
2042*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = master->smmu;
2043*4882a593Smuzhiyun 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2044*4882a593Smuzhiyun 
2045*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2046*4882a593Smuzhiyun 		return false;
2047*4882a593Smuzhiyun 
2048*4882a593Smuzhiyun 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2049*4882a593Smuzhiyun 		return false;
2050*4882a593Smuzhiyun 
2051*4882a593Smuzhiyun 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2052*4882a593Smuzhiyun }
2053*4882a593Smuzhiyun 
2054*4882a593Smuzhiyun static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2055*4882a593Smuzhiyun {
2056*4882a593Smuzhiyun 	size_t stu;
2057*4882a593Smuzhiyun 	struct pci_dev *pdev;
2058*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = master->smmu;
2059*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = master->domain;
2060*4882a593Smuzhiyun 
2061*4882a593Smuzhiyun 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2062*4882a593Smuzhiyun 	if (!master->ats_enabled)
2063*4882a593Smuzhiyun 		return;
2064*4882a593Smuzhiyun 
2065*4882a593Smuzhiyun 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2066*4882a593Smuzhiyun 	stu = __ffs(smmu->pgsize_bitmap);
2067*4882a593Smuzhiyun 	pdev = to_pci_dev(master->dev);
2068*4882a593Smuzhiyun 
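	/*
	 * Bump nr_ats_masters and flush the ATC before enabling ATS at the
	 * endpoint, pairing with the smp_mb() in arm_smmu_atc_inv_domain() so
	 * a concurrent unmap cannot miss this master.
	 */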
2069*4882a593Smuzhiyun 	atomic_inc(&smmu_domain->nr_ats_masters);
2070*4882a593Smuzhiyun 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2071*4882a593Smuzhiyun 	if (pci_enable_ats(pdev, stu))
2072*4882a593Smuzhiyun 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2073*4882a593Smuzhiyun }
2074*4882a593Smuzhiyun 
2075*4882a593Smuzhiyun static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2076*4882a593Smuzhiyun {
2077*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = master->domain;
2078*4882a593Smuzhiyun 
2079*4882a593Smuzhiyun 	if (!master->ats_enabled)
2080*4882a593Smuzhiyun 		return;
2081*4882a593Smuzhiyun 
2082*4882a593Smuzhiyun 	pci_disable_ats(to_pci_dev(master->dev));
2083*4882a593Smuzhiyun 	/*
2084*4882a593Smuzhiyun 	 * Ensure ATS is disabled at the endpoint before we issue the
2085*4882a593Smuzhiyun 	 * ATC invalidation via the SMMU.
2086*4882a593Smuzhiyun 	 */
2087*4882a593Smuzhiyun 	wmb();
2088*4882a593Smuzhiyun 	arm_smmu_atc_inv_master(master);
2089*4882a593Smuzhiyun 	atomic_dec(&smmu_domain->nr_ats_masters);
2090*4882a593Smuzhiyun }
2091*4882a593Smuzhiyun 
2092*4882a593Smuzhiyun static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2093*4882a593Smuzhiyun {
2094*4882a593Smuzhiyun 	int ret;
2095*4882a593Smuzhiyun 	int features;
2096*4882a593Smuzhiyun 	int num_pasids;
2097*4882a593Smuzhiyun 	struct pci_dev *pdev;
2098*4882a593Smuzhiyun 
2099*4882a593Smuzhiyun 	if (!dev_is_pci(master->dev))
2100*4882a593Smuzhiyun 		return -ENODEV;
2101*4882a593Smuzhiyun 
2102*4882a593Smuzhiyun 	pdev = to_pci_dev(master->dev);
2103*4882a593Smuzhiyun 
2104*4882a593Smuzhiyun 	features = pci_pasid_features(pdev);
2105*4882a593Smuzhiyun 	if (features < 0)
2106*4882a593Smuzhiyun 		return features;
2107*4882a593Smuzhiyun 
2108*4882a593Smuzhiyun 	num_pasids = pci_max_pasids(pdev);
2109*4882a593Smuzhiyun 	if (num_pasids <= 0)
2110*4882a593Smuzhiyun 		return num_pasids;
2111*4882a593Smuzhiyun 
2112*4882a593Smuzhiyun 	ret = pci_enable_pasid(pdev, features);
2113*4882a593Smuzhiyun 	if (ret) {
2114*4882a593Smuzhiyun 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2115*4882a593Smuzhiyun 		return ret;
2116*4882a593Smuzhiyun 	}
2117*4882a593Smuzhiyun 
2118*4882a593Smuzhiyun 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2119*4882a593Smuzhiyun 				  master->smmu->ssid_bits);
2120*4882a593Smuzhiyun 	return 0;
2121*4882a593Smuzhiyun }
2122*4882a593Smuzhiyun 
2123*4882a593Smuzhiyun static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2124*4882a593Smuzhiyun {
2125*4882a593Smuzhiyun 	struct pci_dev *pdev;
2126*4882a593Smuzhiyun 
2127*4882a593Smuzhiyun 	if (!dev_is_pci(master->dev))
2128*4882a593Smuzhiyun 		return;
2129*4882a593Smuzhiyun 
2130*4882a593Smuzhiyun 	pdev = to_pci_dev(master->dev);
2131*4882a593Smuzhiyun 
2132*4882a593Smuzhiyun 	if (!pdev->pasid_enabled)
2133*4882a593Smuzhiyun 		return;
2134*4882a593Smuzhiyun 
2135*4882a593Smuzhiyun 	master->ssid_bits = 0;
2136*4882a593Smuzhiyun 	pci_disable_pasid(pdev);
2137*4882a593Smuzhiyun }
2138*4882a593Smuzhiyun 
2139*4882a593Smuzhiyun static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2140*4882a593Smuzhiyun {
2141*4882a593Smuzhiyun 	unsigned long flags;
2142*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = master->domain;
2143*4882a593Smuzhiyun 
2144*4882a593Smuzhiyun 	if (!smmu_domain)
2145*4882a593Smuzhiyun 		return;
2146*4882a593Smuzhiyun 
2147*4882a593Smuzhiyun 	arm_smmu_disable_ats(master);
2148*4882a593Smuzhiyun 
2149*4882a593Smuzhiyun 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2150*4882a593Smuzhiyun 	list_del(&master->domain_head);
2151*4882a593Smuzhiyun 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2152*4882a593Smuzhiyun 
2153*4882a593Smuzhiyun 	master->domain = NULL;
2154*4882a593Smuzhiyun 	master->ats_enabled = false;
2155*4882a593Smuzhiyun 	arm_smmu_install_ste_for_dev(master);
2156*4882a593Smuzhiyun }
2157*4882a593Smuzhiyun 
2158*4882a593Smuzhiyun static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2159*4882a593Smuzhiyun {
2160*4882a593Smuzhiyun 	int ret = 0;
2161*4882a593Smuzhiyun 	unsigned long flags;
2162*4882a593Smuzhiyun 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2163*4882a593Smuzhiyun 	struct arm_smmu_device *smmu;
2164*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165*4882a593Smuzhiyun 	struct arm_smmu_master *master;
2166*4882a593Smuzhiyun 
2167*4882a593Smuzhiyun 	if (!fwspec)
2168*4882a593Smuzhiyun 		return -ENOENT;
2169*4882a593Smuzhiyun 
2170*4882a593Smuzhiyun 	master = dev_iommu_priv_get(dev);
2171*4882a593Smuzhiyun 	smmu = master->smmu;
2172*4882a593Smuzhiyun 
2173*4882a593Smuzhiyun 	/*
2174*4882a593Smuzhiyun 	 * Checking that SVA is disabled ensures that this device isn't bound to
2175*4882a593Smuzhiyun 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2176*4882a593Smuzhiyun 	 * be removed concurrently since we're holding the group mutex.
2177*4882a593Smuzhiyun 	 */
2178*4882a593Smuzhiyun 	if (arm_smmu_master_sva_enabled(master)) {
2179*4882a593Smuzhiyun 		dev_err(dev, "cannot attach - SVA enabled\n");
2180*4882a593Smuzhiyun 		return -EBUSY;
2181*4882a593Smuzhiyun 	}
2182*4882a593Smuzhiyun 
2183*4882a593Smuzhiyun 	arm_smmu_detach_dev(master);
2184*4882a593Smuzhiyun 
2185*4882a593Smuzhiyun 	mutex_lock(&smmu_domain->init_mutex);
2186*4882a593Smuzhiyun 
2187*4882a593Smuzhiyun 	if (!smmu_domain->smmu) {
2188*4882a593Smuzhiyun 		smmu_domain->smmu = smmu;
2189*4882a593Smuzhiyun 		ret = arm_smmu_domain_finalise(domain, master);
2190*4882a593Smuzhiyun 		if (ret) {
2191*4882a593Smuzhiyun 			smmu_domain->smmu = NULL;
2192*4882a593Smuzhiyun 			goto out_unlock;
2193*4882a593Smuzhiyun 		}
2194*4882a593Smuzhiyun 	} else if (smmu_domain->smmu != smmu) {
2195*4882a593Smuzhiyun 		dev_err(dev,
2196*4882a593Smuzhiyun 			"cannot attach to SMMU %s (upstream of %s)\n",
2197*4882a593Smuzhiyun 			dev_name(smmu_domain->smmu->dev),
2198*4882a593Smuzhiyun 			dev_name(smmu->dev));
2199*4882a593Smuzhiyun 		ret = -ENXIO;
2200*4882a593Smuzhiyun 		goto out_unlock;
2201*4882a593Smuzhiyun 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2202*4882a593Smuzhiyun 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2203*4882a593Smuzhiyun 		dev_err(dev,
2204*4882a593Smuzhiyun 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2205*4882a593Smuzhiyun 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2206*4882a593Smuzhiyun 		ret = -EINVAL;
2207*4882a593Smuzhiyun 		goto out_unlock;
2208*4882a593Smuzhiyun 	}
2209*4882a593Smuzhiyun 
2210*4882a593Smuzhiyun 	master->domain = smmu_domain;
2211*4882a593Smuzhiyun 
2212*4882a593Smuzhiyun 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2213*4882a593Smuzhiyun 		master->ats_enabled = arm_smmu_ats_supported(master);
2214*4882a593Smuzhiyun 
2215*4882a593Smuzhiyun 	arm_smmu_install_ste_for_dev(master);
2216*4882a593Smuzhiyun 
2217*4882a593Smuzhiyun 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2218*4882a593Smuzhiyun 	list_add(&master->domain_head, &smmu_domain->devices);
2219*4882a593Smuzhiyun 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2220*4882a593Smuzhiyun 
2221*4882a593Smuzhiyun 	arm_smmu_enable_ats(master);
2222*4882a593Smuzhiyun 
2223*4882a593Smuzhiyun out_unlock:
2224*4882a593Smuzhiyun 	mutex_unlock(&smmu_domain->init_mutex);
2225*4882a593Smuzhiyun 	return ret;
2226*4882a593Smuzhiyun }
2227*4882a593Smuzhiyun 
2228*4882a593Smuzhiyun static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2229*4882a593Smuzhiyun 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2230*4882a593Smuzhiyun {
2231*4882a593Smuzhiyun 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2232*4882a593Smuzhiyun 
2233*4882a593Smuzhiyun 	if (!ops)
2234*4882a593Smuzhiyun 		return -ENODEV;
2235*4882a593Smuzhiyun 
2236*4882a593Smuzhiyun 	return ops->map(ops, iova, paddr, size, prot, gfp);
2237*4882a593Smuzhiyun }
2238*4882a593Smuzhiyun 
2239*4882a593Smuzhiyun static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2240*4882a593Smuzhiyun 			     size_t size, struct iommu_iotlb_gather *gather)
2241*4882a593Smuzhiyun {
2242*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2243*4882a593Smuzhiyun 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2244*4882a593Smuzhiyun 
2245*4882a593Smuzhiyun 	if (!ops)
2246*4882a593Smuzhiyun 		return 0;
2247*4882a593Smuzhiyun 
2248*4882a593Smuzhiyun 	return ops->unmap(ops, iova, size, gather);
2249*4882a593Smuzhiyun }
2250*4882a593Smuzhiyun 
2251*4882a593Smuzhiyun static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2252*4882a593Smuzhiyun {
2253*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2254*4882a593Smuzhiyun 
2255*4882a593Smuzhiyun 	if (smmu_domain->smmu)
2256*4882a593Smuzhiyun 		arm_smmu_tlb_inv_context(smmu_domain);
2257*4882a593Smuzhiyun }
2258*4882a593Smuzhiyun 
2259*4882a593Smuzhiyun static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2260*4882a593Smuzhiyun 				struct iommu_iotlb_gather *gather)
2261*4882a593Smuzhiyun {
2262*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2263*4882a593Smuzhiyun 
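	/* gather->start and gather->end are inclusive addresses, hence the +1 */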
2264*4882a593Smuzhiyun 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2265*4882a593Smuzhiyun 			       gather->pgsize, true, smmu_domain);
2266*4882a593Smuzhiyun }
2267*4882a593Smuzhiyun 
2268*4882a593Smuzhiyun static phys_addr_t
2269*4882a593Smuzhiyun arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2270*4882a593Smuzhiyun {
2271*4882a593Smuzhiyun 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2272*4882a593Smuzhiyun 
2273*4882a593Smuzhiyun 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2274*4882a593Smuzhiyun 		return iova;
2275*4882a593Smuzhiyun 
2276*4882a593Smuzhiyun 	if (!ops)
2277*4882a593Smuzhiyun 		return 0;
2278*4882a593Smuzhiyun 
2279*4882a593Smuzhiyun 	return ops->iova_to_phys(ops, iova);
2280*4882a593Smuzhiyun }
2281*4882a593Smuzhiyun 
2282*4882a593Smuzhiyun static struct platform_driver arm_smmu_driver;
2283*4882a593Smuzhiyun 
2284*4882a593Smuzhiyun static
2285*4882a593Smuzhiyun struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2286*4882a593Smuzhiyun {
2287*4882a593Smuzhiyun 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2288*4882a593Smuzhiyun 							  fwnode);
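	/*
	 * driver_find_device_by_fwnode() takes a reference on the device;
	 * we only need the driver data, so drop the reference straight away.
	 */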
2289*4882a593Smuzhiyun 	put_device(dev);
2290*4882a593Smuzhiyun 	return dev ? dev_get_drvdata(dev) : NULL;
2291*4882a593Smuzhiyun }
2292*4882a593Smuzhiyun 
2293*4882a593Smuzhiyun static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2294*4882a593Smuzhiyun {
2295*4882a593Smuzhiyun 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2296*4882a593Smuzhiyun 
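	/*
	 * With a 2-level stream table, each L1 descriptor covers
	 * 1 << STRTAB_SPLIT stream IDs, so scale the limit accordingly.
	 */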
2297*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2298*4882a593Smuzhiyun 		limit *= 1UL << STRTAB_SPLIT;
2299*4882a593Smuzhiyun 
2300*4882a593Smuzhiyun 	return sid < limit;
2301*4882a593Smuzhiyun }
2302*4882a593Smuzhiyun 
2303*4882a593Smuzhiyun static struct iommu_ops arm_smmu_ops;
2304*4882a593Smuzhiyun 
2305*4882a593Smuzhiyun static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2306*4882a593Smuzhiyun {
2307*4882a593Smuzhiyun 	int i, ret;
2308*4882a593Smuzhiyun 	struct arm_smmu_device *smmu;
2309*4882a593Smuzhiyun 	struct arm_smmu_master *master;
2310*4882a593Smuzhiyun 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2311*4882a593Smuzhiyun 
2312*4882a593Smuzhiyun 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2313*4882a593Smuzhiyun 		return ERR_PTR(-ENODEV);
2314*4882a593Smuzhiyun 
2315*4882a593Smuzhiyun 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2316*4882a593Smuzhiyun 		return ERR_PTR(-EBUSY);
2317*4882a593Smuzhiyun 
2318*4882a593Smuzhiyun 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2319*4882a593Smuzhiyun 	if (!smmu)
2320*4882a593Smuzhiyun 		return ERR_PTR(-ENODEV);
2321*4882a593Smuzhiyun 
2322*4882a593Smuzhiyun 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2323*4882a593Smuzhiyun 	if (!master)
2324*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
2325*4882a593Smuzhiyun 
2326*4882a593Smuzhiyun 	master->dev = dev;
2327*4882a593Smuzhiyun 	master->smmu = smmu;
2328*4882a593Smuzhiyun 	master->sids = fwspec->ids;
2329*4882a593Smuzhiyun 	master->num_sids = fwspec->num_ids;
2330*4882a593Smuzhiyun 	INIT_LIST_HEAD(&master->bonds);
2331*4882a593Smuzhiyun 	dev_iommu_priv_set(dev, master);
2332*4882a593Smuzhiyun 
2333*4882a593Smuzhiyun 	/* Check the SIDs are in range of the SMMU and our stream table */
2334*4882a593Smuzhiyun 	for (i = 0; i < master->num_sids; i++) {
2335*4882a593Smuzhiyun 		u32 sid = master->sids[i];
2336*4882a593Smuzhiyun 
2337*4882a593Smuzhiyun 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2338*4882a593Smuzhiyun 			ret = -ERANGE;
2339*4882a593Smuzhiyun 			goto err_free_master;
2340*4882a593Smuzhiyun 		}
2341*4882a593Smuzhiyun 
2342*4882a593Smuzhiyun 		/* Ensure l2 strtab is initialised */
2343*4882a593Smuzhiyun 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2344*4882a593Smuzhiyun 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2345*4882a593Smuzhiyun 			if (ret)
2346*4882a593Smuzhiyun 				goto err_free_master;
2347*4882a593Smuzhiyun 		}
2348*4882a593Smuzhiyun 	}
2349*4882a593Smuzhiyun 
2350*4882a593Smuzhiyun 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2351*4882a593Smuzhiyun 
2352*4882a593Smuzhiyun 	/*
2353*4882a593Smuzhiyun 	 * Note that PASID must be enabled before, and disabled after ATS:
2354*4882a593Smuzhiyun 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2355*4882a593Smuzhiyun 	 *
2356*4882a593Smuzhiyun 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2357*4882a593Smuzhiyun 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2358*4882a593Smuzhiyun 	 *   are changed.
2359*4882a593Smuzhiyun 	 */
2360*4882a593Smuzhiyun 	arm_smmu_enable_pasid(master);
2361*4882a593Smuzhiyun 
2362*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2363*4882a593Smuzhiyun 		master->ssid_bits = min_t(u8, master->ssid_bits,
2364*4882a593Smuzhiyun 					  CTXDESC_LINEAR_CDMAX);
2365*4882a593Smuzhiyun 
2366*4882a593Smuzhiyun 	return &smmu->iommu;
2367*4882a593Smuzhiyun 
2368*4882a593Smuzhiyun err_free_master:
2369*4882a593Smuzhiyun 	kfree(master);
2370*4882a593Smuzhiyun 	dev_iommu_priv_set(dev, NULL);
2371*4882a593Smuzhiyun 	return ERR_PTR(ret);
2372*4882a593Smuzhiyun }
2373*4882a593Smuzhiyun 
2374*4882a593Smuzhiyun static void arm_smmu_release_device(struct device *dev)
2375*4882a593Smuzhiyun {
2376*4882a593Smuzhiyun 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2377*4882a593Smuzhiyun 	struct arm_smmu_master *master;
2378*4882a593Smuzhiyun 
2379*4882a593Smuzhiyun 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2380*4882a593Smuzhiyun 		return;
2381*4882a593Smuzhiyun 
2382*4882a593Smuzhiyun 	master = dev_iommu_priv_get(dev);
2383*4882a593Smuzhiyun 	WARN_ON(arm_smmu_master_sva_enabled(master));
2384*4882a593Smuzhiyun 	arm_smmu_detach_dev(master);
2385*4882a593Smuzhiyun 	arm_smmu_disable_pasid(master);
2386*4882a593Smuzhiyun 	kfree(master);
2387*4882a593Smuzhiyun 	iommu_fwspec_free(dev);
2388*4882a593Smuzhiyun }
2389*4882a593Smuzhiyun 
2390*4882a593Smuzhiyun static struct iommu_group *arm_smmu_device_group(struct device *dev)
2391*4882a593Smuzhiyun {
2392*4882a593Smuzhiyun 	struct iommu_group *group;
2393*4882a593Smuzhiyun 
2394*4882a593Smuzhiyun 	/*
2395*4882a593Smuzhiyun 	 * We don't support devices sharing stream IDs other than PCI RID
2396*4882a593Smuzhiyun 	 * aliases, since the necessary ID-to-device lookup becomes rather
2397*4882a593Smuzhiyun 	 * impractical given a potentially sparse 32-bit stream ID space.
2398*4882a593Smuzhiyun 	 */
2399*4882a593Smuzhiyun 	if (dev_is_pci(dev))
2400*4882a593Smuzhiyun 		group = pci_device_group(dev);
2401*4882a593Smuzhiyun 	else
2402*4882a593Smuzhiyun 		group = generic_device_group(dev);
2403*4882a593Smuzhiyun 
2404*4882a593Smuzhiyun 	return group;
2405*4882a593Smuzhiyun }
2406*4882a593Smuzhiyun 
2407*4882a593Smuzhiyun static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2408*4882a593Smuzhiyun 				    enum iommu_attr attr, void *data)
2409*4882a593Smuzhiyun {
2410*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2411*4882a593Smuzhiyun 
2412*4882a593Smuzhiyun 	switch (domain->type) {
2413*4882a593Smuzhiyun 	case IOMMU_DOMAIN_UNMANAGED:
2414*4882a593Smuzhiyun 		switch (attr) {
2415*4882a593Smuzhiyun 		case DOMAIN_ATTR_NESTING:
2416*4882a593Smuzhiyun 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2417*4882a593Smuzhiyun 			return 0;
2418*4882a593Smuzhiyun 		default:
2419*4882a593Smuzhiyun 			return -ENODEV;
2420*4882a593Smuzhiyun 		}
2421*4882a593Smuzhiyun 		break;
2422*4882a593Smuzhiyun 	case IOMMU_DOMAIN_DMA:
2423*4882a593Smuzhiyun 		switch (attr) {
2424*4882a593Smuzhiyun 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2425*4882a593Smuzhiyun 			*(int *)data = smmu_domain->non_strict;
2426*4882a593Smuzhiyun 			return 0;
2427*4882a593Smuzhiyun 		default:
2428*4882a593Smuzhiyun 			return -ENODEV;
2429*4882a593Smuzhiyun 		}
2430*4882a593Smuzhiyun 		break;
2431*4882a593Smuzhiyun 	default:
2432*4882a593Smuzhiyun 		return -EINVAL;
2433*4882a593Smuzhiyun 	}
2434*4882a593Smuzhiyun }
2435*4882a593Smuzhiyun 
2436*4882a593Smuzhiyun static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2437*4882a593Smuzhiyun 				    enum iommu_attr attr, void *data)
2438*4882a593Smuzhiyun {
2439*4882a593Smuzhiyun 	int ret = 0;
2440*4882a593Smuzhiyun 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2441*4882a593Smuzhiyun 
2442*4882a593Smuzhiyun 	mutex_lock(&smmu_domain->init_mutex);
2443*4882a593Smuzhiyun 
2444*4882a593Smuzhiyun 	switch (domain->type) {
2445*4882a593Smuzhiyun 	case IOMMU_DOMAIN_UNMANAGED:
2446*4882a593Smuzhiyun 		switch (attr) {
2447*4882a593Smuzhiyun 		case DOMAIN_ATTR_NESTING:
2448*4882a593Smuzhiyun 			if (smmu_domain->smmu) {
2449*4882a593Smuzhiyun 				ret = -EPERM;
2450*4882a593Smuzhiyun 				goto out_unlock;
2451*4882a593Smuzhiyun 			}
2452*4882a593Smuzhiyun 
2453*4882a593Smuzhiyun 			if (*(int *)data)
2454*4882a593Smuzhiyun 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2455*4882a593Smuzhiyun 			else
2456*4882a593Smuzhiyun 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2457*4882a593Smuzhiyun 			break;
2458*4882a593Smuzhiyun 		default:
2459*4882a593Smuzhiyun 			ret = -ENODEV;
2460*4882a593Smuzhiyun 		}
2461*4882a593Smuzhiyun 		break;
2462*4882a593Smuzhiyun 	case IOMMU_DOMAIN_DMA:
2463*4882a593Smuzhiyun 		switch(attr) {
2464*4882a593Smuzhiyun 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2465*4882a593Smuzhiyun 			smmu_domain->non_strict = *(int *)data;
2466*4882a593Smuzhiyun 			break;
2467*4882a593Smuzhiyun 		default:
2468*4882a593Smuzhiyun 			ret = -ENODEV;
2469*4882a593Smuzhiyun 		}
2470*4882a593Smuzhiyun 		break;
2471*4882a593Smuzhiyun 	default:
2472*4882a593Smuzhiyun 		ret = -EINVAL;
2473*4882a593Smuzhiyun 	}
2474*4882a593Smuzhiyun 
2475*4882a593Smuzhiyun out_unlock:
2476*4882a593Smuzhiyun 	mutex_unlock(&smmu_domain->init_mutex);
2477*4882a593Smuzhiyun 	return ret;
2478*4882a593Smuzhiyun }
2479*4882a593Smuzhiyun 
2480*4882a593Smuzhiyun static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2481*4882a593Smuzhiyun {
2482*4882a593Smuzhiyun 	return iommu_fwspec_add_ids(dev, args->args, 1);
2483*4882a593Smuzhiyun }
2484*4882a593Smuzhiyun 
2485*4882a593Smuzhiyun static void arm_smmu_get_resv_regions(struct device *dev,
2486*4882a593Smuzhiyun 				      struct list_head *head)
2487*4882a593Smuzhiyun {
2488*4882a593Smuzhiyun 	struct iommu_resv_region *region;
2489*4882a593Smuzhiyun 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2490*4882a593Smuzhiyun 
2491*4882a593Smuzhiyun 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2492*4882a593Smuzhiyun 					 prot, IOMMU_RESV_SW_MSI);
2493*4882a593Smuzhiyun 	if (!region)
2494*4882a593Smuzhiyun 		return;
2495*4882a593Smuzhiyun 
2496*4882a593Smuzhiyun 	list_add_tail(&region->list, head);
2497*4882a593Smuzhiyun 
2498*4882a593Smuzhiyun 	iommu_dma_get_resv_regions(dev, head);
2499*4882a593Smuzhiyun }
2500*4882a593Smuzhiyun 
2501*4882a593Smuzhiyun static bool arm_smmu_dev_has_feature(struct device *dev,
2502*4882a593Smuzhiyun 				     enum iommu_dev_features feat)
2503*4882a593Smuzhiyun {
2504*4882a593Smuzhiyun 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2505*4882a593Smuzhiyun 
2506*4882a593Smuzhiyun 	if (!master)
2507*4882a593Smuzhiyun 		return false;
2508*4882a593Smuzhiyun 
2509*4882a593Smuzhiyun 	switch (feat) {
2510*4882a593Smuzhiyun 	case IOMMU_DEV_FEAT_SVA:
2511*4882a593Smuzhiyun 		return arm_smmu_master_sva_supported(master);
2512*4882a593Smuzhiyun 	default:
2513*4882a593Smuzhiyun 		return false;
2514*4882a593Smuzhiyun 	}
2515*4882a593Smuzhiyun }
2516*4882a593Smuzhiyun 
2517*4882a593Smuzhiyun static bool arm_smmu_dev_feature_enabled(struct device *dev,
2518*4882a593Smuzhiyun 					 enum iommu_dev_features feat)
2519*4882a593Smuzhiyun {
2520*4882a593Smuzhiyun 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2521*4882a593Smuzhiyun 
2522*4882a593Smuzhiyun 	if (!master)
2523*4882a593Smuzhiyun 		return false;
2524*4882a593Smuzhiyun 
2525*4882a593Smuzhiyun 	switch (feat) {
2526*4882a593Smuzhiyun 	case IOMMU_DEV_FEAT_SVA:
2527*4882a593Smuzhiyun 		return arm_smmu_master_sva_enabled(master);
2528*4882a593Smuzhiyun 	default:
2529*4882a593Smuzhiyun 		return false;
2530*4882a593Smuzhiyun 	}
2531*4882a593Smuzhiyun }
2532*4882a593Smuzhiyun 
2533*4882a593Smuzhiyun static int arm_smmu_dev_enable_feature(struct device *dev,
2534*4882a593Smuzhiyun 				       enum iommu_dev_features feat)
2535*4882a593Smuzhiyun {
2536*4882a593Smuzhiyun 	if (!arm_smmu_dev_has_feature(dev, feat))
2537*4882a593Smuzhiyun 		return -ENODEV;
2538*4882a593Smuzhiyun 
2539*4882a593Smuzhiyun 	if (arm_smmu_dev_feature_enabled(dev, feat))
2540*4882a593Smuzhiyun 		return -EBUSY;
2541*4882a593Smuzhiyun 
2542*4882a593Smuzhiyun 	switch (feat) {
2543*4882a593Smuzhiyun 	case IOMMU_DEV_FEAT_SVA:
2544*4882a593Smuzhiyun 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2545*4882a593Smuzhiyun 	default:
2546*4882a593Smuzhiyun 		return -EINVAL;
2547*4882a593Smuzhiyun 	}
2548*4882a593Smuzhiyun }
2549*4882a593Smuzhiyun 
2550*4882a593Smuzhiyun static int arm_smmu_dev_disable_feature(struct device *dev,
2551*4882a593Smuzhiyun 					enum iommu_dev_features feat)
2552*4882a593Smuzhiyun {
2553*4882a593Smuzhiyun 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2554*4882a593Smuzhiyun 		return -EINVAL;
2555*4882a593Smuzhiyun 
2556*4882a593Smuzhiyun 	switch (feat) {
2557*4882a593Smuzhiyun 	case IOMMU_DEV_FEAT_SVA:
2558*4882a593Smuzhiyun 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2559*4882a593Smuzhiyun 	default:
2560*4882a593Smuzhiyun 		return -EINVAL;
2561*4882a593Smuzhiyun 	}
2562*4882a593Smuzhiyun }
2563*4882a593Smuzhiyun 
2564*4882a593Smuzhiyun static struct iommu_ops arm_smmu_ops = {
2565*4882a593Smuzhiyun 	.capable		= arm_smmu_capable,
2566*4882a593Smuzhiyun 	.domain_alloc		= arm_smmu_domain_alloc,
2567*4882a593Smuzhiyun 	.domain_free		= arm_smmu_domain_free,
2568*4882a593Smuzhiyun 	.attach_dev		= arm_smmu_attach_dev,
2569*4882a593Smuzhiyun 	.map			= arm_smmu_map,
2570*4882a593Smuzhiyun 	.unmap			= arm_smmu_unmap,
2571*4882a593Smuzhiyun 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2572*4882a593Smuzhiyun 	.iotlb_sync		= arm_smmu_iotlb_sync,
2573*4882a593Smuzhiyun 	.iova_to_phys		= arm_smmu_iova_to_phys,
2574*4882a593Smuzhiyun 	.probe_device		= arm_smmu_probe_device,
2575*4882a593Smuzhiyun 	.release_device		= arm_smmu_release_device,
2576*4882a593Smuzhiyun 	.device_group		= arm_smmu_device_group,
2577*4882a593Smuzhiyun 	.domain_get_attr	= arm_smmu_domain_get_attr,
2578*4882a593Smuzhiyun 	.domain_set_attr	= arm_smmu_domain_set_attr,
2579*4882a593Smuzhiyun 	.of_xlate		= arm_smmu_of_xlate,
2580*4882a593Smuzhiyun 	.get_resv_regions	= arm_smmu_get_resv_regions,
2581*4882a593Smuzhiyun 	.put_resv_regions	= generic_iommu_put_resv_regions,
2582*4882a593Smuzhiyun 	.dev_has_feat		= arm_smmu_dev_has_feature,
2583*4882a593Smuzhiyun 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2584*4882a593Smuzhiyun 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2585*4882a593Smuzhiyun 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2586*4882a593Smuzhiyun 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2587*4882a593Smuzhiyun };
2588*4882a593Smuzhiyun 
2589*4882a593Smuzhiyun /* Probing and initialisation functions */
2590*4882a593Smuzhiyun static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2591*4882a593Smuzhiyun 				   struct arm_smmu_queue *q,
2592*4882a593Smuzhiyun 				   unsigned long prod_off,
2593*4882a593Smuzhiyun 				   unsigned long cons_off,
2594*4882a593Smuzhiyun 				   size_t dwords, const char *name)
2595*4882a593Smuzhiyun {
2596*4882a593Smuzhiyun 	size_t qsz;
2597*4882a593Smuzhiyun 
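	/*
	 * Try for the full queue size first; if the coherent allocation
	 * fails, halve the number of entries until it either succeeds or
	 * the queue would shrink below a single page.
	 */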
2598*4882a593Smuzhiyun 	do {
2599*4882a593Smuzhiyun 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2600*4882a593Smuzhiyun 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2601*4882a593Smuzhiyun 					      GFP_KERNEL);
2602*4882a593Smuzhiyun 		if (q->base || qsz < PAGE_SIZE)
2603*4882a593Smuzhiyun 			break;
2604*4882a593Smuzhiyun 
2605*4882a593Smuzhiyun 		q->llq.max_n_shift--;
2606*4882a593Smuzhiyun 	} while (1);
2607*4882a593Smuzhiyun 
2608*4882a593Smuzhiyun 	if (!q->base) {
2609*4882a593Smuzhiyun 		dev_err(smmu->dev,
2610*4882a593Smuzhiyun 			"failed to allocate queue (0x%zx bytes) for %s\n",
2611*4882a593Smuzhiyun 			qsz, name);
2612*4882a593Smuzhiyun 		return -ENOMEM;
2613*4882a593Smuzhiyun 	}
2614*4882a593Smuzhiyun 
2615*4882a593Smuzhiyun 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2616*4882a593Smuzhiyun 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2617*4882a593Smuzhiyun 			 1 << q->llq.max_n_shift, name);
2618*4882a593Smuzhiyun 	}
2619*4882a593Smuzhiyun 
2620*4882a593Smuzhiyun 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2621*4882a593Smuzhiyun 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2622*4882a593Smuzhiyun 	q->ent_dwords	= dwords;
2623*4882a593Smuzhiyun 
2624*4882a593Smuzhiyun 	q->q_base  = Q_BASE_RWA;
2625*4882a593Smuzhiyun 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2626*4882a593Smuzhiyun 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2627*4882a593Smuzhiyun 
2628*4882a593Smuzhiyun 	q->llq.prod = q->llq.cons = 0;
2629*4882a593Smuzhiyun 	return 0;
2630*4882a593Smuzhiyun }
2631*4882a593Smuzhiyun 
2632*4882a593Smuzhiyun static void arm_smmu_cmdq_free_bitmap(void *data)
2633*4882a593Smuzhiyun {
2634*4882a593Smuzhiyun 	unsigned long *bitmap = data;
2635*4882a593Smuzhiyun 	bitmap_free(bitmap);
2636*4882a593Smuzhiyun }
2637*4882a593Smuzhiyun 
2638*4882a593Smuzhiyun static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2639*4882a593Smuzhiyun {
2640*4882a593Smuzhiyun 	int ret = 0;
2641*4882a593Smuzhiyun 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2642*4882a593Smuzhiyun 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2643*4882a593Smuzhiyun 	atomic_long_t *bitmap;
2644*4882a593Smuzhiyun 
2645*4882a593Smuzhiyun 	atomic_set(&cmdq->owner_prod, 0);
2646*4882a593Smuzhiyun 	atomic_set(&cmdq->lock, 0);
2647*4882a593Smuzhiyun 
2648*4882a593Smuzhiyun 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2649*4882a593Smuzhiyun 	if (!bitmap) {
2650*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2651*4882a593Smuzhiyun 		ret = -ENOMEM;
2652*4882a593Smuzhiyun 	} else {
2653*4882a593Smuzhiyun 		cmdq->valid_map = bitmap;
2654*4882a593Smuzhiyun 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2655*4882a593Smuzhiyun 	}
2656*4882a593Smuzhiyun 
2657*4882a593Smuzhiyun 	return ret;
2658*4882a593Smuzhiyun }
2659*4882a593Smuzhiyun 
2660*4882a593Smuzhiyun static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2661*4882a593Smuzhiyun {
2662*4882a593Smuzhiyun 	int ret;
2663*4882a593Smuzhiyun 
2664*4882a593Smuzhiyun 	/* cmdq */
2665*4882a593Smuzhiyun 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2666*4882a593Smuzhiyun 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2667*4882a593Smuzhiyun 				      "cmdq");
2668*4882a593Smuzhiyun 	if (ret)
2669*4882a593Smuzhiyun 		return ret;
2670*4882a593Smuzhiyun 
2671*4882a593Smuzhiyun 	ret = arm_smmu_cmdq_init(smmu);
2672*4882a593Smuzhiyun 	if (ret)
2673*4882a593Smuzhiyun 		return ret;
2674*4882a593Smuzhiyun 
2675*4882a593Smuzhiyun 	/* evtq */
2676*4882a593Smuzhiyun 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2677*4882a593Smuzhiyun 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2678*4882a593Smuzhiyun 				      "evtq");
2679*4882a593Smuzhiyun 	if (ret)
2680*4882a593Smuzhiyun 		return ret;
2681*4882a593Smuzhiyun 
2682*4882a593Smuzhiyun 	/* priq */
2683*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2684*4882a593Smuzhiyun 		return 0;
2685*4882a593Smuzhiyun 
2686*4882a593Smuzhiyun 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2687*4882a593Smuzhiyun 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2688*4882a593Smuzhiyun 				       "priq");
2689*4882a593Smuzhiyun }
2690*4882a593Smuzhiyun 
2691*4882a593Smuzhiyun static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2692*4882a593Smuzhiyun {
2693*4882a593Smuzhiyun 	unsigned int i;
2694*4882a593Smuzhiyun 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2695*4882a593Smuzhiyun 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2696*4882a593Smuzhiyun 	void *strtab = smmu->strtab_cfg.strtab;
2697*4882a593Smuzhiyun 
2698*4882a593Smuzhiyun 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2699*4882a593Smuzhiyun 	if (!cfg->l1_desc) {
2700*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2701*4882a593Smuzhiyun 		return -ENOMEM;
2702*4882a593Smuzhiyun 	}
2703*4882a593Smuzhiyun 
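	/*
	 * Leave every L1 descriptor invalid for now; the L2 tables are
	 * installed lazily by arm_smmu_init_l2_strtab() when a device
	 * using that stream ID range is probed.
	 */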
2704*4882a593Smuzhiyun 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2705*4882a593Smuzhiyun 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2706*4882a593Smuzhiyun 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2707*4882a593Smuzhiyun 	}
2708*4882a593Smuzhiyun 
2709*4882a593Smuzhiyun 	return 0;
2710*4882a593Smuzhiyun }
2711*4882a593Smuzhiyun 
2712*4882a593Smuzhiyun static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2713*4882a593Smuzhiyun {
2714*4882a593Smuzhiyun 	void *strtab;
2715*4882a593Smuzhiyun 	u64 reg;
2716*4882a593Smuzhiyun 	u32 size, l1size;
2717*4882a593Smuzhiyun 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2718*4882a593Smuzhiyun 
2719*4882a593Smuzhiyun 	/* Calculate the L1 size, capped to the SIDSIZE. */
2720*4882a593Smuzhiyun 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2721*4882a593Smuzhiyun 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2722*4882a593Smuzhiyun 	cfg->num_l1_ents = 1 << size;
2723*4882a593Smuzhiyun 
2724*4882a593Smuzhiyun 	size += STRTAB_SPLIT;
2725*4882a593Smuzhiyun 	if (size < smmu->sid_bits)
2726*4882a593Smuzhiyun 		dev_warn(smmu->dev,
2727*4882a593Smuzhiyun 			 "2-level strtab only covers %u/%u bits of SID\n",
2728*4882a593Smuzhiyun 			 size, smmu->sid_bits);
2729*4882a593Smuzhiyun 
2730*4882a593Smuzhiyun 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2731*4882a593Smuzhiyun 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2732*4882a593Smuzhiyun 				     GFP_KERNEL);
2733*4882a593Smuzhiyun 	if (!strtab) {
2734*4882a593Smuzhiyun 		dev_err(smmu->dev,
2735*4882a593Smuzhiyun 			"failed to allocate l1 stream table (%u bytes)\n",
2736*4882a593Smuzhiyun 			l1size);
2737*4882a593Smuzhiyun 		return -ENOMEM;
2738*4882a593Smuzhiyun 	}
2739*4882a593Smuzhiyun 	cfg->strtab = strtab;
2740*4882a593Smuzhiyun 
2741*4882a593Smuzhiyun 	/* Configure strtab_base_cfg for 2 levels */
2742*4882a593Smuzhiyun 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2743*4882a593Smuzhiyun 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2744*4882a593Smuzhiyun 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2745*4882a593Smuzhiyun 	cfg->strtab_base_cfg = reg;
2746*4882a593Smuzhiyun 
2747*4882a593Smuzhiyun 	return arm_smmu_init_l1_strtab(smmu);
2748*4882a593Smuzhiyun }
2749*4882a593Smuzhiyun 
2750*4882a593Smuzhiyun static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2751*4882a593Smuzhiyun {
2752*4882a593Smuzhiyun 	void *strtab;
2753*4882a593Smuzhiyun 	u64 reg;
2754*4882a593Smuzhiyun 	u32 size;
2755*4882a593Smuzhiyun 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2756*4882a593Smuzhiyun 
2757*4882a593Smuzhiyun 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2758*4882a593Smuzhiyun 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2759*4882a593Smuzhiyun 				     GFP_KERNEL);
2760*4882a593Smuzhiyun 	if (!strtab) {
2761*4882a593Smuzhiyun 		dev_err(smmu->dev,
2762*4882a593Smuzhiyun 			"failed to allocate linear stream table (%u bytes)\n",
2763*4882a593Smuzhiyun 			size);
2764*4882a593Smuzhiyun 		return -ENOMEM;
2765*4882a593Smuzhiyun 	}
2766*4882a593Smuzhiyun 	cfg->strtab = strtab;
2767*4882a593Smuzhiyun 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2768*4882a593Smuzhiyun 
2769*4882a593Smuzhiyun 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2770*4882a593Smuzhiyun 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2771*4882a593Smuzhiyun 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2772*4882a593Smuzhiyun 	cfg->strtab_base_cfg = reg;
2773*4882a593Smuzhiyun 
2774*4882a593Smuzhiyun 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2775*4882a593Smuzhiyun 	return 0;
2776*4882a593Smuzhiyun }
2777*4882a593Smuzhiyun 
2778*4882a593Smuzhiyun static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2779*4882a593Smuzhiyun {
2780*4882a593Smuzhiyun 	u64 reg;
2781*4882a593Smuzhiyun 	int ret;
2782*4882a593Smuzhiyun 
2783*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2784*4882a593Smuzhiyun 		ret = arm_smmu_init_strtab_2lvl(smmu);
2785*4882a593Smuzhiyun 	else
2786*4882a593Smuzhiyun 		ret = arm_smmu_init_strtab_linear(smmu);
2787*4882a593Smuzhiyun 
2788*4882a593Smuzhiyun 	if (ret)
2789*4882a593Smuzhiyun 		return ret;
2790*4882a593Smuzhiyun 
2791*4882a593Smuzhiyun 	/* Set the strtab base address */
2792*4882a593Smuzhiyun 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2793*4882a593Smuzhiyun 	reg |= STRTAB_BASE_RA;
2794*4882a593Smuzhiyun 	smmu->strtab_cfg.strtab_base = reg;
2795*4882a593Smuzhiyun 
2796*4882a593Smuzhiyun 	/* Allocate the first VMID for stage-2 bypass STEs */
2797*4882a593Smuzhiyun 	set_bit(0, smmu->vmid_map);
2798*4882a593Smuzhiyun 	return 0;
2799*4882a593Smuzhiyun }
2800*4882a593Smuzhiyun 
2801*4882a593Smuzhiyun static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2802*4882a593Smuzhiyun {
2803*4882a593Smuzhiyun 	int ret;
2804*4882a593Smuzhiyun 
2805*4882a593Smuzhiyun 	ret = arm_smmu_init_queues(smmu);
2806*4882a593Smuzhiyun 	if (ret)
2807*4882a593Smuzhiyun 		return ret;
2808*4882a593Smuzhiyun 
2809*4882a593Smuzhiyun 	return arm_smmu_init_strtab(smmu);
2810*4882a593Smuzhiyun }
2811*4882a593Smuzhiyun 
2812*4882a593Smuzhiyun static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2813*4882a593Smuzhiyun 				   unsigned int reg_off, unsigned int ack_off)
2814*4882a593Smuzhiyun {
2815*4882a593Smuzhiyun 	u32 reg;
2816*4882a593Smuzhiyun 
2817*4882a593Smuzhiyun 	writel_relaxed(val, smmu->base + reg_off);
2818*4882a593Smuzhiyun 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2819*4882a593Smuzhiyun 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2820*4882a593Smuzhiyun }
2821*4882a593Smuzhiyun 
2822*4882a593Smuzhiyun /* GBPA is "special" */
2823*4882a593Smuzhiyun static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2824*4882a593Smuzhiyun {
2825*4882a593Smuzhiyun 	int ret;
2826*4882a593Smuzhiyun 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2827*4882a593Smuzhiyun 
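	/*
	 * Poll for GBPA.UPDATE to clear, write the new value with UPDATE
	 * set, then poll again until the SMMU acknowledges the write by
	 * clearing UPDATE.
	 */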
2828*4882a593Smuzhiyun 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2829*4882a593Smuzhiyun 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2830*4882a593Smuzhiyun 	if (ret)
2831*4882a593Smuzhiyun 		return ret;
2832*4882a593Smuzhiyun 
2833*4882a593Smuzhiyun 	reg &= ~clr;
2834*4882a593Smuzhiyun 	reg |= set;
2835*4882a593Smuzhiyun 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2836*4882a593Smuzhiyun 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2837*4882a593Smuzhiyun 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2838*4882a593Smuzhiyun 
2839*4882a593Smuzhiyun 	if (ret)
2840*4882a593Smuzhiyun 		dev_err(smmu->dev, "GBPA not responding to update\n");
2841*4882a593Smuzhiyun 	return ret;
2842*4882a593Smuzhiyun }
2843*4882a593Smuzhiyun 
2844*4882a593Smuzhiyun static void arm_smmu_free_msis(void *data)
2845*4882a593Smuzhiyun {
2846*4882a593Smuzhiyun 	struct device *dev = data;
2847*4882a593Smuzhiyun 	platform_msi_domain_free_irqs(dev);
2848*4882a593Smuzhiyun }
2849*4882a593Smuzhiyun 
2850*4882a593Smuzhiyun static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2851*4882a593Smuzhiyun {
2852*4882a593Smuzhiyun 	phys_addr_t doorbell;
2853*4882a593Smuzhiyun 	struct device *dev = msi_desc_to_dev(desc);
2854*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2855*4882a593Smuzhiyun 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2856*4882a593Smuzhiyun 
2857*4882a593Smuzhiyun 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2858*4882a593Smuzhiyun 	doorbell &= MSI_CFG0_ADDR_MASK;
2859*4882a593Smuzhiyun 
2860*4882a593Smuzhiyun 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2861*4882a593Smuzhiyun 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2862*4882a593Smuzhiyun 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2863*4882a593Smuzhiyun }
2864*4882a593Smuzhiyun 
2865*4882a593Smuzhiyun static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2866*4882a593Smuzhiyun {
2867*4882a593Smuzhiyun 	struct msi_desc *desc;
2868*4882a593Smuzhiyun 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2869*4882a593Smuzhiyun 	struct device *dev = smmu->dev;
2870*4882a593Smuzhiyun 
2871*4882a593Smuzhiyun 	/* Clear the MSI address regs */
2872*4882a593Smuzhiyun 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2873*4882a593Smuzhiyun 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2874*4882a593Smuzhiyun 
2875*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2876*4882a593Smuzhiyun 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2877*4882a593Smuzhiyun 	else
2878*4882a593Smuzhiyun 		nvec--;
2879*4882a593Smuzhiyun 
2880*4882a593Smuzhiyun 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2881*4882a593Smuzhiyun 		return;
2882*4882a593Smuzhiyun 
2883*4882a593Smuzhiyun 	if (!dev->msi_domain) {
2884*4882a593Smuzhiyun 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2885*4882a593Smuzhiyun 		return;
2886*4882a593Smuzhiyun 	}
2887*4882a593Smuzhiyun 
2888*4882a593Smuzhiyun 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2889*4882a593Smuzhiyun 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2890*4882a593Smuzhiyun 	if (ret) {
2891*4882a593Smuzhiyun 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2892*4882a593Smuzhiyun 		return;
2893*4882a593Smuzhiyun 	}
2894*4882a593Smuzhiyun 
2895*4882a593Smuzhiyun 	for_each_msi_entry(desc, dev) {
2896*4882a593Smuzhiyun 		switch (desc->platform.msi_index) {
2897*4882a593Smuzhiyun 		case EVTQ_MSI_INDEX:
2898*4882a593Smuzhiyun 			smmu->evtq.q.irq = desc->irq;
2899*4882a593Smuzhiyun 			break;
2900*4882a593Smuzhiyun 		case GERROR_MSI_INDEX:
2901*4882a593Smuzhiyun 			smmu->gerr_irq = desc->irq;
2902*4882a593Smuzhiyun 			break;
2903*4882a593Smuzhiyun 		case PRIQ_MSI_INDEX:
2904*4882a593Smuzhiyun 			smmu->priq.q.irq = desc->irq;
2905*4882a593Smuzhiyun 			break;
2906*4882a593Smuzhiyun 		default:	/* Unknown */
2907*4882a593Smuzhiyun 			continue;
2908*4882a593Smuzhiyun 		}
2909*4882a593Smuzhiyun 	}
2910*4882a593Smuzhiyun 
2911*4882a593Smuzhiyun 	/* Add callback to free MSIs on teardown */
2912*4882a593Smuzhiyun 	devm_add_action(dev, arm_smmu_free_msis, dev);
2913*4882a593Smuzhiyun }
2914*4882a593Smuzhiyun 
2915*4882a593Smuzhiyun static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2916*4882a593Smuzhiyun {
2917*4882a593Smuzhiyun 	int irq, ret;
2918*4882a593Smuzhiyun 
2919*4882a593Smuzhiyun 	arm_smmu_setup_msis(smmu);
2920*4882a593Smuzhiyun 
2921*4882a593Smuzhiyun 	/* Request interrupt lines */
2922*4882a593Smuzhiyun 	irq = smmu->evtq.q.irq;
2923*4882a593Smuzhiyun 	if (irq) {
2924*4882a593Smuzhiyun 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2925*4882a593Smuzhiyun 						arm_smmu_evtq_thread,
2926*4882a593Smuzhiyun 						IRQF_ONESHOT,
2927*4882a593Smuzhiyun 						"arm-smmu-v3-evtq", smmu);
2928*4882a593Smuzhiyun 		if (ret < 0)
2929*4882a593Smuzhiyun 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2930*4882a593Smuzhiyun 	} else {
2931*4882a593Smuzhiyun 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2932*4882a593Smuzhiyun 	}
2933*4882a593Smuzhiyun 
2934*4882a593Smuzhiyun 	irq = smmu->gerr_irq;
2935*4882a593Smuzhiyun 	if (irq) {
2936*4882a593Smuzhiyun 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2937*4882a593Smuzhiyun 				       0, "arm-smmu-v3-gerror", smmu);
2938*4882a593Smuzhiyun 		if (ret < 0)
2939*4882a593Smuzhiyun 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2940*4882a593Smuzhiyun 	} else {
2941*4882a593Smuzhiyun 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2942*4882a593Smuzhiyun 	}
2943*4882a593Smuzhiyun 
2944*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2945*4882a593Smuzhiyun 		irq = smmu->priq.q.irq;
2946*4882a593Smuzhiyun 		if (irq) {
2947*4882a593Smuzhiyun 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2948*4882a593Smuzhiyun 							arm_smmu_priq_thread,
2949*4882a593Smuzhiyun 							IRQF_ONESHOT,
2950*4882a593Smuzhiyun 							"arm-smmu-v3-priq",
2951*4882a593Smuzhiyun 							smmu);
2952*4882a593Smuzhiyun 			if (ret < 0)
2953*4882a593Smuzhiyun 				dev_warn(smmu->dev,
2954*4882a593Smuzhiyun 					 "failed to enable priq irq\n");
2955*4882a593Smuzhiyun 		} else {
2956*4882a593Smuzhiyun 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2957*4882a593Smuzhiyun 		}
2958*4882a593Smuzhiyun 	}
2959*4882a593Smuzhiyun }
2960*4882a593Smuzhiyun 
2961*4882a593Smuzhiyun static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2962*4882a593Smuzhiyun {
2963*4882a593Smuzhiyun 	int ret, irq;
2964*4882a593Smuzhiyun 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2965*4882a593Smuzhiyun 
2966*4882a593Smuzhiyun 	/* Disable IRQs first */
2967*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2968*4882a593Smuzhiyun 				      ARM_SMMU_IRQ_CTRLACK);
2969*4882a593Smuzhiyun 	if (ret) {
2970*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to disable irqs\n");
2971*4882a593Smuzhiyun 		return ret;
2972*4882a593Smuzhiyun 	}
2973*4882a593Smuzhiyun 
2974*4882a593Smuzhiyun 	irq = smmu->combined_irq;
2975*4882a593Smuzhiyun 	if (irq) {
2976*4882a593Smuzhiyun 		/*
2977*4882a593Smuzhiyun 		 * Cavium ThunderX2 implementation doesn't support unique irq
2978*4882a593Smuzhiyun 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2979*4882a593Smuzhiyun 		 */
2980*4882a593Smuzhiyun 		ret = devm_request_threaded_irq(smmu->dev, irq,
2981*4882a593Smuzhiyun 					arm_smmu_combined_irq_handler,
2982*4882a593Smuzhiyun 					arm_smmu_combined_irq_thread,
2983*4882a593Smuzhiyun 					IRQF_ONESHOT,
2984*4882a593Smuzhiyun 					"arm-smmu-v3-combined-irq", smmu);
2985*4882a593Smuzhiyun 		if (ret < 0)
2986*4882a593Smuzhiyun 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2987*4882a593Smuzhiyun 	} else
2988*4882a593Smuzhiyun 		arm_smmu_setup_unique_irqs(smmu);
2989*4882a593Smuzhiyun 
2990*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2991*4882a593Smuzhiyun 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2992*4882a593Smuzhiyun 
2993*4882a593Smuzhiyun 	/* Enable interrupt generation on the SMMU */
2994*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2995*4882a593Smuzhiyun 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2996*4882a593Smuzhiyun 	if (ret)
2997*4882a593Smuzhiyun 		dev_warn(smmu->dev, "failed to enable irqs\n");
2998*4882a593Smuzhiyun 
2999*4882a593Smuzhiyun 	return 0;
3000*4882a593Smuzhiyun }
3001*4882a593Smuzhiyun 
3002*4882a593Smuzhiyun static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3003*4882a593Smuzhiyun {
3004*4882a593Smuzhiyun 	int ret;
3005*4882a593Smuzhiyun 
3006*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3007*4882a593Smuzhiyun 	if (ret)
3008*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to clear cr0\n");
3009*4882a593Smuzhiyun 
3010*4882a593Smuzhiyun 	return ret;
3011*4882a593Smuzhiyun }
3012*4882a593Smuzhiyun 
3013*4882a593Smuzhiyun static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3014*4882a593Smuzhiyun {
3015*4882a593Smuzhiyun 	int ret;
3016*4882a593Smuzhiyun 	u32 reg, enables;
3017*4882a593Smuzhiyun 	struct arm_smmu_cmdq_ent cmd;
3018*4882a593Smuzhiyun 
3019*4882a593Smuzhiyun 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3020*4882a593Smuzhiyun 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3021*4882a593Smuzhiyun 	if (reg & CR0_SMMUEN) {
3022*4882a593Smuzhiyun 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3023*4882a593Smuzhiyun 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3024*4882a593Smuzhiyun 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3025*4882a593Smuzhiyun 	}
3026*4882a593Smuzhiyun 
3027*4882a593Smuzhiyun 	ret = arm_smmu_device_disable(smmu);
3028*4882a593Smuzhiyun 	if (ret)
3029*4882a593Smuzhiyun 		return ret;
3030*4882a593Smuzhiyun 
3031*4882a593Smuzhiyun 	/* CR1 (table and queue memory attributes) */
3032*4882a593Smuzhiyun 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3033*4882a593Smuzhiyun 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3034*4882a593Smuzhiyun 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3035*4882a593Smuzhiyun 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3036*4882a593Smuzhiyun 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3037*4882a593Smuzhiyun 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3038*4882a593Smuzhiyun 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3039*4882a593Smuzhiyun 
3040*4882a593Smuzhiyun 	/* CR2 (random crap) */
3041*4882a593Smuzhiyun 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3042*4882a593Smuzhiyun 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3043*4882a593Smuzhiyun 
3044*4882a593Smuzhiyun 	/* Stream table */
3045*4882a593Smuzhiyun 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3046*4882a593Smuzhiyun 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3047*4882a593Smuzhiyun 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3048*4882a593Smuzhiyun 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3049*4882a593Smuzhiyun 
3050*4882a593Smuzhiyun 	/* Command queue */
3051*4882a593Smuzhiyun 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3052*4882a593Smuzhiyun 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3053*4882a593Smuzhiyun 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3054*4882a593Smuzhiyun 
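	/*
	 * Enable the command queue first: the configuration and TLB
	 * invalidation commands below must be issued before the other
	 * queues and the SMMU itself are switched on.
	 */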
3055*4882a593Smuzhiyun 	enables = CR0_CMDQEN;
3056*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3057*4882a593Smuzhiyun 				      ARM_SMMU_CR0ACK);
3058*4882a593Smuzhiyun 	if (ret) {
3059*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to enable command queue\n");
3060*4882a593Smuzhiyun 		return ret;
3061*4882a593Smuzhiyun 	}
3062*4882a593Smuzhiyun 
3063*4882a593Smuzhiyun 	/* Invalidate any cached configuration */
3064*4882a593Smuzhiyun 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3065*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3066*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_sync(smmu);
3067*4882a593Smuzhiyun 
3068*4882a593Smuzhiyun 	/* Invalidate any stale TLB entries */
3069*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3070*4882a593Smuzhiyun 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3071*4882a593Smuzhiyun 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3072*4882a593Smuzhiyun 	}
3073*4882a593Smuzhiyun 
3074*4882a593Smuzhiyun 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3075*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3076*4882a593Smuzhiyun 	arm_smmu_cmdq_issue_sync(smmu);
3077*4882a593Smuzhiyun 
3078*4882a593Smuzhiyun 	/* Event queue */
3079*4882a593Smuzhiyun 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3080*4882a593Smuzhiyun 	writel_relaxed(smmu->evtq.q.llq.prod,
3081*4882a593Smuzhiyun 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3082*4882a593Smuzhiyun 	writel_relaxed(smmu->evtq.q.llq.cons,
3083*4882a593Smuzhiyun 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3084*4882a593Smuzhiyun 
3085*4882a593Smuzhiyun 	enables |= CR0_EVTQEN;
3086*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3087*4882a593Smuzhiyun 				      ARM_SMMU_CR0ACK);
3088*4882a593Smuzhiyun 	if (ret) {
3089*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to enable event queue\n");
3090*4882a593Smuzhiyun 		return ret;
3091*4882a593Smuzhiyun 	}
3092*4882a593Smuzhiyun 
3093*4882a593Smuzhiyun 	/* PRI queue */
3094*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3095*4882a593Smuzhiyun 		writeq_relaxed(smmu->priq.q.q_base,
3096*4882a593Smuzhiyun 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3097*4882a593Smuzhiyun 		writel_relaxed(smmu->priq.q.llq.prod,
3098*4882a593Smuzhiyun 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3099*4882a593Smuzhiyun 		writel_relaxed(smmu->priq.q.llq.cons,
3100*4882a593Smuzhiyun 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3101*4882a593Smuzhiyun 
3102*4882a593Smuzhiyun 		enables |= CR0_PRIQEN;
3103*4882a593Smuzhiyun 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3104*4882a593Smuzhiyun 					      ARM_SMMU_CR0ACK);
3105*4882a593Smuzhiyun 		if (ret) {
3106*4882a593Smuzhiyun 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3107*4882a593Smuzhiyun 			return ret;
3108*4882a593Smuzhiyun 		}
3109*4882a593Smuzhiyun 	}
3110*4882a593Smuzhiyun 
3111*4882a593Smuzhiyun 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3112*4882a593Smuzhiyun 		enables |= CR0_ATSCHK;
3113*4882a593Smuzhiyun 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3114*4882a593Smuzhiyun 					      ARM_SMMU_CR0ACK);
3115*4882a593Smuzhiyun 		if (ret) {
3116*4882a593Smuzhiyun 			dev_err(smmu->dev, "failed to enable ATS check\n");
3117*4882a593Smuzhiyun 			return ret;
3118*4882a593Smuzhiyun 		}
3119*4882a593Smuzhiyun 	}
3120*4882a593Smuzhiyun 
3121*4882a593Smuzhiyun 	ret = arm_smmu_setup_irqs(smmu);
3122*4882a593Smuzhiyun 	if (ret) {
3123*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to setup irqs\n");
3124*4882a593Smuzhiyun 		return ret;
3125*4882a593Smuzhiyun 	}
3126*4882a593Smuzhiyun 
3127*4882a593Smuzhiyun 	if (is_kdump_kernel())
3128*4882a593Smuzhiyun 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3129*4882a593Smuzhiyun 
3130*4882a593Smuzhiyun 	/* Enable the SMMU interface, or ensure bypass */
3131*4882a593Smuzhiyun 	if (!bypass || disable_bypass) {
3132*4882a593Smuzhiyun 		enables |= CR0_SMMUEN;
3133*4882a593Smuzhiyun 	} else {
3134*4882a593Smuzhiyun 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3135*4882a593Smuzhiyun 		if (ret)
3136*4882a593Smuzhiyun 			return ret;
3137*4882a593Smuzhiyun 	}
3138*4882a593Smuzhiyun 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3139*4882a593Smuzhiyun 				      ARM_SMMU_CR0ACK);
3140*4882a593Smuzhiyun 	if (ret) {
3141*4882a593Smuzhiyun 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3142*4882a593Smuzhiyun 		return ret;
3143*4882a593Smuzhiyun 	}
3144*4882a593Smuzhiyun 
3145*4882a593Smuzhiyun 	return 0;
3146*4882a593Smuzhiyun }
3147*4882a593Smuzhiyun 
3148*4882a593Smuzhiyun static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3149*4882a593Smuzhiyun {
3150*4882a593Smuzhiyun 	u32 reg;
3151*4882a593Smuzhiyun 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3152*4882a593Smuzhiyun 
3153*4882a593Smuzhiyun 	/* IDR0 */
3154*4882a593Smuzhiyun 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3155*4882a593Smuzhiyun 
3156*4882a593Smuzhiyun 	/* 2-level structures */
3157*4882a593Smuzhiyun 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3158*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3159*4882a593Smuzhiyun 
3160*4882a593Smuzhiyun 	if (reg & IDR0_CD2L)
3161*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3162*4882a593Smuzhiyun 
3163*4882a593Smuzhiyun 	/*
3164*4882a593Smuzhiyun 	 * Translation table endianness.
3165*4882a593Smuzhiyun 	 * We currently require the same endianness as the CPU, but this
3166*4882a593Smuzhiyun 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3167*4882a593Smuzhiyun 	 */
3168*4882a593Smuzhiyun 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3169*4882a593Smuzhiyun 	case IDR0_TTENDIAN_MIXED:
3170*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3171*4882a593Smuzhiyun 		break;
3172*4882a593Smuzhiyun #ifdef __BIG_ENDIAN
3173*4882a593Smuzhiyun 	case IDR0_TTENDIAN_BE:
3174*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3175*4882a593Smuzhiyun 		break;
3176*4882a593Smuzhiyun #else
3177*4882a593Smuzhiyun 	case IDR0_TTENDIAN_LE:
3178*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3179*4882a593Smuzhiyun 		break;
3180*4882a593Smuzhiyun #endif
3181*4882a593Smuzhiyun 	default:
3182*4882a593Smuzhiyun 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3183*4882a593Smuzhiyun 		return -ENXIO;
3184*4882a593Smuzhiyun 	}
3185*4882a593Smuzhiyun 
3186*4882a593Smuzhiyun 	/* Boolean feature flags */
3187*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3188*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_PRI;
3189*4882a593Smuzhiyun 
3190*4882a593Smuzhiyun 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3191*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_ATS;
3192*4882a593Smuzhiyun 
3193*4882a593Smuzhiyun 	if (reg & IDR0_SEV)
3194*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_SEV;
3195*4882a593Smuzhiyun 
3196*4882a593Smuzhiyun 	if (reg & IDR0_MSI) {
3197*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_MSI;
3198*4882a593Smuzhiyun 		if (coherent && !disable_msipolling)
3199*4882a593Smuzhiyun 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3200*4882a593Smuzhiyun 	}
3201*4882a593Smuzhiyun 
3202*4882a593Smuzhiyun 	if (reg & IDR0_HYP)
3203*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_HYP;
3204*4882a593Smuzhiyun 
3205*4882a593Smuzhiyun 	/*
3206*4882a593Smuzhiyun 	 * The coherency feature as set by FW is used in preference to the ID
3207*4882a593Smuzhiyun 	 * register, but warn on mismatch.
3208*4882a593Smuzhiyun 	 */
3209*4882a593Smuzhiyun 	if (!!(reg & IDR0_COHACC) != coherent)
3210*4882a593Smuzhiyun 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3211*4882a593Smuzhiyun 			 coherent ? "true" : "false");
3212*4882a593Smuzhiyun 
3213*4882a593Smuzhiyun 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3214*4882a593Smuzhiyun 	case IDR0_STALL_MODEL_FORCE:
3215*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3216*4882a593Smuzhiyun 		fallthrough;
3217*4882a593Smuzhiyun 	case IDR0_STALL_MODEL_STALL:
3218*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3219*4882a593Smuzhiyun 	}
3220*4882a593Smuzhiyun 
3221*4882a593Smuzhiyun 	if (reg & IDR0_S1P)
3222*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3223*4882a593Smuzhiyun 
3224*4882a593Smuzhiyun 	if (reg & IDR0_S2P)
3225*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3226*4882a593Smuzhiyun 
3227*4882a593Smuzhiyun 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3228*4882a593Smuzhiyun 		dev_err(smmu->dev, "no translation support!\n");
3229*4882a593Smuzhiyun 		return -ENXIO;
3230*4882a593Smuzhiyun 	}
3231*4882a593Smuzhiyun 
3232*4882a593Smuzhiyun 	/* We only support the AArch64 table format at present */
3233*4882a593Smuzhiyun 	switch (FIELD_GET(IDR0_TTF, reg)) {
3234*4882a593Smuzhiyun 	case IDR0_TTF_AARCH32_64:
3235*4882a593Smuzhiyun 		smmu->ias = 40;
3236*4882a593Smuzhiyun 		fallthrough;
3237*4882a593Smuzhiyun 	case IDR0_TTF_AARCH64:
3238*4882a593Smuzhiyun 		break;
3239*4882a593Smuzhiyun 	default:
3240*4882a593Smuzhiyun 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3241*4882a593Smuzhiyun 		return -ENXIO;
3242*4882a593Smuzhiyun 	}
3243*4882a593Smuzhiyun 
3244*4882a593Smuzhiyun 	/* ASID/VMID sizes */
3245*4882a593Smuzhiyun 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3246*4882a593Smuzhiyun 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3247*4882a593Smuzhiyun 
3248*4882a593Smuzhiyun 	/* IDR1 */
3249*4882a593Smuzhiyun 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3250*4882a593Smuzhiyun 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3251*4882a593Smuzhiyun 		dev_err(smmu->dev, "embedded implementation not supported\n");
3252*4882a593Smuzhiyun 		return -ENXIO;
3253*4882a593Smuzhiyun 	}
3254*4882a593Smuzhiyun 
3255*4882a593Smuzhiyun 	/* Queue sizes, capped to ensure natural alignment */
3256*4882a593Smuzhiyun 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3257*4882a593Smuzhiyun 					     FIELD_GET(IDR1_CMDQS, reg));
3258*4882a593Smuzhiyun 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3259*4882a593Smuzhiyun 		/*
3260*4882a593Smuzhiyun 		 * We don't support splitting up batches, so one batch of
3261*4882a593Smuzhiyun 		 * commands plus an extra sync needs to fit inside the command
3262*4882a593Smuzhiyun 		 * queue. There's also no way we can handle the weird alignment
3263*4882a593Smuzhiyun 		 * restrictions on the base pointer for a unit-length queue.
3264*4882a593Smuzhiyun 		 */
3265*4882a593Smuzhiyun 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3266*4882a593Smuzhiyun 			CMDQ_BATCH_ENTRIES);
3267*4882a593Smuzhiyun 		return -ENXIO;
3268*4882a593Smuzhiyun 	}
3269*4882a593Smuzhiyun 
3270*4882a593Smuzhiyun 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3271*4882a593Smuzhiyun 					     FIELD_GET(IDR1_EVTQS, reg));
3272*4882a593Smuzhiyun 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3273*4882a593Smuzhiyun 					     FIELD_GET(IDR1_PRIQS, reg));
3274*4882a593Smuzhiyun 
3275*4882a593Smuzhiyun 	/* SID/SSID sizes */
3276*4882a593Smuzhiyun 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3277*4882a593Smuzhiyun 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3278*4882a593Smuzhiyun 
3279*4882a593Smuzhiyun 	/*
3280*4882a593Smuzhiyun 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3281*4882a593Smuzhiyun 	 * table, use a linear table instead.
3282*4882a593Smuzhiyun 	 */
3283*4882a593Smuzhiyun 	if (smmu->sid_bits <= STRTAB_SPLIT)
3284*4882a593Smuzhiyun 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3285*4882a593Smuzhiyun 
3286*4882a593Smuzhiyun 	/* IDR3 */
3287*4882a593Smuzhiyun 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3288*4882a593Smuzhiyun 	if (FIELD_GET(IDR3_RIL, reg))
3289*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3290*4882a593Smuzhiyun 
3291*4882a593Smuzhiyun 	/* IDR5 */
3292*4882a593Smuzhiyun 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3293*4882a593Smuzhiyun 
3294*4882a593Smuzhiyun 	/* Maximum number of outstanding stalls */
3295*4882a593Smuzhiyun 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3296*4882a593Smuzhiyun 
3297*4882a593Smuzhiyun 	/* Page sizes */
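	/*
	 * Each granule advertised in IDR5 also determines the block sizes
	 * io-pgtable can map with a single descriptor: 2MB/1GB with 4KB
	 * pages, 32MB with 16KB and 512MB with 64KB, hence the extra bits
	 * ORed in below.
	 */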
3298*4882a593Smuzhiyun 	if (reg & IDR5_GRAN64K)
3299*4882a593Smuzhiyun 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3300*4882a593Smuzhiyun 	if (reg & IDR5_GRAN16K)
3301*4882a593Smuzhiyun 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3302*4882a593Smuzhiyun 	if (reg & IDR5_GRAN4K)
3303*4882a593Smuzhiyun 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3304*4882a593Smuzhiyun 
3305*4882a593Smuzhiyun 	/* Input address size */
3306*4882a593Smuzhiyun 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3307*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_VAX;
3308*4882a593Smuzhiyun 
3309*4882a593Smuzhiyun 	/* Output address size */
3310*4882a593Smuzhiyun 	switch (FIELD_GET(IDR5_OAS, reg)) {
3311*4882a593Smuzhiyun 	case IDR5_OAS_32_BIT:
3312*4882a593Smuzhiyun 		smmu->oas = 32;
3313*4882a593Smuzhiyun 		break;
3314*4882a593Smuzhiyun 	case IDR5_OAS_36_BIT:
3315*4882a593Smuzhiyun 		smmu->oas = 36;
3316*4882a593Smuzhiyun 		break;
3317*4882a593Smuzhiyun 	case IDR5_OAS_40_BIT:
3318*4882a593Smuzhiyun 		smmu->oas = 40;
3319*4882a593Smuzhiyun 		break;
3320*4882a593Smuzhiyun 	case IDR5_OAS_42_BIT:
3321*4882a593Smuzhiyun 		smmu->oas = 42;
3322*4882a593Smuzhiyun 		break;
3323*4882a593Smuzhiyun 	case IDR5_OAS_44_BIT:
3324*4882a593Smuzhiyun 		smmu->oas = 44;
3325*4882a593Smuzhiyun 		break;
3326*4882a593Smuzhiyun 	case IDR5_OAS_52_BIT:
3327*4882a593Smuzhiyun 		smmu->oas = 52;
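		/*
		 * With 52-bit output addresses the 64KB granule gains a
		 * 4TB level-1 block size, so advertise it alongside the
		 * granule bits set above.
		 */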
3328*4882a593Smuzhiyun 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3329*4882a593Smuzhiyun 		break;
3330*4882a593Smuzhiyun 	default:
3331*4882a593Smuzhiyun 		dev_info(smmu->dev,
3332*4882a593Smuzhiyun 			"unknown output address size. Truncating to 48-bit\n");
3333*4882a593Smuzhiyun 		fallthrough;
3334*4882a593Smuzhiyun 	case IDR5_OAS_48_BIT:
3335*4882a593Smuzhiyun 		smmu->oas = 48;
3336*4882a593Smuzhiyun 	}
3337*4882a593Smuzhiyun 
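	/*
	 * arm_smmu_ops is shared by every SMMUv3 instance, so fold this
	 * device's page sizes into the global bitmap (the first probe
	 * replaces the -1UL "unset" marker outright).
	 */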
3338*4882a593Smuzhiyun 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3339*4882a593Smuzhiyun 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3340*4882a593Smuzhiyun 	else
3341*4882a593Smuzhiyun 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3342*4882a593Smuzhiyun 
3343*4882a593Smuzhiyun 	/* Set the DMA mask for our table walker */
3344*4882a593Smuzhiyun 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3345*4882a593Smuzhiyun 		dev_warn(smmu->dev,
3346*4882a593Smuzhiyun 			 "failed to set DMA mask for table walker\n");
3347*4882a593Smuzhiyun 
3348*4882a593Smuzhiyun 	smmu->ias = max(smmu->ias, smmu->oas);
3349*4882a593Smuzhiyun 
3350*4882a593Smuzhiyun 	if (arm_smmu_sva_supported(smmu))
3351*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_SVA;
3352*4882a593Smuzhiyun 
3353*4882a593Smuzhiyun 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3354*4882a593Smuzhiyun 		 smmu->ias, smmu->oas, smmu->features);
3355*4882a593Smuzhiyun 	return 0;
3356*4882a593Smuzhiyun }
3357*4882a593Smuzhiyun 
3358*4882a593Smuzhiyun #ifdef CONFIG_ACPI
3359*4882a593Smuzhiyun static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)

3360*4882a593Smuzhiyun {
3361*4882a593Smuzhiyun 	switch (model) {
3362*4882a593Smuzhiyun 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3363*4882a593Smuzhiyun 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3364*4882a593Smuzhiyun 		break;
3365*4882a593Smuzhiyun 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3366*4882a593Smuzhiyun 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3367*4882a593Smuzhiyun 		break;
3368*4882a593Smuzhiyun 	}
3369*4882a593Smuzhiyun 
3370*4882a593Smuzhiyun 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3371*4882a593Smuzhiyun }
3372*4882a593Smuzhiyun 
3373*4882a593Smuzhiyun static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3374*4882a593Smuzhiyun 				      struct arm_smmu_device *smmu)
3375*4882a593Smuzhiyun {
3376*4882a593Smuzhiyun 	struct acpi_iort_smmu_v3 *iort_smmu;
3377*4882a593Smuzhiyun 	struct device *dev = smmu->dev;
3378*4882a593Smuzhiyun 	struct acpi_iort_node *node;
3379*4882a593Smuzhiyun 
3380*4882a593Smuzhiyun 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3381*4882a593Smuzhiyun 
3382*4882a593Smuzhiyun 	/* Retrieve SMMUv3 specific data */
3383*4882a593Smuzhiyun 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3384*4882a593Smuzhiyun 
3385*4882a593Smuzhiyun 	acpi_smmu_get_options(iort_smmu->model, smmu);
3386*4882a593Smuzhiyun 
3387*4882a593Smuzhiyun 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3388*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3389*4882a593Smuzhiyun 
3390*4882a593Smuzhiyun 	return 0;
3391*4882a593Smuzhiyun }
3392*4882a593Smuzhiyun #else
3393*4882a593Smuzhiyun static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3394*4882a593Smuzhiyun 					     struct arm_smmu_device *smmu)
3395*4882a593Smuzhiyun {
3396*4882a593Smuzhiyun 	return -ENODEV;
3397*4882a593Smuzhiyun }
3398*4882a593Smuzhiyun #endif
3399*4882a593Smuzhiyun 
3400*4882a593Smuzhiyun static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3401*4882a593Smuzhiyun 				    struct arm_smmu_device *smmu)
3402*4882a593Smuzhiyun {
3403*4882a593Smuzhiyun 	struct device *dev = &pdev->dev;
3404*4882a593Smuzhiyun 	u32 cells;
3405*4882a593Smuzhiyun 	int ret = -EINVAL;
3406*4882a593Smuzhiyun 
3407*4882a593Smuzhiyun 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3408*4882a593Smuzhiyun 		dev_err(dev, "missing #iommu-cells property\n");
3409*4882a593Smuzhiyun 	else if (cells != 1)
3410*4882a593Smuzhiyun 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3411*4882a593Smuzhiyun 	else
3412*4882a593Smuzhiyun 		ret = 0;
3413*4882a593Smuzhiyun 
3414*4882a593Smuzhiyun 	parse_driver_options(smmu);
3415*4882a593Smuzhiyun 
3416*4882a593Smuzhiyun 	if (of_dma_is_coherent(dev->of_node))
3417*4882a593Smuzhiyun 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3418*4882a593Smuzhiyun 
3419*4882a593Smuzhiyun 	return ret;
3420*4882a593Smuzhiyun }
3421*4882a593Smuzhiyun 
3422*4882a593Smuzhiyun static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3423*4882a593Smuzhiyun {
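	/*
	 * ARM_SMMU_OPT_PAGE0_REGS_ONLY (set e.g. for Cavium CN99xx in
	 * acpi_smmu_get_options() above) indicates that the page-1 register
	 * space is not implemented and its registers are reachable through
	 * page 0, so only a single 64KB page of MMIO is claimed.
	 */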
3424*4882a593Smuzhiyun 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3425*4882a593Smuzhiyun 		return SZ_64K;
3426*4882a593Smuzhiyun 	else
3427*4882a593Smuzhiyun 		return SZ_128K;
3428*4882a593Smuzhiyun }
3429*4882a593Smuzhiyun 
3430*4882a593Smuzhiyun static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3431*4882a593Smuzhiyun {
3432*4882a593Smuzhiyun 	int err;
3433*4882a593Smuzhiyun 
3434*4882a593Smuzhiyun #ifdef CONFIG_PCI
3435*4882a593Smuzhiyun 	if (pci_bus_type.iommu_ops != ops) {
3436*4882a593Smuzhiyun 		err = bus_set_iommu(&pci_bus_type, ops);
3437*4882a593Smuzhiyun 		if (err)
3438*4882a593Smuzhiyun 			return err;
3439*4882a593Smuzhiyun 	}
3440*4882a593Smuzhiyun #endif
3441*4882a593Smuzhiyun #ifdef CONFIG_ARM_AMBA
3442*4882a593Smuzhiyun 	if (amba_bustype.iommu_ops != ops) {
3443*4882a593Smuzhiyun 		err = bus_set_iommu(&amba_bustype, ops);
3444*4882a593Smuzhiyun 		if (err)
3445*4882a593Smuzhiyun 			goto err_reset_pci_ops;
3446*4882a593Smuzhiyun 	}
3447*4882a593Smuzhiyun #endif
3448*4882a593Smuzhiyun 	if (platform_bus_type.iommu_ops != ops) {
3449*4882a593Smuzhiyun 		err = bus_set_iommu(&platform_bus_type, ops);
3450*4882a593Smuzhiyun 		if (err)
3451*4882a593Smuzhiyun 			goto err_reset_amba_ops;
3452*4882a593Smuzhiyun 	}
3453*4882a593Smuzhiyun 
3454*4882a593Smuzhiyun 	return 0;
3455*4882a593Smuzhiyun 
3456*4882a593Smuzhiyun err_reset_amba_ops:
3457*4882a593Smuzhiyun #ifdef CONFIG_ARM_AMBA
3458*4882a593Smuzhiyun 	bus_set_iommu(&amba_bustype, NULL);
3459*4882a593Smuzhiyun #endif
3460*4882a593Smuzhiyun err_reset_pci_ops: __maybe_unused;
3461*4882a593Smuzhiyun #ifdef CONFIG_PCI
3462*4882a593Smuzhiyun 	bus_set_iommu(&pci_bus_type, NULL);
3463*4882a593Smuzhiyun #endif
3464*4882a593Smuzhiyun 	return err;
3465*4882a593Smuzhiyun }
3466*4882a593Smuzhiyun 
3467*4882a593Smuzhiyun static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3468*4882a593Smuzhiyun 				      resource_size_t size)
3469*4882a593Smuzhiyun {
3470*4882a593Smuzhiyun 	struct resource res = {
3471*4882a593Smuzhiyun 		.flags = IORESOURCE_MEM,
3472*4882a593Smuzhiyun 		.start = start,
3473*4882a593Smuzhiyun 		.end = start + size - 1,
3474*4882a593Smuzhiyun 	};
3475*4882a593Smuzhiyun 
3476*4882a593Smuzhiyun 	return devm_ioremap_resource(dev, &res);
3477*4882a593Smuzhiyun }
3478*4882a593Smuzhiyun 
3479*4882a593Smuzhiyun static int arm_smmu_device_probe(struct platform_device *pdev)
3480*4882a593Smuzhiyun {
3481*4882a593Smuzhiyun 	int irq, ret;
3482*4882a593Smuzhiyun 	struct resource *res;
3483*4882a593Smuzhiyun 	resource_size_t ioaddr;
3484*4882a593Smuzhiyun 	struct arm_smmu_device *smmu;
3485*4882a593Smuzhiyun 	struct device *dev = &pdev->dev;
3486*4882a593Smuzhiyun 	bool bypass;
3487*4882a593Smuzhiyun 
3488*4882a593Smuzhiyun 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3489*4882a593Smuzhiyun 	if (!smmu) {
3490*4882a593Smuzhiyun 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3491*4882a593Smuzhiyun 		return -ENOMEM;
3492*4882a593Smuzhiyun 	}
3493*4882a593Smuzhiyun 	smmu->dev = dev;
3494*4882a593Smuzhiyun 
3495*4882a593Smuzhiyun 	if (dev->of_node) {
3496*4882a593Smuzhiyun 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3497*4882a593Smuzhiyun 	} else {
3498*4882a593Smuzhiyun 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3499*4882a593Smuzhiyun 		if (ret == -ENODEV)
3500*4882a593Smuzhiyun 			return ret;
3501*4882a593Smuzhiyun 	}
3502*4882a593Smuzhiyun 
3503*4882a593Smuzhiyun 	/* Set bypass mode according to firmware probing result */
3504*4882a593Smuzhiyun 	bypass = !!ret;
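	/*
	 * A non-zero ret here means firmware told us nothing useful; the
	 * SMMU is then configured for global bypass by
	 * arm_smmu_device_reset() below, unless disable_bypass forces
	 * incoming transactions to abort instead.
	 */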
3505*4882a593Smuzhiyun 
3506*4882a593Smuzhiyun 	/* Base address */
3507*4882a593Smuzhiyun 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3508*4882a593Smuzhiyun 	if (!res)
3509*4882a593Smuzhiyun 		return -EINVAL;
3510*4882a593Smuzhiyun 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3511*4882a593Smuzhiyun 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3512*4882a593Smuzhiyun 		return -EINVAL;
3513*4882a593Smuzhiyun 	}
3514*4882a593Smuzhiyun 	ioaddr = res->start;
3515*4882a593Smuzhiyun 
3516*4882a593Smuzhiyun 	/*
3517*4882a593Smuzhiyun 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3518*4882a593Smuzhiyun 	 * the PMCG registers which are reserved by the PMU driver.
3519*4882a593Smuzhiyun 	 */
3520*4882a593Smuzhiyun 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3521*4882a593Smuzhiyun 	if (IS_ERR(smmu->base))
3522*4882a593Smuzhiyun 		return PTR_ERR(smmu->base);
3523*4882a593Smuzhiyun 
3524*4882a593Smuzhiyun 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3525*4882a593Smuzhiyun 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3526*4882a593Smuzhiyun 					       ARM_SMMU_REG_SZ);
3527*4882a593Smuzhiyun 		if (IS_ERR(smmu->page1))
3528*4882a593Smuzhiyun 			return PTR_ERR(smmu->page1);
3529*4882a593Smuzhiyun 	} else {
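		/*
		 * Only 64KB of registers: alias page 1 onto page 0 so the
		 * page-1 accessors still work (the PAGE0_REGS_ONLY layout).
		 */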
3530*4882a593Smuzhiyun 		smmu->page1 = smmu->base;
3531*4882a593Smuzhiyun 	}
3532*4882a593Smuzhiyun 
3533*4882a593Smuzhiyun 	/* Interrupt lines */
3534*4882a593Smuzhiyun 
3535*4882a593Smuzhiyun 	irq = platform_get_irq_byname_optional(pdev, "combined");
3536*4882a593Smuzhiyun 	if (irq > 0)
3537*4882a593Smuzhiyun 		smmu->combined_irq = irq;
3538*4882a593Smuzhiyun 	else {
3539*4882a593Smuzhiyun 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3540*4882a593Smuzhiyun 		if (irq > 0)
3541*4882a593Smuzhiyun 			smmu->evtq.q.irq = irq;
3542*4882a593Smuzhiyun 
3543*4882a593Smuzhiyun 		irq = platform_get_irq_byname_optional(pdev, "priq");
3544*4882a593Smuzhiyun 		if (irq > 0)
3545*4882a593Smuzhiyun 			smmu->priq.q.irq = irq;
3546*4882a593Smuzhiyun 
3547*4882a593Smuzhiyun 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3548*4882a593Smuzhiyun 		if (irq > 0)
3549*4882a593Smuzhiyun 			smmu->gerr_irq = irq;
3550*4882a593Smuzhiyun 	}
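	/*
	 * These wired IRQs are only a fallback: if the SMMU advertises MSI
	 * support, arm_smmu_device_reset() below switches the event, PRI
	 * and gerror queues over to MSIs.
	 */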
3551*4882a593Smuzhiyun 	/* Probe the h/w */
3552*4882a593Smuzhiyun 	ret = arm_smmu_device_hw_probe(smmu);
3553*4882a593Smuzhiyun 	if (ret)
3554*4882a593Smuzhiyun 		return ret;
3555*4882a593Smuzhiyun 
3556*4882a593Smuzhiyun 	/* Initialise in-memory data structures */
3557*4882a593Smuzhiyun 	ret = arm_smmu_init_structures(smmu);
3558*4882a593Smuzhiyun 	if (ret)
3559*4882a593Smuzhiyun 		return ret;
3560*4882a593Smuzhiyun 
3561*4882a593Smuzhiyun 	/* Record our private device structure */
3562*4882a593Smuzhiyun 	platform_set_drvdata(pdev, smmu);
3563*4882a593Smuzhiyun 
3564*4882a593Smuzhiyun 	/* Reset the device */
3565*4882a593Smuzhiyun 	ret = arm_smmu_device_reset(smmu, bypass);
3566*4882a593Smuzhiyun 	if (ret)
3567*4882a593Smuzhiyun 		return ret;
3568*4882a593Smuzhiyun 
3569*4882a593Smuzhiyun 	/* And we're up. Go go go! */
3570*4882a593Smuzhiyun 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3571*4882a593Smuzhiyun 				     "smmu3.%pa", &ioaddr);
3572*4882a593Smuzhiyun 	if (ret)
3573*4882a593Smuzhiyun 		return ret;
3574*4882a593Smuzhiyun 
3575*4882a593Smuzhiyun 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3576*4882a593Smuzhiyun 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3577*4882a593Smuzhiyun 
3578*4882a593Smuzhiyun 	ret = iommu_device_register(&smmu->iommu);
3579*4882a593Smuzhiyun 	if (ret) {
3580*4882a593Smuzhiyun 		dev_err(dev, "Failed to register iommu\n");
3581*4882a593Smuzhiyun 		return ret;
3582*4882a593Smuzhiyun 	}
3583*4882a593Smuzhiyun 
3584*4882a593Smuzhiyun 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3585*4882a593Smuzhiyun }
3586*4882a593Smuzhiyun 
3587*4882a593Smuzhiyun static int arm_smmu_device_remove(struct platform_device *pdev)
3588*4882a593Smuzhiyun {
3589*4882a593Smuzhiyun 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3590*4882a593Smuzhiyun 
3591*4882a593Smuzhiyun 	arm_smmu_set_bus_ops(NULL);
3592*4882a593Smuzhiyun 	iommu_device_unregister(&smmu->iommu);
3593*4882a593Smuzhiyun 	iommu_device_sysfs_remove(&smmu->iommu);
3594*4882a593Smuzhiyun 	arm_smmu_device_disable(smmu);
3595*4882a593Smuzhiyun 
3596*4882a593Smuzhiyun 	return 0;
3597*4882a593Smuzhiyun }
3598*4882a593Smuzhiyun 
3599*4882a593Smuzhiyun static void arm_smmu_device_shutdown(struct platform_device *pdev)
3600*4882a593Smuzhiyun {
3601*4882a593Smuzhiyun 	arm_smmu_device_remove(pdev);
3602*4882a593Smuzhiyun }
3603*4882a593Smuzhiyun 
3604*4882a593Smuzhiyun static const struct of_device_id arm_smmu_of_match[] = {
3605*4882a593Smuzhiyun 	{ .compatible = "arm,smmu-v3", },
3606*4882a593Smuzhiyun 	{ },
3607*4882a593Smuzhiyun };
3608*4882a593Smuzhiyun MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3609*4882a593Smuzhiyun 
3610*4882a593Smuzhiyun static struct platform_driver arm_smmu_driver = {
3611*4882a593Smuzhiyun 	.driver	= {
3612*4882a593Smuzhiyun 		.name			= "arm-smmu-v3",
3613*4882a593Smuzhiyun 		.of_match_table		= arm_smmu_of_match,
3614*4882a593Smuzhiyun 		.suppress_bind_attrs	= true,
3615*4882a593Smuzhiyun 	},
3616*4882a593Smuzhiyun 	.probe	= arm_smmu_device_probe,
3617*4882a593Smuzhiyun 	.remove	= arm_smmu_device_remove,
3618*4882a593Smuzhiyun 	.shutdown = arm_smmu_device_shutdown,
3619*4882a593Smuzhiyun };
3620*4882a593Smuzhiyun module_platform_driver(arm_smmu_driver);
3621*4882a593Smuzhiyun 
3622*4882a593Smuzhiyun MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3623*4882a593Smuzhiyun MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3624*4882a593Smuzhiyun MODULE_ALIAS("platform:arm-smmu-v3");
3625*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
3626