xref: /OK3568_Linux_fs/kernel/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/dma-iommu.h>
18 #include <linux/err.h>
19 #include <linux/interrupt.h>
20 #include <linux/io-pgtable.h>
21 #include <linux/iopoll.h>
22 #include <linux/module.h>
23 #include <linux/msi.h>
24 #include <linux/of.h>
25 #include <linux/of_address.h>
26 #include <linux/of_iommu.h>
27 #include <linux/of_platform.h>
28 #include <linux/pci.h>
29 #include <linux/pci-ats.h>
30 #include <linux/platform_device.h>
31 
32 #include <linux/amba/bus.h>
33 
34 #include "arm-smmu-v3.h"
35 
36 static bool disable_bypass = true;
37 module_param(disable_bypass, bool, 0444);
38 MODULE_PARM_DESC(disable_bypass,
39 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
40 
41 static bool disable_msipolling;
42 module_param(disable_msipolling, bool, 0444);
43 MODULE_PARM_DESC(disable_msipolling,
44 	"Disable MSI-based polling for CMD_SYNC completion.");
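/*
 * Both parameters are read-only at runtime (0444). Assuming the usual
 * module name of "arm_smmu_v3" for this driver, they would typically be
 * set on the kernel command line, e.g.:
 *
 *	arm_smmu_v3.disable_bypass=0 arm_smmu_v3.disable_msipolling=1
 */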
45 
46 enum arm_smmu_msi_index {
47 	EVTQ_MSI_INDEX,
48 	GERROR_MSI_INDEX,
49 	PRIQ_MSI_INDEX,
50 	ARM_SMMU_MAX_MSIS,
51 };
52 
53 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54 	[EVTQ_MSI_INDEX] = {
55 		ARM_SMMU_EVTQ_IRQ_CFG0,
56 		ARM_SMMU_EVTQ_IRQ_CFG1,
57 		ARM_SMMU_EVTQ_IRQ_CFG2,
58 	},
59 	[GERROR_MSI_INDEX] = {
60 		ARM_SMMU_GERROR_IRQ_CFG0,
61 		ARM_SMMU_GERROR_IRQ_CFG1,
62 		ARM_SMMU_GERROR_IRQ_CFG2,
63 	},
64 	[PRIQ_MSI_INDEX] = {
65 		ARM_SMMU_PRIQ_IRQ_CFG0,
66 		ARM_SMMU_PRIQ_IRQ_CFG1,
67 		ARM_SMMU_PRIQ_IRQ_CFG2,
68 	},
69 };
70 
71 struct arm_smmu_option_prop {
72 	u32 opt;
73 	const char *prop;
74 };
75 
76 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77 DEFINE_MUTEX(arm_smmu_asid_lock);
78 
79 static struct arm_smmu_option_prop arm_smmu_options[] = {
80 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
81 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
82 	{ 0, NULL},
83 };
84 
85 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
86 						 struct arm_smmu_device *smmu)
87 {
88 	if (offset > SZ_64K)
89 		return smmu->page1 + offset - SZ_64K;
90 
91 	return smmu->base + offset;
92 }
93 
94 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
95 {
96 	return container_of(dom, struct arm_smmu_domain, domain);
97 }
98 
99 static void parse_driver_options(struct arm_smmu_device *smmu)
100 {
101 	int i = 0;
102 
103 	do {
104 		if (of_property_read_bool(smmu->dev->of_node,
105 						arm_smmu_options[i].prop)) {
106 			smmu->options |= arm_smmu_options[i].opt;
107 			dev_notice(smmu->dev, "option %s\n",
108 				arm_smmu_options[i].prop);
109 		}
110 	} while (arm_smmu_options[++i].opt);
111 }
112 
113 /* Low-level queue manipulation functions */
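/*
 * A sketch of the prod/cons encoding assumed below (see the Q_IDX, Q_WRP
 * and Q_OVF helpers in arm-smmu-v3.h): each pointer packs the queue index
 * in its low max_n_shift bits, a wrap bit immediately above that, and an
 * overflow flag in the top bit. For example, with max_n_shift == 8 (256
 * entries), prod == 0x1ff decodes as index 0xff with the wrap bit set, so
 * a queue whose cons == 0x0ff is full rather than empty.
 */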
114 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
115 {
116 	u32 space, prod, cons;
117 
118 	prod = Q_IDX(q, q->prod);
119 	cons = Q_IDX(q, q->cons);
120 
121 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
122 		space = (1 << q->max_n_shift) - (prod - cons);
123 	else
124 		space = cons - prod;
125 
126 	return space >= n;
127 }
128 
129 static bool queue_full(struct arm_smmu_ll_queue *q)
130 {
131 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
132 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
133 }
134 
135 static bool queue_empty(struct arm_smmu_ll_queue *q)
136 {
137 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
138 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
139 }
140 
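/*
 * Returns true once the hardware cons pointer has advanced past a
 * previously snapshotted prod value, taking a possible wrap of cons
 * relative to that snapshot into account. Used when polling for
 * CMD_SYNC consumption.
 */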
141 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
142 {
143 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
144 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
145 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
146 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
147 }
148 
149 static void queue_sync_cons_out(struct arm_smmu_queue *q)
150 {
151 	/*
152 	 * Ensure that all CPU accesses (reads and writes) to the queue
153 	 * are complete before we update the cons pointer.
154 	 */
155 	__iomb();
156 	writel_relaxed(q->llq.cons, q->cons_reg);
157 }
158 
159 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
160 {
161 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
162 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
163 }
164 
165 static int queue_sync_prod_in(struct arm_smmu_queue *q)
166 {
167 	u32 prod;
168 	int ret = 0;
169 
170 	/*
171 	 * We can't use the _relaxed() variant here, as we must prevent
172 	 * speculative reads of the queue before we have determined that
173 	 * prod has indeed moved.
174 	 */
175 	prod = readl(q->prod_reg);
176 
177 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
178 		ret = -EOVERFLOW;
179 
180 	q->llq.prod = prod;
181 	return ret;
182 }
183 
184 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
185 {
186 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
187 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
188 }
189 
190 static void queue_poll_init(struct arm_smmu_device *smmu,
191 			    struct arm_smmu_queue_poll *qp)
192 {
193 	qp->delay = 1;
194 	qp->spin_cnt = 0;
195 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
196 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
197 }
198 
199 static int queue_poll(struct arm_smmu_queue_poll *qp)
200 {
201 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
202 		return -ETIMEDOUT;
203 
204 	if (qp->wfe) {
205 		wfe();
206 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
207 		cpu_relax();
208 	} else {
209 		udelay(qp->delay);
210 		qp->delay *= 2;
211 		qp->spin_cnt = 0;
212 	}
213 
214 	return 0;
215 }
216 
217 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
218 {
219 	int i;
220 
221 	for (i = 0; i < n_dwords; ++i)
222 		*dst++ = cpu_to_le64(*src++);
223 }
224 
225 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
226 {
227 	int i;
228 
229 	for (i = 0; i < n_dwords; ++i)
230 		*dst++ = le64_to_cpu(*src++);
231 }
232 
233 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
234 {
235 	if (queue_empty(&q->llq))
236 		return -EAGAIN;
237 
238 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
239 	queue_inc_cons(&q->llq);
240 	queue_sync_cons_out(q);
241 	return 0;
242 }
243 
244 /* High-level queue accessors */
245 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
246 {
247 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
248 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
249 
250 	switch (ent->opcode) {
251 	case CMDQ_OP_TLBI_EL2_ALL:
252 	case CMDQ_OP_TLBI_NSNH_ALL:
253 		break;
254 	case CMDQ_OP_PREFETCH_CFG:
255 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
256 		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
257 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
275 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
276 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
278 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
279 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
281 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
282 		break;
283 	case CMDQ_OP_TLBI_S2_IPA:
284 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
285 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
287 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
288 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
290 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
291 		break;
292 	case CMDQ_OP_TLBI_NH_ASID:
293 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
294 		fallthrough;
295 	case CMDQ_OP_TLBI_S12_VMALL:
296 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
297 		break;
298 	case CMDQ_OP_ATC_INV:
299 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
300 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
301 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
302 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
303 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
304 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
305 		break;
306 	case CMDQ_OP_PRI_RESP:
307 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
308 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
309 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
310 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
311 		switch (ent->pri.resp) {
312 		case PRI_RESP_DENY:
313 		case PRI_RESP_FAIL:
314 		case PRI_RESP_SUCC:
315 			break;
316 		default:
317 			return -EINVAL;
318 		}
319 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
320 		break;
321 	case CMDQ_OP_CMD_SYNC:
322 		if (ent->sync.msiaddr) {
323 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
324 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
325 		} else {
326 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
327 		}
328 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
329 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
330 		break;
331 	default:
332 		return -ENOENT;
333 	}
334 
335 	return 0;
336 }
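/*
 * Illustrative only: callers fill in a struct arm_smmu_cmdq_ent and let
 * the helper above do the bit-packing, roughly:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_CFGI_STE,
 *		.cfgi	= { .sid = sid, .leaf = true, },
 *	};
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *
 *	if (!arm_smmu_cmdq_build_cmd(cmd, &ent))
 *		arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
 *
 * which is essentially what arm_smmu_sync_ste_for_sid() does further down
 * via arm_smmu_cmdq_issue_cmd().
 */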
337 
338 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
339 					 u32 prod)
340 {
341 	struct arm_smmu_queue *q = &smmu->cmdq.q;
342 	struct arm_smmu_cmdq_ent ent = {
343 		.opcode = CMDQ_OP_CMD_SYNC,
344 	};
345 
346 	/*
347 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
348 	 * payload, so the write will zero the entire command on that platform.
349 	 */
350 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
351 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
352 				   q->ent_dwords * 8;
353 	}
354 
355 	arm_smmu_cmdq_build_cmd(cmd, &ent);
356 }
357 
358 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
359 {
360 	static const char *cerror_str[] = {
361 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
362 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
363 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
364 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
365 	};
366 
367 	int i;
368 	u64 cmd[CMDQ_ENT_DWORDS];
369 	struct arm_smmu_queue *q = &smmu->cmdq.q;
370 	u32 cons = readl_relaxed(q->cons_reg);
371 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
372 	struct arm_smmu_cmdq_ent cmd_sync = {
373 		.opcode = CMDQ_OP_CMD_SYNC,
374 	};
375 
376 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
377 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
378 
379 	switch (idx) {
380 	case CMDQ_ERR_CERROR_ABT_IDX:
381 		dev_err(smmu->dev, "retrying command fetch\n");
382 	case CMDQ_ERR_CERROR_NONE_IDX:
383 		return;
384 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
385 		/*
386 		 * ATC Invalidation Completion timeout. CONS is still pointing
387 		 * at the CMD_SYNC. Attempt to complete other pending commands
388 		 * by repeating the CMD_SYNC, though we might well end up back
389 		 * here since the ATC invalidation may still be pending.
390 		 */
391 		return;
392 	case CMDQ_ERR_CERROR_ILL_IDX:
393 	default:
394 		break;
395 	}
396 
397 	/*
398 	 * We may have concurrent producers, so we need to be careful
399 	 * not to touch any of the shadow cmdq state.
400 	 */
401 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
402 	dev_err(smmu->dev, "skipping command in error state:\n");
403 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
404 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
405 
406 	/* Convert the erroneous command into a CMD_SYNC */
407 	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
408 		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
409 		return;
410 	}
411 
412 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
413 }
414 
415 /*
416  * Command queue locking.
417  * This is a form of bastardised rwlock with the following major changes:
418  *
419  * - The only LOCK routines are exclusive_trylock() and shared_lock().
420  *   Neither have barrier semantics, and instead provide only a control
421  *   dependency.
422  *
423  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
424  *   fails if the caller appears to be the last lock holder (yes, this is
425  *   racy). All successful UNLOCK routines have RELEASE semantics.
426  */
427 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
428 {
429 	int val;
430 
431 	/*
432 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
433 	 * lock counter. When held in exclusive state, the lock counter is set
434 	 * to INT_MIN so these increments won't hurt as the value will remain
435 	 * negative.
436 	 */
437 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
438 		return;
439 
440 	do {
441 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
442 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
443 }
444 
445 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
446 {
447 	(void)atomic_dec_return_release(&cmdq->lock);
448 }
449 
450 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
451 {
452 	if (atomic_read(&cmdq->lock) == 1)
453 		return false;
454 
455 	arm_smmu_cmdq_shared_unlock(cmdq);
456 	return true;
457 }
458 
459 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
460 ({									\
461 	bool __ret;							\
462 	local_irq_save(flags);						\
463 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
464 	if (!__ret)							\
465 		local_irq_restore(flags);				\
466 	__ret;								\
467 })
468 
469 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
470 ({									\
471 	atomic_set_release(&cmdq->lock, 0);				\
472 	local_irq_restore(flags);					\
473 })
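/*
 * Sketch of how the lock is used below (not a general-purpose API):
 * CMD_SYNC producers take the lock shared before marking their slots
 * valid and drop it with shared_tryunlock()/shared_unlock() once their
 * sync has completed, while arm_smmu_cmdq_poll_until_not_full() takes it
 * exclusively (IRQs masked) just long enough to refresh the shadow cons
 * pointer from the hardware register.
 */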
474 
475 
476 /*
477  * Command queue insertion.
478  * This is made fiddly by our attempts to achieve some sort of scalability
479  * since there is one queue shared amongst all of the CPUs in the system.  If
480  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
481  * then you'll *love* this monstrosity.
482  *
483  * The basic idea is to split the queue up into ranges of commands that are
484  * owned by a given CPU; the owner may not have written all of the commands
485  * itself, but is responsible for advancing the hardware prod pointer when
486  * the time comes. The algorithm is roughly:
487  *
488  * 	1. Allocate some space in the queue. At this point we also discover
489  *	   whether the head of the queue is currently owned by another CPU,
490  *	   or whether we are the owner.
491  *
492  *	2. Write our commands into our allocated slots in the queue.
493  *
494  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
495  *
496  *	4. If we are an owner:
497  *		a. Wait for the previous owner to finish.
498  *		b. Mark the queue head as unowned, which tells us the range
499  *		   that we are responsible for publishing.
500  *		c. Wait for all commands in our owned range to become valid.
501  *		d. Advance the hardware prod pointer.
502  *		e. Tell the next owner we've finished.
503  *
504  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
505  *	   owner), then we need to stick around until it has completed:
506  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
507  *		   to clear the first 4 bytes.
508  *		b. Otherwise, we spin waiting for the hardware cons pointer to
509  *		   advance past our command.
510  *
511  * The devil is in the details, particularly the use of locking for handling
512  * SYNC completion and freeing up space in the queue before we think that it is
513  * full.
514  */
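/*
 * For the ownership step above, note that CMDQ_PROD_OWNED_FLAG (see
 * arm-smmu-v3.h) appears to reuse the prod overflow bit: the command
 * queue producer can never legitimately overflow, so the bit is free to
 * mean "another CPU already owns the queue head". The cmpxchg() loop in
 * arm_smmu_cmdq_issue_cmdlist() sets it unconditionally; whichever CPU
 * observed it clear becomes the owner.
 */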
515 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
516 					       u32 sprod, u32 eprod, bool set)
517 {
518 	u32 swidx, sbidx, ewidx, ebidx;
519 	struct arm_smmu_ll_queue llq = {
520 		.max_n_shift	= cmdq->q.llq.max_n_shift,
521 		.prod		= sprod,
522 	};
523 
524 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
525 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
526 
527 	while (llq.prod != eprod) {
528 		unsigned long mask;
529 		atomic_long_t *ptr;
530 		u32 limit = BITS_PER_LONG;
531 
532 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
533 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
534 
535 		ptr = &cmdq->valid_map[swidx];
536 
537 		if ((swidx == ewidx) && (sbidx < ebidx))
538 			limit = ebidx;
539 
540 		mask = GENMASK(limit - 1, sbidx);
541 
542 		/*
543 		 * The valid bit is the inverse of the wrap bit. This means
544 		 * that a zero-initialised queue is invalid and, after marking
545 		 * all entries as valid, they become invalid again when we
546 		 * wrap.
547 		 */
548 		if (set) {
549 			atomic_long_xor(mask, ptr);
550 		} else { /* Poll */
551 			unsigned long valid;
552 
553 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
554 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
555 		}
556 
557 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
558 	}
559 }
560 
561 /* Mark all entries in the range [sprod, eprod) as valid */
562 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
563 					u32 sprod, u32 eprod)
564 {
565 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
566 }
567 
568 /* Wait for all entries in the range [sprod, eprod) to become valid */
569 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
570 					 u32 sprod, u32 eprod)
571 {
572 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
573 }
574 
575 /* Wait for the command queue to become non-full */
576 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
577 					     struct arm_smmu_ll_queue *llq)
578 {
579 	unsigned long flags;
580 	struct arm_smmu_queue_poll qp;
581 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
582 	int ret = 0;
583 
584 	/*
585 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
586 	 * that fails, spin until somebody else updates it for us.
587 	 */
588 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
589 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
590 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
591 		llq->val = READ_ONCE(cmdq->q.llq.val);
592 		return 0;
593 	}
594 
595 	queue_poll_init(smmu, &qp);
596 	do {
597 		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
598 		if (!queue_full(llq))
599 			break;
600 
601 		ret = queue_poll(&qp);
602 	} while (!ret);
603 
604 	return ret;
605 }
606 
607 /*
608  * Wait until the SMMU signals a CMD_SYNC completion MSI.
609  * Must be called with the cmdq lock held in some capacity.
610  */
611 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
612 					  struct arm_smmu_ll_queue *llq)
613 {
614 	int ret = 0;
615 	struct arm_smmu_queue_poll qp;
616 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
617 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
618 
619 	queue_poll_init(smmu, &qp);
620 
621 	/*
622 	 * The MSI won't generate an event, since it's being written back
623 	 * into the command queue.
624 	 */
625 	qp.wfe = false;
626 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
627 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
628 	return ret;
629 }
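/*
 * Note on the trick above: arm_smmu_cmdq_build_sync_cmd() points the
 * CMD_SYNC's MSI address back at the command's own slot in the queue, so
 * completion is observed as the SMMU overwriting the first 32 bits of the
 * command with zero (hence the !VAL condition) rather than as a real
 * interrupt.
 */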
630 
631 /*
632  * Wait until the SMMU cons index passes llq->prod.
633  * Must be called with the cmdq lock held in some capacity.
634  */
635 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
636 					       struct arm_smmu_ll_queue *llq)
637 {
638 	struct arm_smmu_queue_poll qp;
639 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
640 	u32 prod = llq->prod;
641 	int ret = 0;
642 
643 	queue_poll_init(smmu, &qp);
644 	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
645 	do {
646 		if (queue_consumed(llq, prod))
647 			break;
648 
649 		ret = queue_poll(&qp);
650 
651 		/*
652 		 * This needs to be a readl() so that our subsequent call
653 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
654 		 *
655 		 * Specifically, we need to ensure that we observe all
656 		 * shared_lock()s by other CMD_SYNCs that share our owner,
657 		 * so that a failing call to tryunlock() means that we're
658 		 * the last one out and therefore we can safely advance
659 		 * cmdq->q.llq.cons. Roughly speaking:
660 		 *
661 		 * CPU 0		CPU1			CPU2 (us)
662 		 *
663 		 * if (sync)
664 		 * 	shared_lock();
665 		 *
666 		 * dma_wmb();
667 		 * set_valid_map();
668 		 *
669 		 * 			if (owner) {
670 		 *				poll_valid_map();
671 		 *				<control dependency>
672 		 *				writel(prod_reg);
673 		 *
674 		 *						readl(cons_reg);
675 		 *						tryunlock();
676 		 *
677 		 * Requires us to see CPU 0's shared_lock() acquisition.
678 		 */
679 		llq->cons = readl(cmdq->q.cons_reg);
680 	} while (!ret);
681 
682 	return ret;
683 }
684 
685 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
686 					 struct arm_smmu_ll_queue *llq)
687 {
688 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
689 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
690 
691 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
692 }
693 
694 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
695 					u32 prod, int n)
696 {
697 	int i;
698 	struct arm_smmu_ll_queue llq = {
699 		.max_n_shift	= cmdq->q.llq.max_n_shift,
700 		.prod		= prod,
701 	};
702 
703 	for (i = 0; i < n; ++i) {
704 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
705 
706 		prod = queue_inc_prod_n(&llq, i);
707 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
708 	}
709 }
710 
711 /*
712  * This is the actual insertion function, and provides the following
713  * ordering guarantees to callers:
714  *
715  * - There is a dma_wmb() before publishing any commands to the queue.
716  *   This can be relied upon to order prior writes to data structures
717  *   in memory (such as a CD or an STE) before the command.
718  *
719  * - On completion of a CMD_SYNC, there is a control dependency.
720  *   This can be relied upon to order subsequent writes to memory (e.g.
721  *   freeing an IOVA) after completion of the CMD_SYNC.
722  *
723  * - Command insertion is totally ordered, so if two CPUs each race to
724  *   insert their own list of commands then all of the commands from one
725  *   CPU will appear before any of the commands from the other CPU.
726  */
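/*
 * A minimal sketch of how callers rely on the first guarantee: update a
 * table entry in memory, then push the matching invalidation command and
 * let the dma_wmb() below order the two, e.g.
 *
 *	WRITE_ONCE(cdptr[0], cpu_to_le64(val));		// CD update
 *	arm_smmu_sync_cd(smmu_domain, ssid, true);	// CFGI_CD + CMD_SYNC
 *
 * (see arm_smmu_write_ctx_desc() below for the real thing).
 */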
727 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
728 				       u64 *cmds, int n, bool sync)
729 {
730 	u64 cmd_sync[CMDQ_ENT_DWORDS];
731 	u32 prod;
732 	unsigned long flags;
733 	bool owner;
734 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
735 	struct arm_smmu_ll_queue llq = {
736 		.max_n_shift = cmdq->q.llq.max_n_shift,
737 	}, head = llq;
738 	int ret = 0;
739 
740 	/* 1. Allocate some space in the queue */
741 	local_irq_save(flags);
742 	llq.val = READ_ONCE(cmdq->q.llq.val);
743 	do {
744 		u64 old;
745 
746 		while (!queue_has_space(&llq, n + sync)) {
747 			local_irq_restore(flags);
748 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
749 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
750 			local_irq_save(flags);
751 		}
752 
753 		head.cons = llq.cons;
754 		head.prod = queue_inc_prod_n(&llq, n + sync) |
755 					     CMDQ_PROD_OWNED_FLAG;
756 
757 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
758 		if (old == llq.val)
759 			break;
760 
761 		llq.val = old;
762 	} while (1);
763 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
764 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
765 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
766 
767 	/*
768 	 * 2. Write our commands into the queue
769 	 * Dependency ordering from the cmpxchg() loop above.
770 	 */
771 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
772 	if (sync) {
773 		prod = queue_inc_prod_n(&llq, n);
774 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
775 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
776 
777 		/*
778 		 * In order to determine completion of our CMD_SYNC, we must
779 		 * ensure that the queue can't wrap twice without us noticing.
780 		 * We achieve that by taking the cmdq lock as shared before
781 		 * marking our slot as valid.
782 		 */
783 		arm_smmu_cmdq_shared_lock(cmdq);
784 	}
785 
786 	/* 3. Mark our slots as valid, ensuring commands are visible first */
787 	dma_wmb();
788 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
789 
790 	/* 4. If we are the owner, take control of the SMMU hardware */
791 	if (owner) {
792 		/* a. Wait for previous owner to finish */
793 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
794 
795 		/* b. Stop gathering work by clearing the owned flag */
796 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
797 						   &cmdq->q.llq.atomic.prod);
798 		prod &= ~CMDQ_PROD_OWNED_FLAG;
799 
800 		/*
801 		 * c. Wait for any gathered work to be written to the queue.
802 		 * Note that we read our own entries so that we have the control
803 		 * dependency required by (d).
804 		 */
805 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
806 
807 		/*
808 		 * d. Advance the hardware prod pointer
809 		 * Control dependency ordering from the entries becoming valid.
810 		 */
811 		writel_relaxed(prod, cmdq->q.prod_reg);
812 
813 		/*
814 		 * e. Tell the next owner we're done
815 		 * Make sure we've updated the hardware first, so that we don't
816 		 * race to update prod and potentially move it backwards.
817 		 */
818 		atomic_set_release(&cmdq->owner_prod, prod);
819 	}
820 
821 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
822 	if (sync) {
823 		llq.prod = queue_inc_prod_n(&llq, n);
824 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
825 		if (ret) {
826 			dev_err_ratelimited(smmu->dev,
827 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
828 					    llq.prod,
829 					    readl_relaxed(cmdq->q.prod_reg),
830 					    readl_relaxed(cmdq->q.cons_reg));
831 		}
832 
833 		/*
834 		 * Try to unlock the cmdq lock. This will fail if we're the last
835 		 * reader, in which case we can safely update cmdq->q.llq.cons
836 		 */
837 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
838 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
839 			arm_smmu_cmdq_shared_unlock(cmdq);
840 		}
841 	}
842 
843 	local_irq_restore(flags);
844 	return ret;
845 }
846 
847 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
848 				   struct arm_smmu_cmdq_ent *ent)
849 {
850 	u64 cmd[CMDQ_ENT_DWORDS];
851 
852 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
853 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
854 			 ent->opcode);
855 		return -EINVAL;
856 	}
857 
858 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
859 }
860 
861 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
862 {
863 	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
864 }
865 
866 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
867 				    struct arm_smmu_cmdq_batch *cmds,
868 				    struct arm_smmu_cmdq_ent *cmd)
869 {
870 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
871 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
872 		cmds->num = 0;
873 	}
874 	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
875 	cmds->num++;
876 }
877 
878 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
879 				      struct arm_smmu_cmdq_batch *cmds)
880 {
881 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
882 }
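/*
 * Typical batching pattern used by callers such as arm_smmu_sync_cd()
 * and arm_smmu_tlb_inv_range() (sketch):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *
 *	for each target {
 *		cmd.cfgi.sid = ...;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);	// appends a CMD_SYNC
 *
 * batch_add() flushes (without a sync) whenever the batch reaches
 * CMDQ_BATCH_ENTRIES, so a batch never exceeds one cmdlist insertion.
 */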
883 
884 /* Context descriptor manipulation functions */
885 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
886 {
887 	struct arm_smmu_cmdq_ent cmd = {
888 		.opcode = CMDQ_OP_TLBI_NH_ASID,
889 		.tlbi.asid = asid,
890 	};
891 
892 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
893 	arm_smmu_cmdq_issue_sync(smmu);
894 }
895 
896 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
897 			     int ssid, bool leaf)
898 {
899 	size_t i;
900 	unsigned long flags;
901 	struct arm_smmu_master *master;
902 	struct arm_smmu_cmdq_batch cmds = {};
903 	struct arm_smmu_device *smmu = smmu_domain->smmu;
904 	struct arm_smmu_cmdq_ent cmd = {
905 		.opcode	= CMDQ_OP_CFGI_CD,
906 		.cfgi	= {
907 			.ssid	= ssid,
908 			.leaf	= leaf,
909 		},
910 	};
911 
912 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
913 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
914 		for (i = 0; i < master->num_sids; i++) {
915 			cmd.cfgi.sid = master->sids[i];
916 			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
917 		}
918 	}
919 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
920 
921 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
922 }
923 
924 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
925 					struct arm_smmu_l1_ctx_desc *l1_desc)
926 {
927 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
928 
929 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
930 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
931 	if (!l1_desc->l2ptr) {
932 		dev_warn(smmu->dev,
933 			 "failed to allocate context descriptor table\n");
934 		return -ENOMEM;
935 	}
936 	return 0;
937 }
938 
939 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
940 				      struct arm_smmu_l1_ctx_desc *l1_desc)
941 {
942 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
943 		  CTXDESC_L1_DESC_V;
944 
945 	/* See comment in arm_smmu_write_ctx_desc() */
946 	WRITE_ONCE(*dst, cpu_to_le64(val));
947 }
948 
949 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
950 				   u32 ssid)
951 {
952 	__le64 *l1ptr;
953 	unsigned int idx;
954 	struct arm_smmu_l1_ctx_desc *l1_desc;
955 	struct arm_smmu_device *smmu = smmu_domain->smmu;
956 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
957 
958 	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
959 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
960 
961 	idx = ssid >> CTXDESC_SPLIT;
962 	l1_desc = &cdcfg->l1_desc[idx];
963 	if (!l1_desc->l2ptr) {
964 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
965 			return NULL;
966 
967 		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
968 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
969 		/* An invalid L1CD can be cached */
970 		arm_smmu_sync_cd(smmu_domain, ssid, false);
971 	}
972 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
973 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
974 }
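/*
 * Worked example for the two-level lookup above, assuming CTXDESC_SPLIT
 * gives CTXDESC_L2_ENTRIES == 1024 CDs per leaf: ssid 0x1234 selects L1
 * descriptor idx = 0x1234 >> 10 = 4 and slot 0x1234 & 0x3ff = 0x234
 * within that leaf. Leaves are allocated lazily on first use and made
 * visible with an explicit sync, since an invalid L1 descriptor may have
 * been cached by the SMMU.
 */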
975 
976 int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
977 			    struct arm_smmu_ctx_desc *cd)
978 {
979 	/*
980 	 * This function handles the following cases:
981 	 *
982 	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
983 	 * (2) Install a secondary CD, for SID+SSID traffic.
984 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
985 	 *     CD, then invalidate the old entry and mappings.
986 	 * (4) Remove a secondary CD.
987 	 */
988 	u64 val;
989 	bool cd_live;
990 	__le64 *cdptr;
991 	struct arm_smmu_device *smmu = smmu_domain->smmu;
992 
993 	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
994 		return -E2BIG;
995 
996 	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
997 	if (!cdptr)
998 		return -ENOMEM;
999 
1000 	val = le64_to_cpu(cdptr[0]);
1001 	cd_live = !!(val & CTXDESC_CD_0_V);
1002 
1003 	if (!cd) { /* (4) */
1004 		val = 0;
1005 	} else if (cd_live) { /* (3) */
1006 		val &= ~CTXDESC_CD_0_ASID;
1007 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1008 		/*
1009 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1010 		 * this substream's traffic
1011 		 */
1012 	} else { /* (1) and (2) */
1013 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1014 		cdptr[2] = 0;
1015 		cdptr[3] = cpu_to_le64(cd->mair);
1016 
1017 		/*
1018 		 * STE is live, and the SMMU might read dwords of this CD in any
1019 		 * order. Ensure that it observes valid values before reading
1020 		 * V=1.
1021 		 */
1022 		arm_smmu_sync_cd(smmu_domain, ssid, true);
1023 
1024 		val = cd->tcr |
1025 #ifdef __BIG_ENDIAN
1026 			CTXDESC_CD_0_ENDI |
1027 #endif
1028 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1029 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1030 			CTXDESC_CD_0_AA64 |
1031 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1032 			CTXDESC_CD_0_V;
1033 
1034 		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1035 		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1036 			val |= CTXDESC_CD_0_S;
1037 	}
1038 
1039 	/*
1040 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1041 	 * "Configuration structures and configuration invalidation completion"
1042 	 *
1043 	 *   The size of single-copy atomic reads made by the SMMU is
1044 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1045 	 *   field within an aligned 64-bit span of a structure can be altered
1046 	 *   without first making the structure invalid.
1047 	 */
1048 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1049 	arm_smmu_sync_cd(smmu_domain, ssid, true);
1050 	return 0;
1051 }
1052 
1053 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1054 {
1055 	int ret;
1056 	size_t l1size;
1057 	size_t max_contexts;
1058 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1059 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1060 	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1061 
1062 	max_contexts = 1 << cfg->s1cdmax;
1063 
1064 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1065 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1066 		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1067 		cdcfg->num_l1_ents = max_contexts;
1068 
1069 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1070 	} else {
1071 		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1072 		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1073 						  CTXDESC_L2_ENTRIES);
1074 
1075 		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1076 					      sizeof(*cdcfg->l1_desc),
1077 					      GFP_KERNEL);
1078 		if (!cdcfg->l1_desc)
1079 			return -ENOMEM;
1080 
1081 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1082 	}
1083 
1084 	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1085 					   GFP_KERNEL);
1086 	if (!cdcfg->cdtab) {
1087 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1088 		ret = -ENOMEM;
1089 		goto err_free_l1;
1090 	}
1091 
1092 	return 0;
1093 
1094 err_free_l1:
1095 	if (cdcfg->l1_desc) {
1096 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1097 		cdcfg->l1_desc = NULL;
1098 	}
1099 	return ret;
1100 }
1101 
1102 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1103 {
1104 	int i;
1105 	size_t size, l1size;
1106 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1107 	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1108 
1109 	if (cdcfg->l1_desc) {
1110 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1111 
1112 		for (i = 0; i < cdcfg->num_l1_ents; i++) {
1113 			if (!cdcfg->l1_desc[i].l2ptr)
1114 				continue;
1115 
1116 			dmam_free_coherent(smmu->dev, size,
1117 					   cdcfg->l1_desc[i].l2ptr,
1118 					   cdcfg->l1_desc[i].l2ptr_dma);
1119 		}
1120 		devm_kfree(smmu->dev, cdcfg->l1_desc);
1121 		cdcfg->l1_desc = NULL;
1122 
1123 		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1124 	} else {
1125 		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1126 	}
1127 
1128 	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1129 	cdcfg->cdtab_dma = 0;
1130 	cdcfg->cdtab = NULL;
1131 }
1132 
1133 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134 {
1135 	bool free;
1136 	struct arm_smmu_ctx_desc *old_cd;
1137 
1138 	if (!cd->asid)
1139 		return false;
1140 
1141 	free = refcount_dec_and_test(&cd->refs);
1142 	if (free) {
1143 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144 		WARN_ON(old_cd != cd);
1145 	}
1146 	return free;
1147 }
1148 
1149 /* Stream table manipulation functions */
1150 static void
1151 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152 {
1153 	u64 val = 0;
1154 
1155 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157 
1158 	/* See comment in arm_smmu_write_ctx_desc() */
1159 	WRITE_ONCE(*dst, cpu_to_le64(val));
1160 }
1161 
1162 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163 {
1164 	struct arm_smmu_cmdq_ent cmd = {
1165 		.opcode	= CMDQ_OP_CFGI_STE,
1166 		.cfgi	= {
1167 			.sid	= sid,
1168 			.leaf	= true,
1169 		},
1170 	};
1171 
1172 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173 	arm_smmu_cmdq_issue_sync(smmu);
1174 }
1175 
1176 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177 				      __le64 *dst)
1178 {
1179 	/*
1180 	 * This is hideously complicated, but we only really care about
1181 	 * three cases at the moment:
1182 	 *
1183 	 * 1. Invalid (all zero) -> bypass/fault (init)
1184 	 * 2. Bypass/fault -> translation/bypass (attach)
1185 	 * 3. Translation/bypass -> bypass/fault (detach)
1186 	 *
1187 	 * Given that we can't update the STE atomically and the SMMU
1188 	 * doesn't read the thing in a defined order, that leaves us
1189 	 * with the following maintenance requirements:
1190 	 *
1191 	 * 1. Update Config, return (init time STEs aren't live)
1192 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193 	 * 3. Update Config, sync
1194 	 */
1195 	u64 val = le64_to_cpu(dst[0]);
1196 	bool ste_live = false;
1197 	struct arm_smmu_device *smmu = NULL;
1198 	struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200 	struct arm_smmu_domain *smmu_domain = NULL;
1201 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1202 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1203 		.prefetch	= {
1204 			.sid	= sid,
1205 		},
1206 	};
1207 
1208 	if (master) {
1209 		smmu_domain = master->domain;
1210 		smmu = master->smmu;
1211 	}
1212 
1213 	if (smmu_domain) {
1214 		switch (smmu_domain->stage) {
1215 		case ARM_SMMU_DOMAIN_S1:
1216 			s1_cfg = &smmu_domain->s1_cfg;
1217 			break;
1218 		case ARM_SMMU_DOMAIN_S2:
1219 		case ARM_SMMU_DOMAIN_NESTED:
1220 			s2_cfg = &smmu_domain->s2_cfg;
1221 			break;
1222 		default:
1223 			break;
1224 		}
1225 	}
1226 
1227 	if (val & STRTAB_STE_0_V) {
1228 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229 		case STRTAB_STE_0_CFG_BYPASS:
1230 			break;
1231 		case STRTAB_STE_0_CFG_S1_TRANS:
1232 		case STRTAB_STE_0_CFG_S2_TRANS:
1233 			ste_live = true;
1234 			break;
1235 		case STRTAB_STE_0_CFG_ABORT:
1236 			BUG_ON(!disable_bypass);
1237 			break;
1238 		default:
1239 			BUG(); /* STE corruption */
1240 		}
1241 	}
1242 
1243 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1244 	val = STRTAB_STE_0_V;
1245 
1246 	/* Bypass/fault */
1247 	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248 		if (!smmu_domain && disable_bypass)
1249 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250 		else
1251 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252 
1253 		dst[0] = cpu_to_le64(val);
1254 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255 						STRTAB_STE_1_SHCFG_INCOMING));
1256 		dst[2] = 0; /* Nuke the VMID */
1257 		/*
1258 		 * The SMMU can perform negative caching, so we must sync
1259 		 * the STE regardless of whether the old value was live.
1260 		 */
1261 		if (smmu)
1262 			arm_smmu_sync_ste_for_sid(smmu, sid);
1263 		return;
1264 	}
1265 
1266 	if (s1_cfg) {
1267 		BUG_ON(ste_live);
1268 		dst[1] = cpu_to_le64(
1269 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273 			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274 
1275 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278 
1279 		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282 			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283 	}
1284 
1285 	if (s2_cfg) {
1286 		BUG_ON(ste_live);
1287 		dst[2] = cpu_to_le64(
1288 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290 #ifdef __BIG_ENDIAN
1291 			 STRTAB_STE_2_S2ENDI |
1292 #endif
1293 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294 			 STRTAB_STE_2_S2R);
1295 
1296 		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297 
1298 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299 	}
1300 
1301 	if (master->ats_enabled)
1302 		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303 						 STRTAB_STE_1_EATS_TRANS));
1304 
1305 	arm_smmu_sync_ste_for_sid(smmu, sid);
1306 	/* See comment in arm_smmu_write_ctx_desc() */
1307 	WRITE_ONCE(dst[0], cpu_to_le64(val));
1308 	arm_smmu_sync_ste_for_sid(smmu, sid);
1309 
1310 	/* It's likely that we'll want to use the new STE soon */
1311 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313 }
1314 
1315 static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316 {
1317 	unsigned int i;
1318 
1319 	for (i = 0; i < nent; ++i) {
1320 		arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321 		strtab += STRTAB_STE_DWORDS;
1322 	}
1323 }
1324 
1325 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326 {
1327 	size_t size;
1328 	void *strtab;
1329 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331 
1332 	if (desc->l2ptr)
1333 		return 0;
1334 
1335 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337 
1338 	desc->span = STRTAB_SPLIT + 1;
1339 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340 					  GFP_KERNEL);
1341 	if (!desc->l2ptr) {
1342 		dev_err(smmu->dev,
1343 			"failed to allocate l2 stream table for SID %u\n",
1344 			sid);
1345 		return -ENOMEM;
1346 	}
1347 
1348 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1350 	return 0;
1351 }
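/*
 * Layout sketch for the two-level stream table: sid >> STRTAB_SPLIT picks
 * the L1 descriptor (and hence a lazily-allocated leaf of
 * 1 << STRTAB_SPLIT STEs, initialised via arm_smmu_init_bypass_stes()
 * above to bypass or abort depending on disable_bypass), while the low
 * STRTAB_SPLIT bits of the SID index the STE within that leaf. The span
 * field is encoded as STRTAB_SPLIT + 1, i.e. log2(STEs per leaf) + 1.
 */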
1352 
1353 /* IRQ and event handlers */
1354 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355 {
1356 	int i;
1357 	struct arm_smmu_device *smmu = dev;
1358 	struct arm_smmu_queue *q = &smmu->evtq.q;
1359 	struct arm_smmu_ll_queue *llq = &q->llq;
1360 	u64 evt[EVTQ_ENT_DWORDS];
1361 
1362 	do {
1363 		while (!queue_remove_raw(q, evt)) {
1364 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365 
1366 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368 				dev_info(smmu->dev, "\t0x%016llx\n",
1369 					 (unsigned long long)evt[i]);
1370 
1371 			cond_resched();
1372 		}
1373 
1374 		/*
1375 		 * Not much we can do on overflow, so scream and pretend we're
1376 		 * trying harder.
1377 		 */
1378 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1379 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1380 	} while (!queue_empty(llq));
1381 
1382 	/* Sync our overflow flag, as we believe we're up to speed */
1383 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1384 		    Q_IDX(llq, llq->cons);
1385 	return IRQ_HANDLED;
1386 }
1387 
1388 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1389 {
1390 	u32 sid, ssid;
1391 	u16 grpid;
1392 	bool ssv, last;
1393 
1394 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1395 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1396 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1397 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1398 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1399 
1400 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1401 	dev_info(smmu->dev,
1402 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1403 		 sid, ssid, grpid, last ? "L" : "",
1404 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1405 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1406 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1407 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1408 		 evt[1] & PRIQ_1_ADDR_MASK);
1409 
1410 	if (last) {
1411 		struct arm_smmu_cmdq_ent cmd = {
1412 			.opcode			= CMDQ_OP_PRI_RESP,
1413 			.substream_valid	= ssv,
1414 			.pri			= {
1415 				.sid	= sid,
1416 				.ssid	= ssid,
1417 				.grpid	= grpid,
1418 				.resp	= PRI_RESP_DENY,
1419 			},
1420 		};
1421 
1422 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423 	}
1424 }
1425 
1426 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1427 {
1428 	struct arm_smmu_device *smmu = dev;
1429 	struct arm_smmu_queue *q = &smmu->priq.q;
1430 	struct arm_smmu_ll_queue *llq = &q->llq;
1431 	u64 evt[PRIQ_ENT_DWORDS];
1432 
1433 	do {
1434 		while (!queue_remove_raw(q, evt))
1435 			arm_smmu_handle_ppr(smmu, evt);
1436 
1437 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1438 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1439 	} while (!queue_empty(llq));
1440 
1441 	/* Sync our overflow flag, as we believe we're up to speed */
1442 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1443 		      Q_IDX(llq, llq->cons);
1444 	queue_sync_cons_out(q);
1445 	return IRQ_HANDLED;
1446 }
1447 
1448 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1449 
1450 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1451 {
1452 	u32 gerror, gerrorn, active;
1453 	struct arm_smmu_device *smmu = dev;
1454 
1455 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1456 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1457 
1458 	active = gerror ^ gerrorn;
1459 	if (!(active & GERROR_ERR_MASK))
1460 		return IRQ_NONE; /* No errors pending */
1461 
1462 	dev_warn(smmu->dev,
1463 		 "unexpected global error reported (0x%08x), this could be serious\n",
1464 		 active);
1465 
1466 	if (active & GERROR_SFM_ERR) {
1467 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1468 		arm_smmu_device_disable(smmu);
1469 	}
1470 
1471 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1472 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1473 
1474 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1475 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1476 
1477 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1478 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1479 
1480 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1481 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1482 
1483 	if (active & GERROR_PRIQ_ABT_ERR)
1484 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1485 
1486 	if (active & GERROR_EVTQ_ABT_ERR)
1487 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1488 
1489 	if (active & GERROR_CMDQ_ERR)
1490 		arm_smmu_cmdq_skip_err(smmu);
1491 
1492 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1493 	return IRQ_HANDLED;
1494 }
1495 
1496 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1497 {
1498 	struct arm_smmu_device *smmu = dev;
1499 
1500 	arm_smmu_evtq_thread(irq, dev);
1501 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1502 		arm_smmu_priq_thread(irq, dev);
1503 
1504 	return IRQ_HANDLED;
1505 }
1506 
1507 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1508 {
1509 	arm_smmu_gerror_handler(irq, dev);
1510 	return IRQ_WAKE_THREAD;
1511 }
1512 
1513 static void
1514 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1515 			struct arm_smmu_cmdq_ent *cmd)
1516 {
1517 	size_t log2_span;
1518 	size_t span_mask;
1519 	/* ATC invalidates are always on 4096-bytes pages */
1520 	/* ATC invalidates are always on 4096-byte pages */
1521 	unsigned long page_start, page_end;
1522 
1523 	*cmd = (struct arm_smmu_cmdq_ent) {
1524 		.opcode			= CMDQ_OP_ATC_INV,
1525 		.substream_valid	= !!ssid,
1526 		.atc.ssid		= ssid,
1527 	};
1528 
1529 	if (!size) {
1530 		cmd->atc.size = ATC_INV_SIZE_ALL;
1531 		return;
1532 	}
1533 
1534 	page_start	= iova >> inval_grain_shift;
1535 	page_end	= (iova + size - 1) >> inval_grain_shift;
1536 
1537 	/*
1538 	 * In an ATS Invalidate Request, the address must be aligned on the
1539 	 * range size, which must be a power of two number of page sizes. We
1540 	 * thus have to choose between grossly over-invalidating the region, or
1541 	 * splitting the invalidation into multiple commands. For simplicity
1542 	 * we'll go with the first solution, but should refine it in the future
1543 	 * if multiple commands are shown to be more efficient.
1544 	 *
1545 	 * Find the smallest power of two that covers the range. The most
1546 	 * significant differing bit between the start and end addresses,
1547 	 * fls(start ^ end), indicates the required span. For example:
1548 	 *
1549 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1550 	 *		x = 0b1000 ^ 0b1011 = 0b11
1551 	 *		span = 1 << fls(x) = 4
1552 	 *
1553 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1554 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1555 	 *		span = 1 << fls(x) = 16
1556 	 */
1557 	log2_span	= fls_long(page_start ^ page_end);
1558 	span_mask	= (1ULL << log2_span) - 1;
1559 
1560 	page_start	&= ~span_mask;
1561 
1562 	cmd->atc.addr	= page_start << inval_grain_shift;
1563 	cmd->atc.size	= log2_span;
1564 }
1565 
1566 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1567 {
1568 	int i;
1569 	struct arm_smmu_cmdq_ent cmd;
1570 
1571 	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1572 
1573 	for (i = 0; i < master->num_sids; i++) {
1574 		cmd.atc.sid = master->sids[i];
1575 		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1576 	}
1577 
1578 	return arm_smmu_cmdq_issue_sync(master->smmu);
1579 }
1580 
1581 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1582 				   int ssid, unsigned long iova, size_t size)
1583 {
1584 	int i;
1585 	unsigned long flags;
1586 	struct arm_smmu_cmdq_ent cmd;
1587 	struct arm_smmu_master *master;
1588 	struct arm_smmu_cmdq_batch cmds = {};
1589 
1590 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1591 		return 0;
1592 
1593 	/*
1594 	 * Ensure that we've completed prior invalidation of the main TLBs
1595 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1596 	 * arm_smmu_enable_ats():
1597 	 *
1598 	 *	// unmap()			// arm_smmu_enable_ats()
1599 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1600 	 *	smp_mb();			[...]
1601 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1602 	 *
1603 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1604 	 * ATS was enabled at the PCI device before completion of the TLBI.
1605 	 */
1606 	smp_mb();
1607 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1608 		return 0;
1609 
1610 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1611 
1612 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1613 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1614 		if (!master->ats_enabled)
1615 			continue;
1616 
1617 		for (i = 0; i < master->num_sids; i++) {
1618 			cmd.atc.sid = master->sids[i];
1619 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1620 		}
1621 	}
1622 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1623 
1624 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1625 }
1626 
1627 /* IO_PGTABLE API */
1628 static void arm_smmu_tlb_inv_context(void *cookie)
1629 {
1630 	struct arm_smmu_domain *smmu_domain = cookie;
1631 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1632 	struct arm_smmu_cmdq_ent cmd;
1633 
1634 	/*
1635 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1636 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1637 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1638 	 * insertion to guarantee those are observed before the TLBI. Do be
1639 	 * careful, 007.
1640 	 */
1641 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1642 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1643 	} else {
1644 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1645 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1646 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1647 		arm_smmu_cmdq_issue_sync(smmu);
1648 	}
1649 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1650 }
1651 
1652 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1653 				   size_t granule, bool leaf,
1654 				   struct arm_smmu_domain *smmu_domain)
1655 {
1656 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1657 	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1658 	size_t inv_range = granule;
1659 	struct arm_smmu_cmdq_batch cmds = {};
1660 	struct arm_smmu_cmdq_ent cmd = {
1661 		.tlbi = {
1662 			.leaf	= leaf,
1663 		},
1664 	};
1665 
1666 	if (!size)
1667 		return;
1668 
1669 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1670 		cmd.opcode	= CMDQ_OP_TLBI_NH_VA;
1671 		cmd.tlbi.asid	= smmu_domain->s1_cfg.cd.asid;
1672 	} else {
1673 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1674 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1675 	}
1676 
1677 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1678 		/* Get the leaf page size */
1679 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1680 
1681 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1682 		cmd.tlbi.tg = (tg - 10) / 2;
1683 
1684 		/* Determine what level the granule is at */
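		/*
		 * e.g. with 4K pages (tg == 12), a 2MB block granule gives
		 * 4 - ((21 - 3) / 9) == 2 (a level-2 leaf), while a 4K
		 * granule gives level 3.
		 */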
1685 		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1686 
1687 		num_pages = size >> tg;
1688 	}
1689 
1690 	while (iova < end) {
1691 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1692 			/*
1693 			 * On each iteration of the loop, the range is 5 bits
1694 			 * worth of the aligned size remaining.
1695 			 * The range in pages is:
1696 			 *
1697 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1698 			 */
1699 			unsigned long scale, num;
1700 
1701 			/* Determine the power of 2 multiple number of pages */
1702 			scale = __ffs(num_pages);
1703 			cmd.tlbi.scale = scale;
1704 
1705 			/* Determine how many chunks of 2^scale size we have */
1706 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1707 			cmd.tlbi.num = num - 1;
1708 
1709 			/* range is num * 2^scale * pgsize */
1710 			inv_range = num << (scale + tg);
1711 
1712 			/* Clear out the lower order bits for the next iteration */
1713 			num_pages -= num << scale;
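			/*
			 * e.g. num_pages == 35 (0b100011) with 4K pages emits
			 * two commands: scale 0 / num 3 (3 pages), then
			 * scale 5 / num 1 (32 pages).
			 */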
1714 		}
1715 
1716 		cmd.tlbi.addr = iova;
1717 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1718 		iova += inv_range;
1719 	}
1720 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1721 
1722 	/*
1723 	 * Unfortunately, this can't be leaf-only since we may have
1724 	 * zapped an entire table.
1725 	 */
1726 	arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1727 }
1728 
1729 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1730 					 unsigned long iova, size_t granule,
1731 					 void *cookie)
1732 {
1733 	struct arm_smmu_domain *smmu_domain = cookie;
1734 	struct iommu_domain *domain = &smmu_domain->domain;
1735 
1736 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1737 }
1738 
1739 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1740 				  size_t granule, void *cookie)
1741 {
1742 	arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1743 }
1744 
1745 static const struct iommu_flush_ops arm_smmu_flush_ops = {
1746 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
1747 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
1748 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
1749 };
1750 
1751 /* IOMMU API */
1752 static bool arm_smmu_capable(enum iommu_cap cap)
1753 {
1754 	switch (cap) {
1755 	case IOMMU_CAP_CACHE_COHERENCY:
1756 		return true;
1757 	case IOMMU_CAP_NOEXEC:
1758 		return true;
1759 	default:
1760 		return false;
1761 	}
1762 }
1763 
1764 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1765 {
1766 	struct arm_smmu_domain *smmu_domain;
1767 
1768 	if (type != IOMMU_DOMAIN_UNMANAGED &&
1769 	    type != IOMMU_DOMAIN_DMA &&
1770 	    type != IOMMU_DOMAIN_IDENTITY)
1771 		return NULL;
1772 
1773 	/*
1774 	 * Allocate the domain and initialise some of its data structures.
1775 	 * We can't really do anything meaningful until we've added a
1776 	 * master.
1777 	 */
1778 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1779 	if (!smmu_domain)
1780 		return NULL;
1781 
1782 	if (type == IOMMU_DOMAIN_DMA &&
1783 	    iommu_get_dma_cookie(&smmu_domain->domain)) {
1784 		kfree(smmu_domain);
1785 		return NULL;
1786 	}
1787 
1788 	mutex_init(&smmu_domain->init_mutex);
1789 	INIT_LIST_HEAD(&smmu_domain->devices);
1790 	spin_lock_init(&smmu_domain->devices_lock);
1791 
1792 	return &smmu_domain->domain;
1793 }
1794 
1795 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1796 {
1797 	int idx, size = 1 << span;
1798 
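	/*
	 * No lock is held here: find_first_zero_bit() is only a hint, so
	 * retry if another caller claims the same bit before our atomic
	 * test_and_set_bit() wins.
	 */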
1799 	do {
1800 		idx = find_first_zero_bit(map, size);
1801 		if (idx == size)
1802 			return -ENOSPC;
1803 	} while (test_and_set_bit(idx, map));
1804 
1805 	return idx;
1806 }
1807 
1808 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1809 {
1810 	clear_bit(idx, map);
1811 }
1812 
1813 static void arm_smmu_domain_free(struct iommu_domain *domain)
1814 {
1815 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1816 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1817 
1818 	iommu_put_dma_cookie(domain);
1819 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1820 
1821 	/* Free the CD and ASID, if we allocated them */
1822 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1823 		struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1824 
1825 		/* Prevent SVA from touching the CD while we're freeing it */
1826 		mutex_lock(&arm_smmu_asid_lock);
1827 		if (cfg->cdcfg.cdtab)
1828 			arm_smmu_free_cd_tables(smmu_domain);
1829 		arm_smmu_free_asid(&cfg->cd);
1830 		mutex_unlock(&arm_smmu_asid_lock);
1831 	} else {
1832 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1833 		if (cfg->vmid)
1834 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1835 	}
1836 
1837 	kfree(smmu_domain);
1838 }
1839 
1840 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1841 				       struct arm_smmu_master *master,
1842 				       struct io_pgtable_cfg *pgtbl_cfg)
1843 {
1844 	int ret;
1845 	u32 asid;
1846 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1847 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1848 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1849 
1850 	refcount_set(&cfg->cd.refs, 1);
1851 
1852 	/* Prevent SVA from modifying the ASID until it is written to the CD */
1853 	mutex_lock(&arm_smmu_asid_lock);
1854 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1855 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1856 	if (ret)
1857 		goto out_unlock;
1858 
1859 	cfg->s1cdmax = master->ssid_bits;
1860 
1861 	ret = arm_smmu_alloc_cd_tables(smmu_domain);
1862 	if (ret)
1863 		goto out_free_asid;
1864 
1865 	cfg->cd.asid	= (u16)asid;
1866 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1867 	cfg->cd.tcr	= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1868 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1869 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1870 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1871 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1872 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1873 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1874 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
1875 
1876 	/*
1877 	 * Note that this will end up calling arm_smmu_sync_cd() before
1878 	 * the master has been added to the devices list for this domain.
1879 	 * This isn't an issue because the STE hasn't been installed yet.
1880 	 */
1881 	ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1882 	if (ret)
1883 		goto out_free_cd_tables;
1884 
1885 	mutex_unlock(&arm_smmu_asid_lock);
1886 	return 0;
1887 
1888 out_free_cd_tables:
1889 	arm_smmu_free_cd_tables(smmu_domain);
1890 out_free_asid:
1891 	arm_smmu_free_asid(&cfg->cd);
1892 out_unlock:
1893 	mutex_unlock(&arm_smmu_asid_lock);
1894 	return ret;
1895 }
1896 
1897 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1898 				       struct arm_smmu_master *master,
1899 				       struct io_pgtable_cfg *pgtbl_cfg)
1900 {
1901 	int vmid;
1902 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1903 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1904 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1905 
1906 	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1907 	if (vmid < 0)
1908 		return vmid;
1909 
1910 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1911 	cfg->vmid	= (u16)vmid;
1912 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1913 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1914 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1915 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1916 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1917 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1918 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1919 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1920 	return 0;
1921 }
1922 
1923 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1924 				    struct arm_smmu_master *master)
1925 {
1926 	int ret;
1927 	unsigned long ias, oas;
1928 	enum io_pgtable_fmt fmt;
1929 	struct io_pgtable_cfg pgtbl_cfg;
1930 	struct io_pgtable_ops *pgtbl_ops;
1931 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
1932 				 struct arm_smmu_master *,
1933 				 struct io_pgtable_cfg *);
1934 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1935 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1936 
1937 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1938 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1939 		return 0;
1940 	}
1941 
1942 	/* Restrict the stage to what we can actually support */
1943 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1944 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1945 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1946 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1947 
1948 	switch (smmu_domain->stage) {
1949 	case ARM_SMMU_DOMAIN_S1:
1950 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1951 		ias = min_t(unsigned long, ias, VA_BITS);
1952 		oas = smmu->ias;
1953 		fmt = ARM_64_LPAE_S1;
1954 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
1955 		break;
1956 	case ARM_SMMU_DOMAIN_NESTED:
1957 	case ARM_SMMU_DOMAIN_S2:
1958 		ias = smmu->ias;
1959 		oas = smmu->oas;
1960 		fmt = ARM_64_LPAE_S2;
1961 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
1962 		break;
1963 	default:
1964 		return -EINVAL;
1965 	}
1966 
1967 	pgtbl_cfg = (struct io_pgtable_cfg) {
1968 		.pgsize_bitmap	= smmu->pgsize_bitmap,
1969 		.ias		= ias,
1970 		.oas		= oas,
1971 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
1972 		.tlb		= &arm_smmu_flush_ops,
1973 		.iommu_dev	= smmu->dev,
1974 	};
1975 
1976 	if (smmu_domain->non_strict)
1977 		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1978 
1979 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1980 	if (!pgtbl_ops)
1981 		return -ENOMEM;
1982 
1983 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1984 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1985 	domain->geometry.force_aperture = true;
1986 
1987 	ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1988 	if (ret < 0) {
1989 		free_io_pgtable_ops(pgtbl_ops);
1990 		return ret;
1991 	}
1992 
1993 	smmu_domain->pgtbl_ops = pgtbl_ops;
1994 	return 0;
1995 }
1996 
1997 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1998 {
1999 	__le64 *step;
2000 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2001 
2002 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2003 		struct arm_smmu_strtab_l1_desc *l1_desc;
2004 		int idx;
2005 
2006 		/* Two-level walk */
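		/*
		 * The upper SID bits select the L1 descriptor; the low
		 * STRTAB_SPLIT bits index the STE within that descriptor's
		 * L2 span.
		 */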
2007 		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2008 		l1_desc = &cfg->l1_desc[idx];
2009 		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2010 		step = &l1_desc->l2ptr[idx];
2011 	} else {
2012 		/* Simple linear lookup */
2013 		step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2014 	}
2015 
2016 	return step;
2017 }
2018 
2019 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2020 {
2021 	int i, j;
2022 	struct arm_smmu_device *smmu = master->smmu;
2023 
2024 	for (i = 0; i < master->num_sids; ++i) {
2025 		u32 sid = master->sids[i];
2026 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2027 
2028 		/* Bridged PCI devices may end up with duplicated IDs */
2029 		for (j = 0; j < i; j++)
2030 			if (master->sids[j] == sid)
2031 				break;
2032 		if (j < i)
2033 			continue;
2034 
2035 		arm_smmu_write_strtab_ent(master, sid, step);
2036 	}
2037 }
2038 
2039 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2040 {
2041 	struct device *dev = master->dev;
2042 	struct arm_smmu_device *smmu = master->smmu;
2043 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2044 
2045 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2046 		return false;
2047 
2048 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2049 		return false;
2050 
2051 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2052 }
2053 
2054 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2055 {
2056 	size_t stu;
2057 	struct pci_dev *pdev;
2058 	struct arm_smmu_device *smmu = master->smmu;
2059 	struct arm_smmu_domain *smmu_domain = master->domain;
2060 
2061 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2062 	if (!master->ats_enabled)
2063 		return;
2064 
2065 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2066 	stu = __ffs(smmu->pgsize_bitmap);
2067 	pdev = to_pci_dev(master->dev);
2068 
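	/*
	 * Bump nr_ats_masters and flush the ATC before ATS is enabled at the
	 * endpoint; this orders against the smp_mb() in
	 * arm_smmu_atc_inv_domain() (see the diagram there).
	 */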
2069 	atomic_inc(&smmu_domain->nr_ats_masters);
2070 	arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2071 	if (pci_enable_ats(pdev, stu))
2072 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2073 }
2074 
2075 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2076 {
2077 	struct arm_smmu_domain *smmu_domain = master->domain;
2078 
2079 	if (!master->ats_enabled)
2080 		return;
2081 
2082 	pci_disable_ats(to_pci_dev(master->dev));
2083 	/*
2084 	 * Ensure ATS is disabled at the endpoint before we issue the
2085 	 * ATC invalidation via the SMMU.
2086 	 */
2087 	wmb();
2088 	arm_smmu_atc_inv_master(master);
2089 	atomic_dec(&smmu_domain->nr_ats_masters);
2090 }
2091 
2092 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2093 {
2094 	int ret;
2095 	int features;
2096 	int num_pasids;
2097 	struct pci_dev *pdev;
2098 
2099 	if (!dev_is_pci(master->dev))
2100 		return -ENODEV;
2101 
2102 	pdev = to_pci_dev(master->dev);
2103 
2104 	features = pci_pasid_features(pdev);
2105 	if (features < 0)
2106 		return features;
2107 
2108 	num_pasids = pci_max_pasids(pdev);
2109 	if (num_pasids <= 0)
2110 		return num_pasids;
2111 
2112 	ret = pci_enable_pasid(pdev, features);
2113 	if (ret) {
2114 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2115 		return ret;
2116 	}
2117 
2118 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2119 				  master->smmu->ssid_bits);
2120 	return 0;
2121 }
2122 
2123 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2124 {
2125 	struct pci_dev *pdev;
2126 
2127 	if (!dev_is_pci(master->dev))
2128 		return;
2129 
2130 	pdev = to_pci_dev(master->dev);
2131 
2132 	if (!pdev->pasid_enabled)
2133 		return;
2134 
2135 	master->ssid_bits = 0;
2136 	pci_disable_pasid(pdev);
2137 }
2138 
2139 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2140 {
2141 	unsigned long flags;
2142 	struct arm_smmu_domain *smmu_domain = master->domain;
2143 
2144 	if (!smmu_domain)
2145 		return;
2146 
2147 	arm_smmu_disable_ats(master);
2148 
2149 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2150 	list_del(&master->domain_head);
2151 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2152 
2153 	master->domain = NULL;
2154 	master->ats_enabled = false;
2155 	arm_smmu_install_ste_for_dev(master);
2156 }
2157 
2158 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2159 {
2160 	int ret = 0;
2161 	unsigned long flags;
2162 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2163 	struct arm_smmu_device *smmu;
2164 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165 	struct arm_smmu_master *master;
2166 
2167 	if (!fwspec)
2168 		return -ENOENT;
2169 
2170 	master = dev_iommu_priv_get(dev);
2171 	smmu = master->smmu;
2172 
2173 	/*
2174 	 * Checking that SVA is disabled ensures that this device isn't bound to
2175 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2176 	 * be removed concurrently since we're holding the group mutex.
2177 	 */
2178 	if (arm_smmu_master_sva_enabled(master)) {
2179 		dev_err(dev, "cannot attach - SVA enabled\n");
2180 		return -EBUSY;
2181 	}
2182 
2183 	arm_smmu_detach_dev(master);
2184 
2185 	mutex_lock(&smmu_domain->init_mutex);
2186 
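	/*
	 * The first master to attach finalises the domain (stage, page-table
	 * format, ASID/VMID) against its SMMU; subsequent masters must sit
	 * behind the same SMMU and match the domain's SSID width.
	 */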
2187 	if (!smmu_domain->smmu) {
2188 		smmu_domain->smmu = smmu;
2189 		ret = arm_smmu_domain_finalise(domain, master);
2190 		if (ret) {
2191 			smmu_domain->smmu = NULL;
2192 			goto out_unlock;
2193 		}
2194 	} else if (smmu_domain->smmu != smmu) {
2195 		dev_err(dev,
2196 			"cannot attach to SMMU %s (upstream of %s)\n",
2197 			dev_name(smmu_domain->smmu->dev),
2198 			dev_name(smmu->dev));
2199 		ret = -ENXIO;
2200 		goto out_unlock;
2201 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2202 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2203 		dev_err(dev,
2204 			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
2205 			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2206 		ret = -EINVAL;
2207 		goto out_unlock;
2208 	}
2209 
2210 	master->domain = smmu_domain;
2211 
2212 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2213 		master->ats_enabled = arm_smmu_ats_supported(master);
2214 
2215 	arm_smmu_install_ste_for_dev(master);
2216 
2217 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2218 	list_add(&master->domain_head, &smmu_domain->devices);
2219 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2220 
2221 	arm_smmu_enable_ats(master);
2222 
2223 out_unlock:
2224 	mutex_unlock(&smmu_domain->init_mutex);
2225 	return ret;
2226 }
2227 
2228 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2229 			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2230 {
2231 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2232 
2233 	if (!ops)
2234 		return -ENODEV;
2235 
2236 	return ops->map(ops, iova, paddr, size, prot, gfp);
2237 }
2238 
2239 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2240 			     size_t size, struct iommu_iotlb_gather *gather)
2241 {
2242 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2243 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2244 
2245 	if (!ops)
2246 		return 0;
2247 
2248 	return ops->unmap(ops, iova, size, gather);
2249 }
2250 
2251 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2252 {
2253 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2254 
2255 	if (smmu_domain->smmu)
2256 		arm_smmu_tlb_inv_context(smmu_domain);
2257 }
2258 
2259 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2260 				struct iommu_iotlb_gather *gather)
2261 {
2262 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2263 
2264 	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2265 			       gather->pgsize, true, smmu_domain);
2266 }
2267 
2268 static phys_addr_t
2269 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2270 {
2271 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2272 
2273 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
2274 		return iova;
2275 
2276 	if (!ops)
2277 		return 0;
2278 
2279 	return ops->iova_to_phys(ops, iova);
2280 }
2281 
2282 static struct platform_driver arm_smmu_driver;
2283 
2284 static
2285 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2286 {
2287 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2288 							  fwnode);
2289 	put_device(dev);
2290 	return dev ? dev_get_drvdata(dev) : NULL;
2291 }
2292 
2293 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2294 {
2295 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2296 
2297 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2298 		limit *= 1UL << STRTAB_SPLIT;
2299 
2300 	return sid < limit;
2301 }
2302 
2303 static struct iommu_ops arm_smmu_ops;
2304 
2305 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2306 {
2307 	int i, ret;
2308 	struct arm_smmu_device *smmu;
2309 	struct arm_smmu_master *master;
2310 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2311 
2312 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2313 		return ERR_PTR(-ENODEV);
2314 
2315 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2316 		return ERR_PTR(-EBUSY);
2317 
2318 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2319 	if (!smmu)
2320 		return ERR_PTR(-ENODEV);
2321 
2322 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2323 	if (!master)
2324 		return ERR_PTR(-ENOMEM);
2325 
2326 	master->dev = dev;
2327 	master->smmu = smmu;
2328 	master->sids = fwspec->ids;
2329 	master->num_sids = fwspec->num_ids;
2330 	INIT_LIST_HEAD(&master->bonds);
2331 	dev_iommu_priv_set(dev, master);
2332 
2333 	/* Check the SIDs are in range of the SMMU and our stream table */
2334 	for (i = 0; i < master->num_sids; i++) {
2335 		u32 sid = master->sids[i];
2336 
2337 		if (!arm_smmu_sid_in_range(smmu, sid)) {
2338 			ret = -ERANGE;
2339 			goto err_free_master;
2340 		}
2341 
2342 		/* Ensure l2 strtab is initialised */
2343 		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2344 			ret = arm_smmu_init_l2_strtab(smmu, sid);
2345 			if (ret)
2346 				goto err_free_master;
2347 		}
2348 	}
2349 
2350 	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2351 
2352 	/*
2353 	 * Note that PASID must be enabled before, and disabled after ATS:
2354 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2355 	 *
2356 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2357 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2358 	 *   are changed.
2359 	 */
2360 	arm_smmu_enable_pasid(master);
2361 
2362 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
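	/*
	 * Without 2-level CD tables the context-descriptor table is a single
	 * linear array, so clamp the SSID width to CTXDESC_LINEAR_CDMAX.
	 */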
2363 		master->ssid_bits = min_t(u8, master->ssid_bits,
2364 					  CTXDESC_LINEAR_CDMAX);
2365 
2366 	return &smmu->iommu;
2367 
2368 err_free_master:
2369 	kfree(master);
2370 	dev_iommu_priv_set(dev, NULL);
2371 	return ERR_PTR(ret);
2372 }
2373 
2374 static void arm_smmu_release_device(struct device *dev)
2375 {
2376 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2377 	struct arm_smmu_master *master;
2378 
2379 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
2380 		return;
2381 
2382 	master = dev_iommu_priv_get(dev);
2383 	WARN_ON(arm_smmu_master_sva_enabled(master));
2384 	arm_smmu_detach_dev(master);
2385 	arm_smmu_disable_pasid(master);
2386 	kfree(master);
2387 	iommu_fwspec_free(dev);
2388 }
2389 
2390 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2391 {
2392 	struct iommu_group *group;
2393 
2394 	/*
2395 	 * We don't support devices sharing stream IDs other than PCI RID
2396 	 * aliases, since the necessary ID-to-device lookup becomes rather
2397 	 * impractical given a potential sparse 32-bit stream ID space.
2398 	 */
2399 	if (dev_is_pci(dev))
2400 		group = pci_device_group(dev);
2401 	else
2402 		group = generic_device_group(dev);
2403 
2404 	return group;
2405 }
2406 
2407 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2408 				    enum iommu_attr attr, void *data)
2409 {
2410 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2411 
2412 	switch (domain->type) {
2413 	case IOMMU_DOMAIN_UNMANAGED:
2414 		switch (attr) {
2415 		case DOMAIN_ATTR_NESTING:
2416 			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2417 			return 0;
2418 		default:
2419 			return -ENODEV;
2420 		}
2421 		break;
2422 	case IOMMU_DOMAIN_DMA:
2423 		switch (attr) {
2424 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2425 			*(int *)data = smmu_domain->non_strict;
2426 			return 0;
2427 		default:
2428 			return -ENODEV;
2429 		}
2430 		break;
2431 	default:
2432 		return -EINVAL;
2433 	}
2434 }
2435 
2436 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2437 				    enum iommu_attr attr, void *data)
2438 {
2439 	int ret = 0;
2440 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2441 
2442 	mutex_lock(&smmu_domain->init_mutex);
2443 
2444 	switch (domain->type) {
2445 	case IOMMU_DOMAIN_UNMANAGED:
2446 		switch (attr) {
2447 		case DOMAIN_ATTR_NESTING:
2448 			if (smmu_domain->smmu) {
2449 				ret = -EPERM;
2450 				goto out_unlock;
2451 			}
2452 
2453 			if (*(int *)data)
2454 				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2455 			else
2456 				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2457 			break;
2458 		default:
2459 			ret = -ENODEV;
2460 		}
2461 		break;
2462 	case IOMMU_DOMAIN_DMA:
2463 		switch (attr) {
2464 		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2465 			smmu_domain->non_strict = *(int *)data;
2466 			break;
2467 		default:
2468 			ret = -ENODEV;
2469 		}
2470 		break;
2471 	default:
2472 		ret = -EINVAL;
2473 	}
2474 
2475 out_unlock:
2476 	mutex_unlock(&smmu_domain->init_mutex);
2477 	return ret;
2478 }
2479 
2480 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2481 {
2482 	return iommu_fwspec_add_ids(dev, args->args, 1);
2483 }
2484 
2485 static void arm_smmu_get_resv_regions(struct device *dev,
2486 				      struct list_head *head)
2487 {
2488 	struct iommu_resv_region *region;
2489 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2490 
2491 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2492 					 prot, IOMMU_RESV_SW_MSI);
2493 	if (!region)
2494 		return;
2495 
2496 	list_add_tail(&region->list, head);
2497 
2498 	iommu_dma_get_resv_regions(dev, head);
2499 }
2500 
2501 static bool arm_smmu_dev_has_feature(struct device *dev,
2502 				     enum iommu_dev_features feat)
2503 {
2504 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2505 
2506 	if (!master)
2507 		return false;
2508 
2509 	switch (feat) {
2510 	case IOMMU_DEV_FEAT_SVA:
2511 		return arm_smmu_master_sva_supported(master);
2512 	default:
2513 		return false;
2514 	}
2515 }
2516 
2517 static bool arm_smmu_dev_feature_enabled(struct device *dev,
2518 					 enum iommu_dev_features feat)
2519 {
2520 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2521 
2522 	if (!master)
2523 		return false;
2524 
2525 	switch (feat) {
2526 	case IOMMU_DEV_FEAT_SVA:
2527 		return arm_smmu_master_sva_enabled(master);
2528 	default:
2529 		return false;
2530 	}
2531 }
2532 
2533 static int arm_smmu_dev_enable_feature(struct device *dev,
2534 				       enum iommu_dev_features feat)
2535 {
2536 	if (!arm_smmu_dev_has_feature(dev, feat))
2537 		return -ENODEV;
2538 
2539 	if (arm_smmu_dev_feature_enabled(dev, feat))
2540 		return -EBUSY;
2541 
2542 	switch (feat) {
2543 	case IOMMU_DEV_FEAT_SVA:
2544 		return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2545 	default:
2546 		return -EINVAL;
2547 	}
2548 }
2549 
2550 static int arm_smmu_dev_disable_feature(struct device *dev,
2551 					enum iommu_dev_features feat)
2552 {
2553 	if (!arm_smmu_dev_feature_enabled(dev, feat))
2554 		return -EINVAL;
2555 
2556 	switch (feat) {
2557 	case IOMMU_DEV_FEAT_SVA:
2558 		return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2559 	default:
2560 		return -EINVAL;
2561 	}
2562 }
2563 
2564 static struct iommu_ops arm_smmu_ops = {
2565 	.capable		= arm_smmu_capable,
2566 	.domain_alloc		= arm_smmu_domain_alloc,
2567 	.domain_free		= arm_smmu_domain_free,
2568 	.attach_dev		= arm_smmu_attach_dev,
2569 	.map			= arm_smmu_map,
2570 	.unmap			= arm_smmu_unmap,
2571 	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2572 	.iotlb_sync		= arm_smmu_iotlb_sync,
2573 	.iova_to_phys		= arm_smmu_iova_to_phys,
2574 	.probe_device		= arm_smmu_probe_device,
2575 	.release_device		= arm_smmu_release_device,
2576 	.device_group		= arm_smmu_device_group,
2577 	.domain_get_attr	= arm_smmu_domain_get_attr,
2578 	.domain_set_attr	= arm_smmu_domain_set_attr,
2579 	.of_xlate		= arm_smmu_of_xlate,
2580 	.get_resv_regions	= arm_smmu_get_resv_regions,
2581 	.put_resv_regions	= generic_iommu_put_resv_regions,
2582 	.dev_has_feat		= arm_smmu_dev_has_feature,
2583 	.dev_feat_enabled	= arm_smmu_dev_feature_enabled,
2584 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2585 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2586 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2587 };
2588 
2589 /* Probing and initialisation functions */
2590 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2591 				   struct arm_smmu_queue *q,
2592 				   unsigned long prod_off,
2593 				   unsigned long cons_off,
2594 				   size_t dwords, const char *name)
2595 {
2596 	size_t qsz;
2597 
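	/*
	 * Start from the advertised maximum queue size and halve it until the
	 * DMA allocation succeeds, giving up once the size would drop below
	 * one page.
	 */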
2598 	do {
2599 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2600 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2601 					      GFP_KERNEL);
2602 		if (q->base || qsz < PAGE_SIZE)
2603 			break;
2604 
2605 		q->llq.max_n_shift--;
2606 	} while (1);
2607 
2608 	if (!q->base) {
2609 		dev_err(smmu->dev,
2610 			"failed to allocate queue (0x%zx bytes) for %s\n",
2611 			qsz, name);
2612 		return -ENOMEM;
2613 	}
2614 
2615 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2616 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2617 			 1 << q->llq.max_n_shift, name);
2618 	}
2619 
2620 	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
2621 	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
2622 	q->ent_dwords	= dwords;
2623 
2624 	q->q_base  = Q_BASE_RWA;
2625 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2626 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2627 
2628 	q->llq.prod = q->llq.cons = 0;
2629 	return 0;
2630 }
2631 
2632 static void arm_smmu_cmdq_free_bitmap(void *data)
2633 {
2634 	unsigned long *bitmap = data;
2635 	bitmap_free(bitmap);
2636 }
2637 
2638 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2639 {
2640 	int ret = 0;
2641 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2642 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2643 	atomic_long_t *bitmap;
2644 
2645 	atomic_set(&cmdq->owner_prod, 0);
2646 	atomic_set(&cmdq->lock, 0);
2647 
2648 	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2649 	if (!bitmap) {
2650 		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2651 		ret = -ENOMEM;
2652 	} else {
2653 		cmdq->valid_map = bitmap;
2654 		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2655 	}
2656 
2657 	return ret;
2658 }
2659 
2660 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2661 {
2662 	int ret;
2663 
2664 	/* cmdq */
2665 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2666 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2667 				      "cmdq");
2668 	if (ret)
2669 		return ret;
2670 
2671 	ret = arm_smmu_cmdq_init(smmu);
2672 	if (ret)
2673 		return ret;
2674 
2675 	/* evtq */
2676 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2677 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2678 				      "evtq");
2679 	if (ret)
2680 		return ret;
2681 
2682 	/* priq */
2683 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2684 		return 0;
2685 
2686 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2687 				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2688 				       "priq");
2689 }
2690 
2691 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2692 {
2693 	unsigned int i;
2694 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2695 	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2696 	void *strtab = smmu->strtab_cfg.strtab;
2697 
2698 	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2699 	if (!cfg->l1_desc) {
2700 		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2701 		return -ENOMEM;
2702 	}
2703 
2704 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2705 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2706 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2707 	}
2708 
2709 	return 0;
2710 }
2711 
2712 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2713 {
2714 	void *strtab;
2715 	u64 reg;
2716 	u32 size, l1size;
2717 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2718 
2719 	/* Calculate the L1 size, capped to the SIDSIZE. */
2720 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2721 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2722 	cfg->num_l1_ents = 1 << size;
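	/*
	 * Each L1 descriptor spans 2^STRTAB_SPLIT STEs, so num_l1_ents L1
	 * entries cover (size + STRTAB_SPLIT) bits of SID space; the check
	 * below warns if that still falls short of smmu->sid_bits.
	 */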
2723 
2724 	size += STRTAB_SPLIT;
2725 	if (size < smmu->sid_bits)
2726 		dev_warn(smmu->dev,
2727 			 "2-level strtab only covers %u/%u bits of SID\n",
2728 			 size, smmu->sid_bits);
2729 
2730 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2731 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2732 				     GFP_KERNEL);
2733 	if (!strtab) {
2734 		dev_err(smmu->dev,
2735 			"failed to allocate l1 stream table (%u bytes)\n",
2736 			l1size);
2737 		return -ENOMEM;
2738 	}
2739 	cfg->strtab = strtab;
2740 
2741 	/* Configure strtab_base_cfg for 2 levels */
2742 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2743 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2744 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2745 	cfg->strtab_base_cfg = reg;
2746 
2747 	return arm_smmu_init_l1_strtab(smmu);
2748 }
2749 
2750 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2751 {
2752 	void *strtab;
2753 	u64 reg;
2754 	u32 size;
2755 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2756 
2757 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2758 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2759 				     GFP_KERNEL);
2760 	if (!strtab) {
2761 		dev_err(smmu->dev,
2762 			"failed to allocate linear stream table (%u bytes)\n",
2763 			size);
2764 		return -ENOMEM;
2765 	}
2766 	cfg->strtab = strtab;
2767 	cfg->num_l1_ents = 1 << smmu->sid_bits;
2768 
2769 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
2770 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2771 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2772 	cfg->strtab_base_cfg = reg;
2773 
2774 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2775 	return 0;
2776 }
2777 
2778 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2779 {
2780 	u64 reg;
2781 	int ret;
2782 
2783 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2784 		ret = arm_smmu_init_strtab_2lvl(smmu);
2785 	else
2786 		ret = arm_smmu_init_strtab_linear(smmu);
2787 
2788 	if (ret)
2789 		return ret;
2790 
2791 	/* Set the strtab base address */
2792 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2793 	reg |= STRTAB_BASE_RA;
2794 	smmu->strtab_cfg.strtab_base = reg;
2795 
2796 	/* Allocate the first VMID for stage-2 bypass STEs */
2797 	set_bit(0, smmu->vmid_map);
2798 	return 0;
2799 }
2800 
2801 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2802 {
2803 	int ret;
2804 
2805 	ret = arm_smmu_init_queues(smmu);
2806 	if (ret)
2807 		return ret;
2808 
2809 	return arm_smmu_init_strtab(smmu);
2810 }
2811 
2812 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2813 				   unsigned int reg_off, unsigned int ack_off)
2814 {
2815 	u32 reg;
2816 
2817 	writel_relaxed(val, smmu->base + reg_off);
2818 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2819 					  1, ARM_SMMU_POLL_TIMEOUT_US);
2820 }
2821 
2822 /* GBPA is "special" */
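/*
 * Unlike the CR<n>/CR<n>ACK register pairs, an update is performed by polling
 * for GBPA.UPDATE to be clear, writing the new value with the UPDATE bit set,
 * and then polling again until the SMMU clears it.
 */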
2823 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2824 {
2825 	int ret;
2826 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2827 
2828 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2829 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2830 	if (ret)
2831 		return ret;
2832 
2833 	reg &= ~clr;
2834 	reg |= set;
2835 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
2836 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2837 					 1, ARM_SMMU_POLL_TIMEOUT_US);
2838 
2839 	if (ret)
2840 		dev_err(smmu->dev, "GBPA not responding to update\n");
2841 	return ret;
2842 }
2843 
2844 static void arm_smmu_free_msis(void *data)
2845 {
2846 	struct device *dev = data;
2847 	platform_msi_domain_free_irqs(dev);
2848 }
2849 
2850 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2851 {
2852 	phys_addr_t doorbell;
2853 	struct device *dev = msi_desc_to_dev(desc);
2854 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2855 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2856 
2857 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2858 	doorbell &= MSI_CFG0_ADDR_MASK;
2859 
2860 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
2861 	writel_relaxed(msg->data, smmu->base + cfg[1]);
2862 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2863 }
2864 
2865 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2866 {
2867 	struct msi_desc *desc;
2868 	int ret, nvec = ARM_SMMU_MAX_MSIS;
2869 	struct device *dev = smmu->dev;
2870 
2871 	/* Clear the MSI address regs */
2872 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2873 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2874 
2875 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2876 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2877 	else
2878 		nvec--;
2879 
2880 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2881 		return;
2882 
2883 	if (!dev->msi_domain) {
2884 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2885 		return;
2886 	}
2887 
2888 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2889 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2890 	if (ret) {
2891 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2892 		return;
2893 	}
2894 
2895 	for_each_msi_entry(desc, dev) {
2896 		switch (desc->platform.msi_index) {
2897 		case EVTQ_MSI_INDEX:
2898 			smmu->evtq.q.irq = desc->irq;
2899 			break;
2900 		case GERROR_MSI_INDEX:
2901 			smmu->gerr_irq = desc->irq;
2902 			break;
2903 		case PRIQ_MSI_INDEX:
2904 			smmu->priq.q.irq = desc->irq;
2905 			break;
2906 		default:	/* Unknown */
2907 			continue;
2908 		}
2909 	}
2910 
2911 	/* Add callback to free MSIs on teardown */
2912 	devm_add_action(dev, arm_smmu_free_msis, dev);
2913 }
2914 
2915 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2916 {
2917 	int irq, ret;
2918 
2919 	arm_smmu_setup_msis(smmu);
2920 
2921 	/* Request interrupt lines */
2922 	irq = smmu->evtq.q.irq;
2923 	if (irq) {
2924 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2925 						arm_smmu_evtq_thread,
2926 						IRQF_ONESHOT,
2927 						"arm-smmu-v3-evtq", smmu);
2928 		if (ret < 0)
2929 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
2930 	} else {
2931 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2932 	}
2933 
2934 	irq = smmu->gerr_irq;
2935 	if (irq) {
2936 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2937 				       0, "arm-smmu-v3-gerror", smmu);
2938 		if (ret < 0)
2939 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
2940 	} else {
2941 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2942 	}
2943 
2944 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
2945 		irq = smmu->priq.q.irq;
2946 		if (irq) {
2947 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2948 							arm_smmu_priq_thread,
2949 							IRQF_ONESHOT,
2950 							"arm-smmu-v3-priq",
2951 							smmu);
2952 			if (ret < 0)
2953 				dev_warn(smmu->dev,
2954 					 "failed to enable priq irq\n");
2955 		} else {
2956 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2957 		}
2958 	}
2959 }
2960 
2961 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2962 {
2963 	int ret, irq;
2964 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2965 
2966 	/* Disable IRQs first */
2967 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2968 				      ARM_SMMU_IRQ_CTRLACK);
2969 	if (ret) {
2970 		dev_err(smmu->dev, "failed to disable irqs\n");
2971 		return ret;
2972 	}
2973 
2974 	irq = smmu->combined_irq;
2975 	if (irq) {
2976 		/*
2977 		 * Cavium ThunderX2 implementation doesn't support unique irq
2978 		 * lines. Use a single irq line for all the SMMUv3 interrupts.
2979 		 */
2980 		ret = devm_request_threaded_irq(smmu->dev, irq,
2981 					arm_smmu_combined_irq_handler,
2982 					arm_smmu_combined_irq_thread,
2983 					IRQF_ONESHOT,
2984 					"arm-smmu-v3-combined-irq", smmu);
2985 		if (ret < 0)
2986 			dev_warn(smmu->dev, "failed to enable combined irq\n");
2987 	} else
2988 		arm_smmu_setup_unique_irqs(smmu);
2989 
2990 	if (smmu->features & ARM_SMMU_FEAT_PRI)
2991 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2992 
2993 	/* Enable interrupt generation on the SMMU */
2994 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2995 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2996 	if (ret)
2997 		dev_warn(smmu->dev, "failed to enable irqs\n");
2998 
2999 	return 0;
3000 }
3001 
3002 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3003 {
3004 	int ret;
3005 
3006 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3007 	if (ret)
3008 		dev_err(smmu->dev, "failed to clear cr0\n");
3009 
3010 	return ret;
3011 }
3012 
3013 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3014 {
3015 	int ret;
3016 	u32 reg, enables;
3017 	struct arm_smmu_cmdq_ent cmd;
3018 
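	/*
	 * Bring the SMMU up in stages: disable it, program the tables and
	 * queues, enable the command queue first (so the configuration and
	 * TLB invalidations below can be issued), then the event/PRI queues
	 * and IRQs, and only set SMMUEN once everything else is in place.
	 */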
3019 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3020 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3021 	if (reg & CR0_SMMUEN) {
3022 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3023 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3024 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3025 	}
3026 
3027 	ret = arm_smmu_device_disable(smmu);
3028 	if (ret)
3029 		return ret;
3030 
3031 	/* CR1 (table and queue memory attributes) */
3032 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3033 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3034 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3035 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3036 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3037 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3038 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3039 
3040 	/* CR2 (random crap) */
3041 	reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3042 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3043 
3044 	/* Stream table */
3045 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3046 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3047 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3048 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3049 
3050 	/* Command queue */
3051 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3052 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3053 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3054 
3055 	enables = CR0_CMDQEN;
3056 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3057 				      ARM_SMMU_CR0ACK);
3058 	if (ret) {
3059 		dev_err(smmu->dev, "failed to enable command queue\n");
3060 		return ret;
3061 	}
3062 
3063 	/* Invalidate any cached configuration */
3064 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3065 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3066 	arm_smmu_cmdq_issue_sync(smmu);
3067 
3068 	/* Invalidate any stale TLB entries */
3069 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3070 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3071 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3072 	}
3073 
3074 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3075 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3076 	arm_smmu_cmdq_issue_sync(smmu);
3077 
3078 	/* Event queue */
3079 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3080 	writel_relaxed(smmu->evtq.q.llq.prod,
3081 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3082 	writel_relaxed(smmu->evtq.q.llq.cons,
3083 		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3084 
3085 	enables |= CR0_EVTQEN;
3086 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3087 				      ARM_SMMU_CR0ACK);
3088 	if (ret) {
3089 		dev_err(smmu->dev, "failed to enable event queue\n");
3090 		return ret;
3091 	}
3092 
3093 	/* PRI queue */
3094 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3095 		writeq_relaxed(smmu->priq.q.q_base,
3096 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3097 		writel_relaxed(smmu->priq.q.llq.prod,
3098 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3099 		writel_relaxed(smmu->priq.q.llq.cons,
3100 			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3101 
3102 		enables |= CR0_PRIQEN;
3103 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3104 					      ARM_SMMU_CR0ACK);
3105 		if (ret) {
3106 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3107 			return ret;
3108 		}
3109 	}
3110 
3111 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3112 		enables |= CR0_ATSCHK;
3113 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3114 					      ARM_SMMU_CR0ACK);
3115 		if (ret) {
3116 			dev_err(smmu->dev, "failed to enable ATS check\n");
3117 			return ret;
3118 		}
3119 	}
3120 
3121 	ret = arm_smmu_setup_irqs(smmu);
3122 	if (ret) {
3123 		dev_err(smmu->dev, "failed to setup irqs\n");
3124 		return ret;
3125 	}
3126 
3127 	if (is_kdump_kernel())
3128 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3129 
3130 	/* Enable the SMMU interface, or ensure bypass */
3131 	if (!bypass || disable_bypass) {
3132 		enables |= CR0_SMMUEN;
3133 	} else {
3134 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3135 		if (ret)
3136 			return ret;
3137 	}
3138 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3139 				      ARM_SMMU_CR0ACK);
3140 	if (ret) {
3141 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3142 		return ret;
3143 	}
3144 
3145 	return 0;
3146 }
3147 
3148 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3149 {
3150 	u32 reg;
3151 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3152 
3153 	/* IDR0 */
3154 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3155 
3156 	/* 2-level structures */
3157 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3158 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3159 
3160 	if (reg & IDR0_CD2L)
3161 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3162 
3163 	/*
3164 	 * Translation table endianness.
3165 	 * We currently require the same endianness as the CPU, but this
3166 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3167 	 */
3168 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3169 	case IDR0_TTENDIAN_MIXED:
3170 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3171 		break;
3172 #ifdef __BIG_ENDIAN
3173 	case IDR0_TTENDIAN_BE:
3174 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3175 		break;
3176 #else
3177 	case IDR0_TTENDIAN_LE:
3178 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3179 		break;
3180 #endif
3181 	default:
3182 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3183 		return -ENXIO;
3184 	}
3185 
3186 	/* Boolean feature flags */
3187 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3188 		smmu->features |= ARM_SMMU_FEAT_PRI;
3189 
3190 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3191 		smmu->features |= ARM_SMMU_FEAT_ATS;
3192 
3193 	if (reg & IDR0_SEV)
3194 		smmu->features |= ARM_SMMU_FEAT_SEV;
3195 
3196 	if (reg & IDR0_MSI) {
3197 		smmu->features |= ARM_SMMU_FEAT_MSI;
3198 		if (coherent && !disable_msipolling)
3199 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3200 	}
3201 
3202 	if (reg & IDR0_HYP)
3203 		smmu->features |= ARM_SMMU_FEAT_HYP;
3204 
3205 	/*
3206 	 * The coherency feature as set by FW is used in preference to the ID
3207 	 * register, but warn on mismatch.
3208 	 */
3209 	if (!!(reg & IDR0_COHACC) != coherent)
3210 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3211 			 coherent ? "true" : "false");
3212 
3213 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3214 	case IDR0_STALL_MODEL_FORCE:
3215 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3216 		fallthrough;
3217 	case IDR0_STALL_MODEL_STALL:
3218 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3219 	}
3220 
3221 	if (reg & IDR0_S1P)
3222 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3223 
3224 	if (reg & IDR0_S2P)
3225 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3226 
3227 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3228 		dev_err(smmu->dev, "no translation support!\n");
3229 		return -ENXIO;
3230 	}
3231 
3232 	/* We only support the AArch64 table format at present */
3233 	switch (FIELD_GET(IDR0_TTF, reg)) {
3234 	case IDR0_TTF_AARCH32_64:
3235 		smmu->ias = 40;
3236 		fallthrough;
3237 	case IDR0_TTF_AARCH64:
3238 		break;
3239 	default:
3240 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3241 		return -ENXIO;
3242 	}
3243 
3244 	/* ASID/VMID sizes */
3245 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3246 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3247 
3248 	/* IDR1 */
3249 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3250 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3251 		dev_err(smmu->dev, "embedded implementation not supported\n");
3252 		return -ENXIO;
3253 	}
3254 
3255 	/* Queue sizes, capped to ensure natural alignment */
3256 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3257 					     FIELD_GET(IDR1_CMDQS, reg));
3258 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3259 		/*
3260 		 * We don't support splitting up batches, so one batch of
3261 		 * commands plus an extra sync needs to fit inside the command
3262 		 * queue. There's also no way we can handle the weird alignment
3263 		 * restrictions on the base pointer for a unit-length queue.
3264 		 */
3265 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3266 			CMDQ_BATCH_ENTRIES);
3267 		return -ENXIO;
3268 	}
3269 
3270 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3271 					     FIELD_GET(IDR1_EVTQS, reg));
3272 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3273 					     FIELD_GET(IDR1_PRIQS, reg));
3274 
3275 	/* SID/SSID sizes */
3276 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3277 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3278 
3279 	/*
3280 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3281 	 * table, use a linear table instead.
3282 	 */
3283 	if (smmu->sid_bits <= STRTAB_SPLIT)
3284 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3285 
3286 	/* IDR3 */
3287 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3288 	if (FIELD_GET(IDR3_RIL, reg))
3289 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3290 
3291 	/* IDR5 */
3292 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3293 
3294 	/* Maximum number of outstanding stalls */
3295 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3296 
3297 	/* Page sizes */
3298 	if (reg & IDR5_GRAN64K)
3299 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3300 	if (reg & IDR5_GRAN16K)
3301 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3302 	if (reg & IDR5_GRAN4K)
3303 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3304 
3305 	/* Input address size */
3306 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3307 		smmu->features |= ARM_SMMU_FEAT_VAX;
3308 
3309 	/* Output address size */
3310 	switch (FIELD_GET(IDR5_OAS, reg)) {
3311 	case IDR5_OAS_32_BIT:
3312 		smmu->oas = 32;
3313 		break;
3314 	case IDR5_OAS_36_BIT:
3315 		smmu->oas = 36;
3316 		break;
3317 	case IDR5_OAS_40_BIT:
3318 		smmu->oas = 40;
3319 		break;
3320 	case IDR5_OAS_42_BIT:
3321 		smmu->oas = 42;
3322 		break;
3323 	case IDR5_OAS_44_BIT:
3324 		smmu->oas = 44;
3325 		break;
3326 	case IDR5_OAS_52_BIT:
3327 		smmu->oas = 52;
3328 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3329 		break;
3330 	default:
3331 		dev_info(smmu->dev,
3332 			"unknown output address size. Truncating to 48-bit\n");
3333 		fallthrough;
3334 	case IDR5_OAS_48_BIT:
3335 		smmu->oas = 48;
3336 	}
3337 
3338 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3339 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3340 	else
3341 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3342 
3343 	/* Set the DMA mask for our table walker */
3344 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3345 		dev_warn(smmu->dev,
3346 			 "failed to set DMA mask for table walker\n");
3347 
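	/*
	 * Make sure the advertised input range covers at least the output
	 * range: stage-2 (IPA) inputs can be as wide as the physical
	 * addresses that stage 1 is allowed to emit.
	 */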
3348 	smmu->ias = max(smmu->ias, smmu->oas);
3349 
3350 	if (arm_smmu_sva_supported(smmu))
3351 		smmu->features |= ARM_SMMU_FEAT_SVA;
3352 
3353 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3354 		 smmu->ias, smmu->oas, smmu->features);
3355 	return 0;
3356 }
3357 
3358 #ifdef CONFIG_ACPI
3359 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3360 {
3361 	switch (model) {
3362 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3363 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3364 		break;
3365 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3366 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3367 		break;
3368 	}
3369 
3370 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3371 }
3372 
3373 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3374 				      struct arm_smmu_device *smmu)
3375 {
3376 	struct acpi_iort_smmu_v3 *iort_smmu;
3377 	struct device *dev = smmu->dev;
3378 	struct acpi_iort_node *node;
3379 
3380 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3381 
3382 	/* Retrieve SMMUv3 specific data */
3383 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3384 
3385 	acpi_smmu_get_options(iort_smmu->model, smmu);
3386 
3387 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3388 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3389 
3390 	return 0;
3391 }
3392 #else
3393 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3394 					     struct arm_smmu_device *smmu)
3395 {
3396 	return -ENODEV;
3397 }
3398 #endif
3399 
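/*
 * Illustrative device-tree node for this binding (every address, size and
 * interrupt number below is a placeholder, not taken from real hardware):
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "gerror";
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */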
3400 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3401 				    struct arm_smmu_device *smmu)
3402 {
3403 	struct device *dev = &pdev->dev;
3404 	u32 cells;
3405 	int ret = -EINVAL;
3406 
3407 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3408 		dev_err(dev, "missing #iommu-cells property\n");
3409 	else if (cells != 1)
3410 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3411 	else
3412 		ret = 0;
3413 
3414 	parse_driver_options(smmu);
3415 
3416 	if (of_dma_is_coherent(dev->of_node))
3417 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3418 
3419 	return ret;
3420 }
3421 
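/*
 * The SMMU register file spans two 64K pages. Parts flagged with
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY expose their page-1 registers within page 0,
 * so only a single 64K page of MMIO space is expected for them.
 */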
3422 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3423 {
3424 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3425 		return SZ_64K;
3426 	else
3427 		return SZ_128K;
3428 }
3429 
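/*
 * Install (or, with ops == NULL, remove) the SMMUv3 ops on every bus type we
 * can master. The "!= ops" checks keep additional SMMU instances from
 * redundantly re-registering ops that are already in place.
 */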
3430 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3431 {
3432 	int err;
3433 
3434 #ifdef CONFIG_PCI
3435 	if (pci_bus_type.iommu_ops != ops) {
3436 		err = bus_set_iommu(&pci_bus_type, ops);
3437 		if (err)
3438 			return err;
3439 	}
3440 #endif
3441 #ifdef CONFIG_ARM_AMBA
3442 	if (amba_bustype.iommu_ops != ops) {
3443 		err = bus_set_iommu(&amba_bustype, ops);
3444 		if (err)
3445 			goto err_reset_pci_ops;
3446 	}
3447 #endif
3448 	if (platform_bus_type.iommu_ops != ops) {
3449 		err = bus_set_iommu(&platform_bus_type, ops);
3450 		if (err)
3451 			goto err_reset_amba_ops;
3452 	}
3453 
3454 	return 0;
3455 
3456 err_reset_amba_ops:
3457 #ifdef CONFIG_ARM_AMBA
3458 	bus_set_iommu(&amba_bustype, NULL);
3459 #endif
3460 err_reset_pci_ops: __maybe_unused;
3461 #ifdef CONFIG_PCI
3462 	bus_set_iommu(&pci_bus_type, NULL);
3463 #endif
3464 	return err;
3465 }
3466 
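/*
 * Map a sub-range of the MMIO resource by hand rather than using
 * devm_platform_ioremap_resource(), which would claim the whole region,
 * including the IMPLEMENTATION DEFINED space left to the PMCG driver.
 */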
3467 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3468 				      resource_size_t size)
3469 {
3470 	struct resource res = {
3471 		.flags = IORESOURCE_MEM,
3472 		.start = start,
3473 		.end = start + size - 1,
3474 	};
3475 
3476 	return devm_ioremap_resource(dev, &res);
3477 }
3478 
3479 static int arm_smmu_device_probe(struct platform_device *pdev)
3480 {
3481 	int irq, ret;
3482 	struct resource *res;
3483 	resource_size_t ioaddr;
3484 	struct arm_smmu_device *smmu;
3485 	struct device *dev = &pdev->dev;
3486 	bool bypass;
3487 
3488 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3489 	if (!smmu) {
3490 		dev_err(dev, "failed to allocate arm_smmu_device\n");
3491 		return -ENOMEM;
3492 	}
3493 	smmu->dev = dev;
3494 
3495 	if (dev->of_node) {
3496 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3497 	} else {
3498 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3499 		if (ret == -ENODEV)
3500 			return ret;
3501 	}
3502 
3503 	/* Set bypass mode according to firmware probing result */
3504 	bypass = !!ret;
3505 
3506 	/* Base address */
3507 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3508 	if (!res)
3509 		return -EINVAL;
3510 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3511 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3512 		return -EINVAL;
3513 	}
3514 	ioaddr = res->start;
3515 
3516 	/*
3517 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3518 	 * the PMCG registers which are reserved by the PMU driver.
3519 	 */
3520 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3521 	if (IS_ERR(smmu->base))
3522 		return PTR_ERR(smmu->base);
3523 
3524 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3525 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3526 					       ARM_SMMU_REG_SZ);
3527 		if (IS_ERR(smmu->page1))
3528 			return PTR_ERR(smmu->page1);
3529 	} else {
3530 		smmu->page1 = smmu->base;
3531 	}
3532 
3533 	/* Interrupt lines */
3534 
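	/*
	 * All wired IRQs are optional: a single "combined" line may stand in
	 * for the separate eventq/priq/gerror interrupts, and an SMMU with
	 * MSI support may signal them that way instead.
	 */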
3535 	irq = platform_get_irq_byname_optional(pdev, "combined");
3536 	if (irq > 0)
3537 		smmu->combined_irq = irq;
3538 	else {
3539 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3540 		if (irq > 0)
3541 			smmu->evtq.q.irq = irq;
3542 
3543 		irq = platform_get_irq_byname_optional(pdev, "priq");
3544 		if (irq > 0)
3545 			smmu->priq.q.irq = irq;
3546 
3547 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3548 		if (irq > 0)
3549 			smmu->gerr_irq = irq;
3550 	}
3551 	/* Probe the h/w */
3552 	ret = arm_smmu_device_hw_probe(smmu);
3553 	if (ret)
3554 		return ret;
3555 
3556 	/* Initialise in-memory data structures */
3557 	ret = arm_smmu_init_structures(smmu);
3558 	if (ret)
3559 		return ret;
3560 
3561 	/* Record our private device structure */
3562 	platform_set_drvdata(pdev, smmu);
3563 
3564 	/* Reset the device */
3565 	ret = arm_smmu_device_reset(smmu, bypass);
3566 	if (ret)
3567 		return ret;
3568 
3569 	/* And we're up. Go go go! */
3570 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3571 				     "smmu3.%pa", &ioaddr);
3572 	if (ret)
3573 		return ret;
3574 
3575 	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
3576 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
3577 
3578 	ret = iommu_device_register(&smmu->iommu);
3579 	if (ret) {
3580 		dev_err(dev, "Failed to register iommu\n");
3581 		return ret;
3582 	}
3583 
3584 	return arm_smmu_set_bus_ops(&arm_smmu_ops);
3585 }
3586 
3587 static int arm_smmu_device_remove(struct platform_device *pdev)
3588 {
3589 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3590 
3591 	arm_smmu_set_bus_ops(NULL);
3592 	iommu_device_unregister(&smmu->iommu);
3593 	iommu_device_sysfs_remove(&smmu->iommu);
3594 	arm_smmu_device_disable(smmu);
3595 
3596 	return 0;
3597 }
3598 
3599 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3600 {
3601 	arm_smmu_device_remove(pdev);
3602 }
3603 
3604 static const struct of_device_id arm_smmu_of_match[] = {
3605 	{ .compatible = "arm,smmu-v3", },
3606 	{ },
3607 };
3608 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3609 
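/*
 * suppress_bind_attrs removes the sysfs bind/unbind knobs: with client
 * devices potentially translating DMA through this SMMU, unbinding the
 * driver at an arbitrary point is not a safe operation.
 */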
3610 static struct platform_driver arm_smmu_driver = {
3611 	.driver	= {
3612 		.name			= "arm-smmu-v3",
3613 		.of_match_table		= arm_smmu_of_match,
3614 		.suppress_bind_attrs	= true,
3615 	},
3616 	.probe	= arm_smmu_device_probe,
3617 	.remove	= arm_smmu_device_remove,
3618 	.shutdown = arm_smmu_device_shutdown,
3619 };
3620 module_platform_driver(arm_smmu_driver);
3621 
3622 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3623 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3624 MODULE_ALIAS("platform:arm-smmu-v3");
3625 MODULE_LICENSE("GPL v2");
3626