// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>

#include <linux/amba/bus.h>

#include "arm-smmu-v3.h"

static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};

static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
	[EVTQ_MSI_INDEX] = {
		ARM_SMMU_EVTQ_IRQ_CFG0,
		ARM_SMMU_EVTQ_IRQ_CFG1,
		ARM_SMMU_EVTQ_IRQ_CFG2,
	},
	[GERROR_MSI_INDEX] = {
		ARM_SMMU_GERROR_IRQ_CFG0,
		ARM_SMMU_GERROR_IRQ_CFG1,
		ARM_SMMU_GERROR_IRQ_CFG2,
	},
	[PRIQ_MSI_INDEX] = {
		ARM_SMMU_PRIQ_IRQ_CFG0,
		ARM_SMMU_PRIQ_IRQ_CFG1,
		ARM_SMMU_PRIQ_IRQ_CFG2,
	},
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
	{ 0, NULL},
};

static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
						 struct arm_smmu_device *smmu)
{
	if (offset > SZ_64K)
		return smmu->page1 + offset - SZ_64K;

	return smmu->base + offset;
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

/* Low-level queue manipulation functions */
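/*
 * The prod and cons values used below pack a queue index into the low
 * ->max_n_shift bits (Q_IDX), with a wrap bit immediately above it
 * (Q_WRP) and, for values exchanged with the hardware registers, an
 * overflow flag (Q_OVF). Comparing the index and wrap bits together is
 * what lets us tell a completely full queue apart from an empty one.
 */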
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
	u32 space, prod, cons;

	prod = Q_IDX(q, q->prod);
	cons = Q_IDX(q, q->cons);

	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
		space = (1 << q->max_n_shift) - (prod - cons);
	else
		space = cons - prod;

	return space >= n;
}

static bool queue_full(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}

static bool queue_empty(struct arm_smmu_ll_queue *q)
{
	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}

static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}

static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
	/*
	 * Ensure that all CPU accesses (reads and writes) to the queue
	 * are complete before we update the cons pointer.
	 */
	__iomb();
	writel_relaxed(q->llq.cons, q->cons_reg);
}

static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}

static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
	u32 prod;
	int ret = 0;

	/*
	 * We can't use the _relaxed() variant here, as we must prevent
	 * speculative reads of the queue before we have determined that
	 * prod has indeed moved.
	 */
	prod = readl(q->prod_reg);

	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
		ret = -EOVERFLOW;

	q->llq.prod = prod;
	return ret;
}

static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;

	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}

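/*
 * Queue polling helpers: callers spin with cpu_relax() for up to
 * ARM_SMMU_POLL_SPIN_COUNT iterations, then back off with an
 * exponentially increasing udelay(), or simply wfe() when the SMMU can
 * send an SEV on queue updates. queue_poll() returns -ETIMEDOUT once
 * ARM_SMMU_POLL_TIMEOUT_US has elapsed.
 */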
static void queue_poll_init(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue_poll *qp)
{
	qp->delay = 1;
	qp->spin_cnt = 0;
	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}

static int queue_poll(struct arm_smmu_queue_poll *qp)
{
	if (ktime_compare(ktime_get(), qp->timeout) > 0)
		return -ETIMEDOUT;

	if (qp->wfe) {
		wfe();
	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
		cpu_relax();
	} else {
		udelay(qp->delay);
		qp->delay *= 2;
		qp->spin_cnt = 0;
	}

	return 0;
}

static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}

static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	int i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}

static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	if (queue_empty(&q->llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
	queue_inc_cons(&q->llq);
	queue_sync_cons_out(q);
	return 0;
}

/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		break;
	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
		break;
	case CMDQ_OP_CFGI_CD:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;
	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;
	case CMDQ_OP_CFGI_ALL:
		/* Cover the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;
	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;
	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
		switch (ent->pri.resp) {
		case PRI_RESP_DENY:
		case PRI_RESP_FAIL:
		case PRI_RESP_SUCC:
			break;
		default:
			return -EINVAL;
		}
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;
	case CMDQ_OP_CMD_SYNC:
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 u32 prod)
{
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	struct arm_smmu_cmdq_ent ent = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
				   q->ent_dwords * 8;
	}

	arm_smmu_cmdq_build_cmd(cmd, &ent);
}

static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
	static const char *cerror_str[] = {
		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
	};

	int i;
	u64 cmd[CMDQ_ENT_DWORDS];
	struct arm_smmu_queue *q = &smmu->cmdq.q;
	u32 cons = readl_relaxed(q->cons_reg);
	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
	struct arm_smmu_cmdq_ent cmd_sync = {
		.opcode = CMDQ_OP_CMD_SYNC,
	};

	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
		idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");

	switch (idx) {
	case CMDQ_ERR_CERROR_ABT_IDX:
		dev_err(smmu->dev, "retrying command fetch\n");
		fallthrough;
	case CMDQ_ERR_CERROR_NONE_IDX:
		return;
	case CMDQ_ERR_CERROR_ATC_INV_IDX:
		/*
		 * ATC Invalidation Completion timeout. CONS is still pointing
		 * at the CMD_SYNC. Attempt to complete other pending commands
		 * by repeating the CMD_SYNC, though we might well end up back
		 * here since the ATC invalidation may still be pending.
		 */
		return;
	case CMDQ_ERR_CERROR_ILL_IDX:
	default:
		break;
	}

	/*
	 * We may have concurrent producers, so we need to be careful
	 * not to touch any of the shadow cmdq state.
	 */
	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
	dev_err(smmu->dev, "skipping command in error state:\n");
	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);

	/* Convert the erroneous command into a CMD_SYNC */
	if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
		dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
		return;
	}

	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
 *
 * - The only LOCK routines are exclusive_trylock() and shared_lock().
 *   Neither have barrier semantics, and instead provide only a control
 *   dependency.
 *
 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
 *   fails if the caller appears to be the last lock holder (yes, this is
 *   racy). All successful UNLOCK routines have RELEASE semantics.
 */
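/*
 * Illustrative usage only (the real callers live in
 * arm_smmu_cmdq_issue_cmdlist() below):
 *
 *	arm_smmu_cmdq_shared_lock(cmdq);	// before marking a CMD_SYNC valid
 *	...wait for the CMD_SYNC to complete...
 *	if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *		// last holder: safe to update cmdq->q.llq.cons
 *		arm_smmu_cmdq_shared_unlock(cmdq);
 *	}
 */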
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
	int val;

	/*
	 * We can try to avoid the cmpxchg() loop by simply incrementing the
	 * lock counter. When held in exclusive state, the lock counter is set
	 * to INT_MIN so these increments won't hurt as the value will remain
	 * negative.
	 */
	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
		return;

	do {
		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}

static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
	(void)atomic_dec_return_release(&cmdq->lock);
}

static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
	if (atomic_read(&cmdq->lock) == 1)
		return false;

	arm_smmu_cmdq_shared_unlock(cmdq);
	return true;
}

#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})

#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&cmdq->lock, 0);				\
	local_irq_restore(flags);					\
})


/*
 * Command queue insertion.
 * This is made fiddly by our attempts to achieve some sort of scalability
 * since there is one queue shared amongst all of the CPUs in the system. If
 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
 * then you'll *love* this monstrosity.
 *
 * The basic idea is to split the queue up into ranges of commands that are
 * owned by a given CPU; the owner may not have written all of the commands
 * itself, but is responsible for advancing the hardware prod pointer when
 * the time comes. The algorithm is roughly:
 *
 *	1. Allocate some space in the queue. At this point we also discover
 *	   whether the head of the queue is currently owned by another CPU,
 *	   or whether we are the owner.
 *
 *	2. Write our commands into our allocated slots in the queue.
 *
 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
 *
 *	4. If we are an owner:
 *		a. Wait for the previous owner to finish.
 *		b. Mark the queue head as unowned, which tells us the range
 *		   that we are responsible for publishing.
 *		c. Wait for all commands in our owned range to become valid.
 *		d. Advance the hardware prod pointer.
 *		e. Tell the next owner we've finished.
 *
 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
 *	   owner), then we need to stick around until it has completed:
 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
 *		   to clear the first 4 bytes.
 *		b. Otherwise, we spin waiting for the hardware cons pointer to
 *		   advance past our command.
 *
 * The devil is in the details, particularly the use of locking for handling
 * SYNC completion and freeing up space in the queue before we think that it is
 * full.
 */
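/*
 * Either mark (@set == true) or wait for (@set == false) the valid bits
 * covering command slots [sprod, eprod) in cmdq->valid_map. The two
 * wrappers below provide the set and poll flavours.
 */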
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	u32 swidx, sbidx, ewidx, ebidx;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= sprod,
	};

	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		unsigned long mask;
		atomic_long_t *ptr;
		u32 limit = BITS_PER_LONG;

		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;

		ptr = &cmdq->valid_map[swidx];

		if ((swidx == ewidx) && (sbidx < ebidx))
			limit = ebidx;

		mask = GENMASK(limit - 1, sbidx);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(mask, ptr);
		} else { /* Poll */
			unsigned long valid;

			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
		}

		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
	}
}

/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
					u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}

/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
					 u32 sprod, u32 eprod)
{
	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}

/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
					     struct arm_smmu_ll_queue *llq)
{
	unsigned long flags;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	int ret = 0;

	/*
	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
	 * that fails, spin until somebody else updates it for us.
	 */
	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
		llq->val = READ_ONCE(cmdq->q.llq.val);
		return 0;
	}

	queue_poll_init(smmu, &qp);
	do {
		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
		if (!queue_full(llq))
			break;

		ret = queue_poll(&qp);
	} while (!ret);

	return ret;
}

/*
 * Wait until the SMMU signals a CMD_SYNC completion MSI.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
					  struct arm_smmu_ll_queue *llq)
{
	int ret = 0;
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));

	queue_poll_init(smmu, &qp);

	/*
	 * The MSI won't generate an event, since it's being written back
	 * into the command queue.
	 */
	qp.wfe = false;
	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
	return ret;
}

/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}

static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	int i;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift	= cmdq->q.llq.max_n_shift,
		.prod		= prod,
	};

	for (i = 0; i < n; ++i) {
		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];

		prod = queue_inc_prod_n(&llq, i);
		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
	}
}

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
 *
 * - There is a dma_wmb() before publishing any commands to the queue.
 *   This can be relied upon to order prior writes to data structures
 *   in memory (such as a CD or an STE) before the command.
 *
 * - On completion of a CMD_SYNC, there is a control dependency.
 *   This can be relied upon to order subsequent writes to memory (e.g.
 *   freeing an IOVA) after completion of the CMD_SYNC.
 *
 * - Command insertion is totally ordered, so if two CPUs each race to
 *   insert their own list of commands then all of the commands from one
 *   CPU will appear before any of the commands from the other CPU.
 */
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
				       u64 *cmds, int n, bool sync)
{
	u64 cmd_sync[CMDQ_ENT_DWORDS];
	u32 prod;
	unsigned long flags;
	bool owner;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head = llq;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync) |
					     CMDQ_PROD_OWNED_FLAG;

		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);
	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;

	/*
	 * 2. Write our commands into the queue
	 * Dependency ordering from the cmpxchg() loop above.
	 */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);

		/*
		 * In order to determine completion of our CMD_SYNC, we must
		 * ensure that the queue can't wrap twice without us noticing.
		 * We achieve that by taking the cmdq lock as shared before
		 * marking our slot as valid.
		 */
		arm_smmu_cmdq_shared_lock(cmdq);
	}

	/* 3. Mark our slots as valid, ensuring commands are visible first */
	dma_wmb();
	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);

	/* 4. If we are the owner, take control of the SMMU hardware */
	if (owner) {
		/* a. Wait for previous owner to finish */
		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);

		/* b. Stop gathering work by clearing the owned flag */
		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
						   &cmdq->q.llq.atomic.prod);
		prod &= ~CMDQ_PROD_OWNED_FLAG;

		/*
		 * c. Wait for any gathered work to be written to the queue.
		 * Note that we read our own entries so that we have the control
		 * dependency required by (d).
		 */
		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);

		/*
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
		writel_relaxed(prod, cmdq->q.prod_reg);

		/*
		 * e. Tell the next owner we're done
		 * Make sure we've updated the hardware first, so that we don't
		 * race to update prod and potentially move it backwards.
		 */
		atomic_set_release(&cmdq->owner_prod, prod);
	}

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Try to unlock the cmdq lock. This will fail if we're the last
		 * reader, in which case we can safely update cmdq->q.llq.cons
		 */
		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
			arm_smmu_cmdq_shared_unlock(cmdq);
		}
	}

	local_irq_restore(flags);
	return ret;
}

static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				   struct arm_smmu_cmdq_ent *ent)
{
	u64 cmd[CMDQ_ENT_DWORDS];

	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
}

static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}

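/*
 * Command batching: gather up to CMDQ_BATCH_ENTRIES commands and publish
 * them with a single cmdq insertion, amortising the cost of the shared
 * queue machinery above across the whole batch.
 */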
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}
	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode = CMDQ_OP_TLBI_NH_ASID,
		.tlbi.asid = asid,
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
	arm_smmu_cmdq_issue_sync(smmu);
}

static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_sids; i++) {
			cmd.cfgi.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

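/*
 * Return a pointer to the context descriptor for @ssid, allocating and
 * installing an L2 leaf table on demand when a two-level CD table is in
 * use. Returns NULL if the leaf table cannot be allocated.
 */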
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}

int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (4) */
		val = 0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}

static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;

	max_contexts = 1 << cfg->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cdcfg->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
					      sizeof(*cdcfg->l1_desc),
					      GFP_KERNEL);
		if (!cdcfg->l1_desc)
			return -ENOMEM;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
					   GFP_KERNEL);
	if (!cdcfg->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cdcfg->l1_desc) {
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;
	}
	return ret;
}

static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int i;
	size_t size, l1size;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	if (cdcfg->l1_desc) {
		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

		for (i = 0; i < cdcfg->num_l1_ents; i++) {
			if (!cdcfg->l1_desc[i].l2ptr)
				continue;

			dmam_free_coherent(smmu->dev, size,
					   cdcfg->l1_desc[i].l2ptr,
					   cdcfg->l1_desc[i].l2ptr_dma);
		}
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	} else {
		l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
	}

	dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
	cdcfg->cdtab_dma = 0;
	cdcfg->cdtab = NULL;
}

1133*4882a593Smuzhiyun bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1134*4882a593Smuzhiyun {
1135*4882a593Smuzhiyun bool free;
1136*4882a593Smuzhiyun struct arm_smmu_ctx_desc *old_cd;
1137*4882a593Smuzhiyun
1138*4882a593Smuzhiyun if (!cd->asid)
1139*4882a593Smuzhiyun return false;
1140*4882a593Smuzhiyun
1141*4882a593Smuzhiyun free = refcount_dec_and_test(&cd->refs);
1142*4882a593Smuzhiyun if (free) {
1143*4882a593Smuzhiyun old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1144*4882a593Smuzhiyun WARN_ON(old_cd != cd);
1145*4882a593Smuzhiyun }
1146*4882a593Smuzhiyun return free;
1147*4882a593Smuzhiyun }
1148*4882a593Smuzhiyun
1149*4882a593Smuzhiyun /* Stream table manipulation functions */
1150*4882a593Smuzhiyun static void
1151*4882a593Smuzhiyun arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1152*4882a593Smuzhiyun {
1153*4882a593Smuzhiyun u64 val = 0;
1154*4882a593Smuzhiyun
1155*4882a593Smuzhiyun val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1156*4882a593Smuzhiyun val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1157*4882a593Smuzhiyun
1158*4882a593Smuzhiyun /* See comment in arm_smmu_write_ctx_desc() */
1159*4882a593Smuzhiyun WRITE_ONCE(*dst, cpu_to_le64(val));
1160*4882a593Smuzhiyun }
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1163*4882a593Smuzhiyun {
1164*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd = {
1165*4882a593Smuzhiyun .opcode = CMDQ_OP_CFGI_STE,
1166*4882a593Smuzhiyun .cfgi = {
1167*4882a593Smuzhiyun .sid = sid,
1168*4882a593Smuzhiyun .leaf = true,
1169*4882a593Smuzhiyun },
1170*4882a593Smuzhiyun };
1171*4882a593Smuzhiyun
1172*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1173*4882a593Smuzhiyun arm_smmu_cmdq_issue_sync(smmu);
1174*4882a593Smuzhiyun }
1175*4882a593Smuzhiyun
1176*4882a593Smuzhiyun static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1177*4882a593Smuzhiyun __le64 *dst)
1178*4882a593Smuzhiyun {
1179*4882a593Smuzhiyun /*
1180*4882a593Smuzhiyun * This is hideously complicated, but we only really care about
1181*4882a593Smuzhiyun * three cases at the moment:
1182*4882a593Smuzhiyun *
1183*4882a593Smuzhiyun * 1. Invalid (all zero) -> bypass/fault (init)
1184*4882a593Smuzhiyun * 2. Bypass/fault -> translation/bypass (attach)
1185*4882a593Smuzhiyun * 3. Translation/bypass -> bypass/fault (detach)
1186*4882a593Smuzhiyun *
1187*4882a593Smuzhiyun * Given that we can't update the STE atomically and the SMMU
1188*4882a593Smuzhiyun * doesn't read the thing in a defined order, that leaves us
1189*4882a593Smuzhiyun * with the following maintenance requirements:
1190*4882a593Smuzhiyun *
1191*4882a593Smuzhiyun * 1. Update Config, return (init time STEs aren't live)
1192*4882a593Smuzhiyun * 2. Write everything apart from dword 0, sync, write dword 0, sync
1193*4882a593Smuzhiyun * 3. Update Config, sync
1194*4882a593Smuzhiyun */
1195*4882a593Smuzhiyun u64 val = le64_to_cpu(dst[0]);
1196*4882a593Smuzhiyun bool ste_live = false;
1197*4882a593Smuzhiyun struct arm_smmu_device *smmu = NULL;
1198*4882a593Smuzhiyun struct arm_smmu_s1_cfg *s1_cfg = NULL;
1199*4882a593Smuzhiyun struct arm_smmu_s2_cfg *s2_cfg = NULL;
1200*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = NULL;
1201*4882a593Smuzhiyun struct arm_smmu_cmdq_ent prefetch_cmd = {
1202*4882a593Smuzhiyun .opcode = CMDQ_OP_PREFETCH_CFG,
1203*4882a593Smuzhiyun .prefetch = {
1204*4882a593Smuzhiyun .sid = sid,
1205*4882a593Smuzhiyun },
1206*4882a593Smuzhiyun };
1207*4882a593Smuzhiyun
1208*4882a593Smuzhiyun if (master) {
1209*4882a593Smuzhiyun smmu_domain = master->domain;
1210*4882a593Smuzhiyun smmu = master->smmu;
1211*4882a593Smuzhiyun }
1212*4882a593Smuzhiyun
1213*4882a593Smuzhiyun if (smmu_domain) {
1214*4882a593Smuzhiyun switch (smmu_domain->stage) {
1215*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_S1:
1216*4882a593Smuzhiyun s1_cfg = &smmu_domain->s1_cfg;
1217*4882a593Smuzhiyun break;
1218*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_S2:
1219*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_NESTED:
1220*4882a593Smuzhiyun s2_cfg = &smmu_domain->s2_cfg;
1221*4882a593Smuzhiyun break;
1222*4882a593Smuzhiyun default:
1223*4882a593Smuzhiyun break;
1224*4882a593Smuzhiyun }
1225*4882a593Smuzhiyun }
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun if (val & STRTAB_STE_0_V) {
1228*4882a593Smuzhiyun switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1229*4882a593Smuzhiyun case STRTAB_STE_0_CFG_BYPASS:
1230*4882a593Smuzhiyun break;
1231*4882a593Smuzhiyun case STRTAB_STE_0_CFG_S1_TRANS:
1232*4882a593Smuzhiyun case STRTAB_STE_0_CFG_S2_TRANS:
1233*4882a593Smuzhiyun ste_live = true;
1234*4882a593Smuzhiyun break;
1235*4882a593Smuzhiyun case STRTAB_STE_0_CFG_ABORT:
1236*4882a593Smuzhiyun BUG_ON(!disable_bypass);
1237*4882a593Smuzhiyun break;
1238*4882a593Smuzhiyun default:
1239*4882a593Smuzhiyun BUG(); /* STE corruption */
1240*4882a593Smuzhiyun }
1241*4882a593Smuzhiyun }
1242*4882a593Smuzhiyun
1243*4882a593Smuzhiyun /* Nuke the existing STE_0 value, as we're going to rewrite it */
1244*4882a593Smuzhiyun val = STRTAB_STE_0_V;
1245*4882a593Smuzhiyun
1246*4882a593Smuzhiyun /* Bypass/fault */
1247*4882a593Smuzhiyun if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1248*4882a593Smuzhiyun if (!smmu_domain && disable_bypass)
1249*4882a593Smuzhiyun val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1250*4882a593Smuzhiyun else
1251*4882a593Smuzhiyun val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1252*4882a593Smuzhiyun
1253*4882a593Smuzhiyun dst[0] = cpu_to_le64(val);
1254*4882a593Smuzhiyun dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1255*4882a593Smuzhiyun STRTAB_STE_1_SHCFG_INCOMING));
1256*4882a593Smuzhiyun dst[2] = 0; /* Nuke the VMID */
1257*4882a593Smuzhiyun /*
1258*4882a593Smuzhiyun * The SMMU can perform negative caching, so we must sync
1259*4882a593Smuzhiyun * the STE regardless of whether the old value was live.
1260*4882a593Smuzhiyun */
1261*4882a593Smuzhiyun if (smmu)
1262*4882a593Smuzhiyun arm_smmu_sync_ste_for_sid(smmu, sid);
1263*4882a593Smuzhiyun return;
1264*4882a593Smuzhiyun }
1265*4882a593Smuzhiyun
1266*4882a593Smuzhiyun if (s1_cfg) {
1267*4882a593Smuzhiyun BUG_ON(ste_live);
1268*4882a593Smuzhiyun dst[1] = cpu_to_le64(
1269*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1270*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1271*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1272*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1273*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1274*4882a593Smuzhiyun
1275*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1276*4882a593Smuzhiyun !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277*4882a593Smuzhiyun dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1278*4882a593Smuzhiyun
1279*4882a593Smuzhiyun val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1280*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1281*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1282*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1283*4882a593Smuzhiyun }
1284*4882a593Smuzhiyun
1285*4882a593Smuzhiyun if (s2_cfg) {
1286*4882a593Smuzhiyun BUG_ON(ste_live);
1287*4882a593Smuzhiyun dst[2] = cpu_to_le64(
1288*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1289*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1290*4882a593Smuzhiyun #ifdef __BIG_ENDIAN
1291*4882a593Smuzhiyun STRTAB_STE_2_S2ENDI |
1292*4882a593Smuzhiyun #endif
1293*4882a593Smuzhiyun STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1294*4882a593Smuzhiyun STRTAB_STE_2_S2R);
1295*4882a593Smuzhiyun
1296*4882a593Smuzhiyun dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1297*4882a593Smuzhiyun
1298*4882a593Smuzhiyun val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1299*4882a593Smuzhiyun }
1300*4882a593Smuzhiyun
1301*4882a593Smuzhiyun if (master->ats_enabled)
1302*4882a593Smuzhiyun dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1303*4882a593Smuzhiyun STRTAB_STE_1_EATS_TRANS));
1304*4882a593Smuzhiyun
1305*4882a593Smuzhiyun arm_smmu_sync_ste_for_sid(smmu, sid);
1306*4882a593Smuzhiyun /* See comment in arm_smmu_write_ctx_desc() */
1307*4882a593Smuzhiyun WRITE_ONCE(dst[0], cpu_to_le64(val));
1308*4882a593Smuzhiyun arm_smmu_sync_ste_for_sid(smmu, sid);
1309*4882a593Smuzhiyun
1310*4882a593Smuzhiyun /* It's likely that we'll want to use the new STE soon */
1311*4882a593Smuzhiyun if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1312*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1313*4882a593Smuzhiyun }
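/*
 * For the "attach" transition handled above, the resulting sequence is:
 *
 *	1. dst[1..3] are filled in with the stage-1/stage-2 configuration
 *	2. CFGI_STE + CMD_SYNC invalidate any cached copy of the old STE
 *	3. dst[0] (V, Config and the context pointer) is published with a
 *	   single WRITE_ONCE()
 *	4. a second CFGI_STE + CMD_SYNC makes the new entry observable
 *	5. PREFETCH_CFG optionally warms the configuration cache
 *
 * i.e. maintenance requirement 2 from the comment at the top of the
 * function.
 */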
1314*4882a593Smuzhiyun
1315*4882a593Smuzhiyun static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun unsigned int i;
1318*4882a593Smuzhiyun
1319*4882a593Smuzhiyun for (i = 0; i < nent; ++i) {
1320*4882a593Smuzhiyun arm_smmu_write_strtab_ent(NULL, -1, strtab);
1321*4882a593Smuzhiyun strtab += STRTAB_STE_DWORDS;
1322*4882a593Smuzhiyun }
1323*4882a593Smuzhiyun }
1324*4882a593Smuzhiyun
1325*4882a593Smuzhiyun static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1326*4882a593Smuzhiyun {
1327*4882a593Smuzhiyun size_t size;
1328*4882a593Smuzhiyun void *strtab;
1329*4882a593Smuzhiyun struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1330*4882a593Smuzhiyun struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1331*4882a593Smuzhiyun
1332*4882a593Smuzhiyun if (desc->l2ptr)
1333*4882a593Smuzhiyun return 0;
1334*4882a593Smuzhiyun
1335*4882a593Smuzhiyun size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1336*4882a593Smuzhiyun strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1337*4882a593Smuzhiyun
1338*4882a593Smuzhiyun desc->span = STRTAB_SPLIT + 1;
1339*4882a593Smuzhiyun desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1340*4882a593Smuzhiyun GFP_KERNEL);
1341*4882a593Smuzhiyun if (!desc->l2ptr) {
1342*4882a593Smuzhiyun dev_err(smmu->dev,
1343*4882a593Smuzhiyun "failed to allocate l2 stream table for SID %u\n",
1344*4882a593Smuzhiyun sid);
1345*4882a593Smuzhiyun return -ENOMEM;
1346*4882a593Smuzhiyun }
1347*4882a593Smuzhiyun
1348*4882a593Smuzhiyun arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1349*4882a593Smuzhiyun arm_smmu_write_strtab_l1_desc(strtab, desc);
1350*4882a593Smuzhiyun return 0;
1351*4882a593Smuzhiyun }
1352*4882a593Smuzhiyun
1353*4882a593Smuzhiyun /* IRQ and event handlers */
1354*4882a593Smuzhiyun static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1355*4882a593Smuzhiyun {
1356*4882a593Smuzhiyun int i;
1357*4882a593Smuzhiyun struct arm_smmu_device *smmu = dev;
1358*4882a593Smuzhiyun struct arm_smmu_queue *q = &smmu->evtq.q;
1359*4882a593Smuzhiyun struct arm_smmu_ll_queue *llq = &q->llq;
1360*4882a593Smuzhiyun u64 evt[EVTQ_ENT_DWORDS];
1361*4882a593Smuzhiyun
1362*4882a593Smuzhiyun do {
1363*4882a593Smuzhiyun while (!queue_remove_raw(q, evt)) {
1364*4882a593Smuzhiyun u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1365*4882a593Smuzhiyun
1366*4882a593Smuzhiyun dev_info(smmu->dev, "event 0x%02x received:\n", id);
1367*4882a593Smuzhiyun for (i = 0; i < ARRAY_SIZE(evt); ++i)
1368*4882a593Smuzhiyun dev_info(smmu->dev, "\t0x%016llx\n",
1369*4882a593Smuzhiyun (unsigned long long)evt[i]);
1370*4882a593Smuzhiyun
1371*4882a593Smuzhiyun cond_resched();
1372*4882a593Smuzhiyun }
1373*4882a593Smuzhiyun
1374*4882a593Smuzhiyun /*
1375*4882a593Smuzhiyun * Not much we can do on overflow, so scream and pretend we're
1376*4882a593Smuzhiyun * trying harder.
1377*4882a593Smuzhiyun */
1378*4882a593Smuzhiyun if (queue_sync_prod_in(q) == -EOVERFLOW)
1379*4882a593Smuzhiyun dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1380*4882a593Smuzhiyun } while (!queue_empty(llq));
1381*4882a593Smuzhiyun
1382*4882a593Smuzhiyun /* Sync our overflow flag, as we believe we're up to speed */
1383*4882a593Smuzhiyun llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1384*4882a593Smuzhiyun Q_IDX(llq, llq->cons);
1385*4882a593Smuzhiyun return IRQ_HANDLED;
1386*4882a593Smuzhiyun }
1387*4882a593Smuzhiyun
1388*4882a593Smuzhiyun static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1389*4882a593Smuzhiyun {
1390*4882a593Smuzhiyun u32 sid, ssid;
1391*4882a593Smuzhiyun u16 grpid;
1392*4882a593Smuzhiyun bool ssv, last;
1393*4882a593Smuzhiyun
1394*4882a593Smuzhiyun sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1395*4882a593Smuzhiyun ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1396*4882a593Smuzhiyun ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1397*4882a593Smuzhiyun last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1398*4882a593Smuzhiyun grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1399*4882a593Smuzhiyun
1400*4882a593Smuzhiyun dev_info(smmu->dev, "unexpected PRI request received:\n");
1401*4882a593Smuzhiyun dev_info(smmu->dev,
1402*4882a593Smuzhiyun "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1403*4882a593Smuzhiyun sid, ssid, grpid, last ? "L" : "",
1404*4882a593Smuzhiyun evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1405*4882a593Smuzhiyun evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1406*4882a593Smuzhiyun evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1407*4882a593Smuzhiyun evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1408*4882a593Smuzhiyun evt[1] & PRIQ_1_ADDR_MASK);
1409*4882a593Smuzhiyun
1410*4882a593Smuzhiyun if (last) {
1411*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd = {
1412*4882a593Smuzhiyun .opcode = CMDQ_OP_PRI_RESP,
1413*4882a593Smuzhiyun .substream_valid = ssv,
1414*4882a593Smuzhiyun .pri = {
1415*4882a593Smuzhiyun .sid = sid,
1416*4882a593Smuzhiyun .ssid = ssid,
1417*4882a593Smuzhiyun .grpid = grpid,
1418*4882a593Smuzhiyun .resp = PRI_RESP_DENY,
1419*4882a593Smuzhiyun },
1420*4882a593Smuzhiyun };
1421*4882a593Smuzhiyun
1422*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1423*4882a593Smuzhiyun }
1424*4882a593Smuzhiyun }
1425*4882a593Smuzhiyun
1426*4882a593Smuzhiyun static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1427*4882a593Smuzhiyun {
1428*4882a593Smuzhiyun struct arm_smmu_device *smmu = dev;
1429*4882a593Smuzhiyun struct arm_smmu_queue *q = &smmu->priq.q;
1430*4882a593Smuzhiyun struct arm_smmu_ll_queue *llq = &q->llq;
1431*4882a593Smuzhiyun u64 evt[PRIQ_ENT_DWORDS];
1432*4882a593Smuzhiyun
1433*4882a593Smuzhiyun do {
1434*4882a593Smuzhiyun while (!queue_remove_raw(q, evt))
1435*4882a593Smuzhiyun arm_smmu_handle_ppr(smmu, evt);
1436*4882a593Smuzhiyun
1437*4882a593Smuzhiyun if (queue_sync_prod_in(q) == -EOVERFLOW)
1438*4882a593Smuzhiyun dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1439*4882a593Smuzhiyun } while (!queue_empty(llq));
1440*4882a593Smuzhiyun
1441*4882a593Smuzhiyun /* Sync our overflow flag, as we believe we're up to speed */
1442*4882a593Smuzhiyun llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
1443*4882a593Smuzhiyun Q_IDX(llq, llq->cons);
1444*4882a593Smuzhiyun queue_sync_cons_out(q);
1445*4882a593Smuzhiyun return IRQ_HANDLED;
1446*4882a593Smuzhiyun }
1447*4882a593Smuzhiyun
1448*4882a593Smuzhiyun static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1449*4882a593Smuzhiyun
1450*4882a593Smuzhiyun static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1451*4882a593Smuzhiyun {
1452*4882a593Smuzhiyun u32 gerror, gerrorn, active;
1453*4882a593Smuzhiyun struct arm_smmu_device *smmu = dev;
1454*4882a593Smuzhiyun
1455*4882a593Smuzhiyun gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1456*4882a593Smuzhiyun gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1457*4882a593Smuzhiyun
1458*4882a593Smuzhiyun active = gerror ^ gerrorn;
1459*4882a593Smuzhiyun if (!(active & GERROR_ERR_MASK))
1460*4882a593Smuzhiyun return IRQ_NONE; /* No errors pending */
1461*4882a593Smuzhiyun
1462*4882a593Smuzhiyun dev_warn(smmu->dev,
1463*4882a593Smuzhiyun "unexpected global error reported (0x%08x), this could be serious\n",
1464*4882a593Smuzhiyun active);
1465*4882a593Smuzhiyun
1466*4882a593Smuzhiyun if (active & GERROR_SFM_ERR) {
1467*4882a593Smuzhiyun dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1468*4882a593Smuzhiyun arm_smmu_device_disable(smmu);
1469*4882a593Smuzhiyun }
1470*4882a593Smuzhiyun
1471*4882a593Smuzhiyun if (active & GERROR_MSI_GERROR_ABT_ERR)
1472*4882a593Smuzhiyun dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1473*4882a593Smuzhiyun
1474*4882a593Smuzhiyun if (active & GERROR_MSI_PRIQ_ABT_ERR)
1475*4882a593Smuzhiyun dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1476*4882a593Smuzhiyun
1477*4882a593Smuzhiyun if (active & GERROR_MSI_EVTQ_ABT_ERR)
1478*4882a593Smuzhiyun dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1479*4882a593Smuzhiyun
1480*4882a593Smuzhiyun if (active & GERROR_MSI_CMDQ_ABT_ERR)
1481*4882a593Smuzhiyun dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1482*4882a593Smuzhiyun
1483*4882a593Smuzhiyun if (active & GERROR_PRIQ_ABT_ERR)
1484*4882a593Smuzhiyun dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1485*4882a593Smuzhiyun
1486*4882a593Smuzhiyun if (active & GERROR_EVTQ_ABT_ERR)
1487*4882a593Smuzhiyun dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1488*4882a593Smuzhiyun
1489*4882a593Smuzhiyun if (active & GERROR_CMDQ_ERR)
1490*4882a593Smuzhiyun arm_smmu_cmdq_skip_err(smmu);
1491*4882a593Smuzhiyun
1492*4882a593Smuzhiyun writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1493*4882a593Smuzhiyun return IRQ_HANDLED;
1494*4882a593Smuzhiyun }
1495*4882a593Smuzhiyun
1496*4882a593Smuzhiyun static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1497*4882a593Smuzhiyun {
1498*4882a593Smuzhiyun struct arm_smmu_device *smmu = dev;
1499*4882a593Smuzhiyun
1500*4882a593Smuzhiyun arm_smmu_evtq_thread(irq, dev);
1501*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_PRI)
1502*4882a593Smuzhiyun arm_smmu_priq_thread(irq, dev);
1503*4882a593Smuzhiyun
1504*4882a593Smuzhiyun return IRQ_HANDLED;
1505*4882a593Smuzhiyun }
1506*4882a593Smuzhiyun
1507*4882a593Smuzhiyun static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1508*4882a593Smuzhiyun {
1509*4882a593Smuzhiyun arm_smmu_gerror_handler(irq, dev);
1510*4882a593Smuzhiyun return IRQ_WAKE_THREAD;
1511*4882a593Smuzhiyun }
1512*4882a593Smuzhiyun
1513*4882a593Smuzhiyun static void
1514*4882a593Smuzhiyun arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1515*4882a593Smuzhiyun struct arm_smmu_cmdq_ent *cmd)
1516*4882a593Smuzhiyun {
1517*4882a593Smuzhiyun size_t log2_span;
1518*4882a593Smuzhiyun size_t span_mask;
1519*4882a593Smuzhiyun /* ATC invalidates are always on 4096-bytes pages */
1520*4882a593Smuzhiyun size_t inval_grain_shift = 12;
1521*4882a593Smuzhiyun unsigned long page_start, page_end;
1522*4882a593Smuzhiyun
1523*4882a593Smuzhiyun *cmd = (struct arm_smmu_cmdq_ent) {
1524*4882a593Smuzhiyun .opcode = CMDQ_OP_ATC_INV,
1525*4882a593Smuzhiyun .substream_valid = !!ssid,
1526*4882a593Smuzhiyun .atc.ssid = ssid,
1527*4882a593Smuzhiyun };
1528*4882a593Smuzhiyun
1529*4882a593Smuzhiyun if (!size) {
1530*4882a593Smuzhiyun cmd->atc.size = ATC_INV_SIZE_ALL;
1531*4882a593Smuzhiyun return;
1532*4882a593Smuzhiyun }
1533*4882a593Smuzhiyun
1534*4882a593Smuzhiyun page_start = iova >> inval_grain_shift;
1535*4882a593Smuzhiyun page_end = (iova + size - 1) >> inval_grain_shift;
1536*4882a593Smuzhiyun
1537*4882a593Smuzhiyun /*
1538*4882a593Smuzhiyun * In an ATS Invalidate Request, the address must be aligned on the
1539*4882a593Smuzhiyun * range size, which must be a power of two number of page sizes. We
1540*4882a593Smuzhiyun * thus have to choose between grossly over-invalidating the region, or
1541*4882a593Smuzhiyun * splitting the invalidation into multiple commands. For simplicity
1542*4882a593Smuzhiyun * we'll go with the first solution, but should refine it in the future
1543*4882a593Smuzhiyun * if multiple commands are shown to be more efficient.
1544*4882a593Smuzhiyun *
1545*4882a593Smuzhiyun * Find the smallest power of two that covers the range. The most
1546*4882a593Smuzhiyun * significant differing bit between the start and end addresses,
1547*4882a593Smuzhiyun * fls(start ^ end), indicates the required span. For example:
1548*4882a593Smuzhiyun *
1549*4882a593Smuzhiyun * We want to invalidate pages [8; 11]. This is already the ideal range:
1550*4882a593Smuzhiyun * x = 0b1000 ^ 0b1011 = 0b11
1551*4882a593Smuzhiyun * span = 1 << fls(x) = 4
1552*4882a593Smuzhiyun *
1553*4882a593Smuzhiyun * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1554*4882a593Smuzhiyun * x = 0b0111 ^ 0b1010 = 0b1101
1555*4882a593Smuzhiyun * span = 1 << fls(x) = 16
1556*4882a593Smuzhiyun */
1557*4882a593Smuzhiyun log2_span = fls_long(page_start ^ page_end);
1558*4882a593Smuzhiyun span_mask = (1ULL << log2_span) - 1;
1559*4882a593Smuzhiyun
1560*4882a593Smuzhiyun page_start &= ~span_mask;
1561*4882a593Smuzhiyun
1562*4882a593Smuzhiyun cmd->atc.addr = page_start << inval_grain_shift;
1563*4882a593Smuzhiyun cmd->atc.size = log2_span;
1564*4882a593Smuzhiyun }
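/*
 * Worked example for the span calculation above (4KB ATC granule): an
 * invalidation of size 0x4000 at iova 0x7000 covers pages [7; 10], so
 *
 *	page_start = 0x7000 >> 12 = 7
 *	page_end   = (0x7000 + 0x4000 - 1) >> 12 = 10
 *	log2_span  = fls_long(7 ^ 10) = fls_long(0b1101) = 4
 *	span_mask  = 0xf, page_start &= ~0xf -> 0
 *
 * and the single ATC_INV command ends up with atc.addr = 0 and
 * atc.size = 4, over-invalidating pages [0; 15] rather than splitting
 * the request, exactly as described in the comment above.
 */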
1565*4882a593Smuzhiyun
1566*4882a593Smuzhiyun static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1567*4882a593Smuzhiyun {
1568*4882a593Smuzhiyun int i;
1569*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd;
1570*4882a593Smuzhiyun
1571*4882a593Smuzhiyun arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
1572*4882a593Smuzhiyun
1573*4882a593Smuzhiyun for (i = 0; i < master->num_sids; i++) {
1574*4882a593Smuzhiyun cmd.atc.sid = master->sids[i];
1575*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
1576*4882a593Smuzhiyun }
1577*4882a593Smuzhiyun
1578*4882a593Smuzhiyun return arm_smmu_cmdq_issue_sync(master->smmu);
1579*4882a593Smuzhiyun }
1580*4882a593Smuzhiyun
1581*4882a593Smuzhiyun static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
1582*4882a593Smuzhiyun int ssid, unsigned long iova, size_t size)
1583*4882a593Smuzhiyun {
1584*4882a593Smuzhiyun int i;
1585*4882a593Smuzhiyun unsigned long flags;
1586*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd;
1587*4882a593Smuzhiyun struct arm_smmu_master *master;
1588*4882a593Smuzhiyun struct arm_smmu_cmdq_batch cmds = {};
1589*4882a593Smuzhiyun
1590*4882a593Smuzhiyun if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1591*4882a593Smuzhiyun return 0;
1592*4882a593Smuzhiyun
1593*4882a593Smuzhiyun /*
1594*4882a593Smuzhiyun * Ensure that we've completed prior invalidation of the main TLBs
1595*4882a593Smuzhiyun * before we read 'nr_ats_masters' in case of a concurrent call to
1596*4882a593Smuzhiyun * arm_smmu_enable_ats():
1597*4882a593Smuzhiyun *
1598*4882a593Smuzhiyun * // unmap() // arm_smmu_enable_ats()
1599*4882a593Smuzhiyun * TLBI+SYNC atomic_inc(&nr_ats_masters);
1600*4882a593Smuzhiyun * smp_mb(); [...]
1601*4882a593Smuzhiyun * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
1602*4882a593Smuzhiyun *
1603*4882a593Smuzhiyun * Ensures that we always see the incremented 'nr_ats_masters' count if
1604*4882a593Smuzhiyun * ATS was enabled at the PCI device before completion of the TLBI.
1605*4882a593Smuzhiyun */
1606*4882a593Smuzhiyun smp_mb();
1607*4882a593Smuzhiyun if (!atomic_read(&smmu_domain->nr_ats_masters))
1608*4882a593Smuzhiyun return 0;
1609*4882a593Smuzhiyun
1610*4882a593Smuzhiyun arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1611*4882a593Smuzhiyun
1612*4882a593Smuzhiyun spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1613*4882a593Smuzhiyun list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1614*4882a593Smuzhiyun if (!master->ats_enabled)
1615*4882a593Smuzhiyun continue;
1616*4882a593Smuzhiyun
1617*4882a593Smuzhiyun for (i = 0; i < master->num_sids; i++) {
1618*4882a593Smuzhiyun cmd.atc.sid = master->sids[i];
1619*4882a593Smuzhiyun arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1620*4882a593Smuzhiyun }
1621*4882a593Smuzhiyun }
1622*4882a593Smuzhiyun spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1623*4882a593Smuzhiyun
1624*4882a593Smuzhiyun return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1625*4882a593Smuzhiyun }
1626*4882a593Smuzhiyun
1627*4882a593Smuzhiyun /* IO_PGTABLE API */
1628*4882a593Smuzhiyun static void arm_smmu_tlb_inv_context(void *cookie)
1629*4882a593Smuzhiyun {
1630*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = cookie;
1631*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1632*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd;
1633*4882a593Smuzhiyun
1634*4882a593Smuzhiyun /*
1635*4882a593Smuzhiyun * NOTE: when io-pgtable is in non-strict mode, we may get here with
1636*4882a593Smuzhiyun * PTEs previously cleared by unmaps on the current CPU not yet visible
1637*4882a593Smuzhiyun * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1638*4882a593Smuzhiyun * insertion to guarantee those are observed before the TLBI. Do be
1639*4882a593Smuzhiyun * careful, 007.
1640*4882a593Smuzhiyun */
1641*4882a593Smuzhiyun if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1642*4882a593Smuzhiyun arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
1643*4882a593Smuzhiyun } else {
1644*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1645*4882a593Smuzhiyun cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1646*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1647*4882a593Smuzhiyun arm_smmu_cmdq_issue_sync(smmu);
1648*4882a593Smuzhiyun }
1649*4882a593Smuzhiyun arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
1650*4882a593Smuzhiyun }
1651*4882a593Smuzhiyun
1652*4882a593Smuzhiyun static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
1653*4882a593Smuzhiyun size_t granule, bool leaf,
1654*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain)
1655*4882a593Smuzhiyun {
1656*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1657*4882a593Smuzhiyun unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
1658*4882a593Smuzhiyun size_t inv_range = granule;
1659*4882a593Smuzhiyun struct arm_smmu_cmdq_batch cmds = {};
1660*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd = {
1661*4882a593Smuzhiyun .tlbi = {
1662*4882a593Smuzhiyun .leaf = leaf,
1663*4882a593Smuzhiyun },
1664*4882a593Smuzhiyun };
1665*4882a593Smuzhiyun
1666*4882a593Smuzhiyun if (!size)
1667*4882a593Smuzhiyun return;
1668*4882a593Smuzhiyun
1669*4882a593Smuzhiyun if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1670*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1671*4882a593Smuzhiyun cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1672*4882a593Smuzhiyun } else {
1673*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1674*4882a593Smuzhiyun cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1675*4882a593Smuzhiyun }
1676*4882a593Smuzhiyun
1677*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1678*4882a593Smuzhiyun /* Get the leaf page size */
1679*4882a593Smuzhiyun tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1680*4882a593Smuzhiyun
1681*4882a593Smuzhiyun /* Convert page size of 12,14,16 (log2) to 1,2,3 */
1682*4882a593Smuzhiyun cmd.tlbi.tg = (tg - 10) / 2;
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun /* Determine what level the granule is at */
1685*4882a593Smuzhiyun cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1686*4882a593Smuzhiyun
1687*4882a593Smuzhiyun num_pages = size >> tg;
1688*4882a593Smuzhiyun }
1689*4882a593Smuzhiyun
1690*4882a593Smuzhiyun while (iova < end) {
1691*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1692*4882a593Smuzhiyun /*
1693*4882a593Smuzhiyun * On each iteration of the loop, the range is 5 bits
1694*4882a593Smuzhiyun * worth of the aligned size remaining.
1695*4882a593Smuzhiyun * The range in pages is:
1696*4882a593Smuzhiyun *
1697*4882a593Smuzhiyun * range = (num_pages & (0x1f << __ffs(num_pages)))
1698*4882a593Smuzhiyun */
1699*4882a593Smuzhiyun unsigned long scale, num;
1700*4882a593Smuzhiyun
1701*4882a593Smuzhiyun /* Determine the power of 2 multiple number of pages */
1702*4882a593Smuzhiyun scale = __ffs(num_pages);
1703*4882a593Smuzhiyun cmd.tlbi.scale = scale;
1704*4882a593Smuzhiyun
1705*4882a593Smuzhiyun /* Determine how many chunks of 2^scale size we have */
1706*4882a593Smuzhiyun num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1707*4882a593Smuzhiyun cmd.tlbi.num = num - 1;
1708*4882a593Smuzhiyun
1709*4882a593Smuzhiyun /* range is num * 2^scale * pgsize */
1710*4882a593Smuzhiyun inv_range = num << (scale + tg);
1711*4882a593Smuzhiyun
1712*4882a593Smuzhiyun /* Clear out the lower order bits for the next iteration */
1713*4882a593Smuzhiyun num_pages -= num << scale;
1714*4882a593Smuzhiyun }
1715*4882a593Smuzhiyun
1716*4882a593Smuzhiyun cmd.tlbi.addr = iova;
1717*4882a593Smuzhiyun arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1718*4882a593Smuzhiyun iova += inv_range;
1719*4882a593Smuzhiyun }
1720*4882a593Smuzhiyun arm_smmu_cmdq_batch_submit(smmu, &cmds);
1721*4882a593Smuzhiyun
1722*4882a593Smuzhiyun /*
1723*4882a593Smuzhiyun * Unfortunately, this can't be leaf-only since we may have
1724*4882a593Smuzhiyun * zapped an entire table.
1725*4882a593Smuzhiyun */
1726*4882a593Smuzhiyun arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
1727*4882a593Smuzhiyun }
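/*
 * Worked example for the ARM_SMMU_FEAT_RANGE_INV path above, assuming a
 * 4KB leaf page size and a 4KB granule: invalidating 0x21000 bytes
 * (33 pages) gives
 *
 *	tg = 12, cmd.tlbi.tg = (12 - 10) / 2 = 1
 *	cmd.tlbi.ttl = 4 - ((12 - 3) / (12 - 3)) = 3
 *	num_pages = 0x21000 >> 12 = 33 = 0b100001
 *
 *	1st iteration: scale = 0, num = 1 -> one 4KB invalidation
 *	2nd iteration: scale = 5, num = 1 -> one 32-page (128KB) invalidation
 *
 * so the batch carries two range commands before the trailing ATC
 * invalidation is issued.
 */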
1728*4882a593Smuzhiyun
1729*4882a593Smuzhiyun static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1730*4882a593Smuzhiyun unsigned long iova, size_t granule,
1731*4882a593Smuzhiyun void *cookie)
1732*4882a593Smuzhiyun {
1733*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = cookie;
1734*4882a593Smuzhiyun struct iommu_domain *domain = &smmu_domain->domain;
1735*4882a593Smuzhiyun
1736*4882a593Smuzhiyun iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1737*4882a593Smuzhiyun }
1738*4882a593Smuzhiyun
1739*4882a593Smuzhiyun static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1740*4882a593Smuzhiyun size_t granule, void *cookie)
1741*4882a593Smuzhiyun {
1742*4882a593Smuzhiyun arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
1743*4882a593Smuzhiyun }
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun static const struct iommu_flush_ops arm_smmu_flush_ops = {
1746*4882a593Smuzhiyun .tlb_flush_all = arm_smmu_tlb_inv_context,
1747*4882a593Smuzhiyun .tlb_flush_walk = arm_smmu_tlb_inv_walk,
1748*4882a593Smuzhiyun .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
1749*4882a593Smuzhiyun };
1750*4882a593Smuzhiyun
1751*4882a593Smuzhiyun /* IOMMU API */
1752*4882a593Smuzhiyun static bool arm_smmu_capable(enum iommu_cap cap)
1753*4882a593Smuzhiyun {
1754*4882a593Smuzhiyun switch (cap) {
1755*4882a593Smuzhiyun case IOMMU_CAP_CACHE_COHERENCY:
1756*4882a593Smuzhiyun return true;
1757*4882a593Smuzhiyun case IOMMU_CAP_NOEXEC:
1758*4882a593Smuzhiyun return true;
1759*4882a593Smuzhiyun default:
1760*4882a593Smuzhiyun return false;
1761*4882a593Smuzhiyun }
1762*4882a593Smuzhiyun }
1763*4882a593Smuzhiyun
1764*4882a593Smuzhiyun static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1765*4882a593Smuzhiyun {
1766*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain;
1767*4882a593Smuzhiyun
1768*4882a593Smuzhiyun if (type != IOMMU_DOMAIN_UNMANAGED &&
1769*4882a593Smuzhiyun type != IOMMU_DOMAIN_DMA &&
1770*4882a593Smuzhiyun type != IOMMU_DOMAIN_IDENTITY)
1771*4882a593Smuzhiyun return NULL;
1772*4882a593Smuzhiyun
1773*4882a593Smuzhiyun /*
1774*4882a593Smuzhiyun * Allocate the domain and initialise some of its data structures.
1775*4882a593Smuzhiyun * We can't really do anything meaningful until we've added a
1776*4882a593Smuzhiyun * master.
1777*4882a593Smuzhiyun */
1778*4882a593Smuzhiyun smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1779*4882a593Smuzhiyun if (!smmu_domain)
1780*4882a593Smuzhiyun return NULL;
1781*4882a593Smuzhiyun
1782*4882a593Smuzhiyun if (type == IOMMU_DOMAIN_DMA &&
1783*4882a593Smuzhiyun iommu_get_dma_cookie(&smmu_domain->domain)) {
1784*4882a593Smuzhiyun kfree(smmu_domain);
1785*4882a593Smuzhiyun return NULL;
1786*4882a593Smuzhiyun }
1787*4882a593Smuzhiyun
1788*4882a593Smuzhiyun mutex_init(&smmu_domain->init_mutex);
1789*4882a593Smuzhiyun INIT_LIST_HEAD(&smmu_domain->devices);
1790*4882a593Smuzhiyun spin_lock_init(&smmu_domain->devices_lock);
1791*4882a593Smuzhiyun
1792*4882a593Smuzhiyun return &smmu_domain->domain;
1793*4882a593Smuzhiyun }
1794*4882a593Smuzhiyun
1795*4882a593Smuzhiyun static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1796*4882a593Smuzhiyun {
1797*4882a593Smuzhiyun int idx, size = 1 << span;
1798*4882a593Smuzhiyun
1799*4882a593Smuzhiyun do {
1800*4882a593Smuzhiyun idx = find_first_zero_bit(map, size);
1801*4882a593Smuzhiyun if (idx == size)
1802*4882a593Smuzhiyun return -ENOSPC;
1803*4882a593Smuzhiyun } while (test_and_set_bit(idx, map));
1804*4882a593Smuzhiyun
1805*4882a593Smuzhiyun return idx;
1806*4882a593Smuzhiyun }
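/*
 * The find/test_and_set loop above is the usual lock-free bitmap
 * allocator: if another CPU claims the bit between find_first_zero_bit()
 * and test_and_set_bit(), the latter returns true and the search simply
 * restarts. Here it hands out IDs (e.g. VMIDs) from a (1 << span)-bit map.
 */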
1807*4882a593Smuzhiyun
1808*4882a593Smuzhiyun static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1809*4882a593Smuzhiyun {
1810*4882a593Smuzhiyun clear_bit(idx, map);
1811*4882a593Smuzhiyun }
1812*4882a593Smuzhiyun
1813*4882a593Smuzhiyun static void arm_smmu_domain_free(struct iommu_domain *domain)
1814*4882a593Smuzhiyun {
1815*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1816*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1817*4882a593Smuzhiyun
1818*4882a593Smuzhiyun iommu_put_dma_cookie(domain);
1819*4882a593Smuzhiyun free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1820*4882a593Smuzhiyun
1821*4882a593Smuzhiyun /* Free the CD and ASID, if we allocated them */
1822*4882a593Smuzhiyun if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1823*4882a593Smuzhiyun struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1824*4882a593Smuzhiyun
1825*4882a593Smuzhiyun /* Prevent SVA from touching the CD while we're freeing it */
1826*4882a593Smuzhiyun mutex_lock(&arm_smmu_asid_lock);
1827*4882a593Smuzhiyun if (cfg->cdcfg.cdtab)
1828*4882a593Smuzhiyun arm_smmu_free_cd_tables(smmu_domain);
1829*4882a593Smuzhiyun arm_smmu_free_asid(&cfg->cd);
1830*4882a593Smuzhiyun mutex_unlock(&arm_smmu_asid_lock);
1831*4882a593Smuzhiyun } else {
1832*4882a593Smuzhiyun struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1833*4882a593Smuzhiyun if (cfg->vmid)
1834*4882a593Smuzhiyun arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1835*4882a593Smuzhiyun }
1836*4882a593Smuzhiyun
1837*4882a593Smuzhiyun kfree(smmu_domain);
1838*4882a593Smuzhiyun }
1839*4882a593Smuzhiyun
1840*4882a593Smuzhiyun static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1841*4882a593Smuzhiyun struct arm_smmu_master *master,
1842*4882a593Smuzhiyun struct io_pgtable_cfg *pgtbl_cfg)
1843*4882a593Smuzhiyun {
1844*4882a593Smuzhiyun int ret;
1845*4882a593Smuzhiyun u32 asid;
1846*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1847*4882a593Smuzhiyun struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1848*4882a593Smuzhiyun typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1849*4882a593Smuzhiyun
1850*4882a593Smuzhiyun refcount_set(&cfg->cd.refs, 1);
1851*4882a593Smuzhiyun
1852*4882a593Smuzhiyun /* Prevent SVA from modifying the ASID until it is written to the CD */
1853*4882a593Smuzhiyun mutex_lock(&arm_smmu_asid_lock);
1854*4882a593Smuzhiyun ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
1855*4882a593Smuzhiyun XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
1856*4882a593Smuzhiyun if (ret)
1857*4882a593Smuzhiyun goto out_unlock;
1858*4882a593Smuzhiyun
1859*4882a593Smuzhiyun cfg->s1cdmax = master->ssid_bits;
1860*4882a593Smuzhiyun
1861*4882a593Smuzhiyun ret = arm_smmu_alloc_cd_tables(smmu_domain);
1862*4882a593Smuzhiyun if (ret)
1863*4882a593Smuzhiyun goto out_free_asid;
1864*4882a593Smuzhiyun
1865*4882a593Smuzhiyun cfg->cd.asid = (u16)asid;
1866*4882a593Smuzhiyun cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
1867*4882a593Smuzhiyun cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1868*4882a593Smuzhiyun FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1869*4882a593Smuzhiyun FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1870*4882a593Smuzhiyun FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1871*4882a593Smuzhiyun FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1872*4882a593Smuzhiyun FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1873*4882a593Smuzhiyun CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
1874*4882a593Smuzhiyun cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
1875*4882a593Smuzhiyun
1876*4882a593Smuzhiyun /*
1877*4882a593Smuzhiyun * Note that this will end up calling arm_smmu_sync_cd() before
1878*4882a593Smuzhiyun * the master has been added to the devices list for this domain.
1879*4882a593Smuzhiyun * This isn't an issue because the STE hasn't been installed yet.
1880*4882a593Smuzhiyun */
1881*4882a593Smuzhiyun ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
1882*4882a593Smuzhiyun if (ret)
1883*4882a593Smuzhiyun goto out_free_cd_tables;
1884*4882a593Smuzhiyun
1885*4882a593Smuzhiyun mutex_unlock(&arm_smmu_asid_lock);
1886*4882a593Smuzhiyun return 0;
1887*4882a593Smuzhiyun
1888*4882a593Smuzhiyun out_free_cd_tables:
1889*4882a593Smuzhiyun arm_smmu_free_cd_tables(smmu_domain);
1890*4882a593Smuzhiyun out_free_asid:
1891*4882a593Smuzhiyun arm_smmu_free_asid(&cfg->cd);
1892*4882a593Smuzhiyun out_unlock:
1893*4882a593Smuzhiyun mutex_unlock(&arm_smmu_asid_lock);
1894*4882a593Smuzhiyun return ret;
1895*4882a593Smuzhiyun }
1896*4882a593Smuzhiyun
1897*4882a593Smuzhiyun static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1898*4882a593Smuzhiyun struct arm_smmu_master *master,
1899*4882a593Smuzhiyun struct io_pgtable_cfg *pgtbl_cfg)
1900*4882a593Smuzhiyun {
1901*4882a593Smuzhiyun int vmid;
1902*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1903*4882a593Smuzhiyun struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1904*4882a593Smuzhiyun typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
1905*4882a593Smuzhiyun
1906*4882a593Smuzhiyun vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1907*4882a593Smuzhiyun if (vmid < 0)
1908*4882a593Smuzhiyun return vmid;
1909*4882a593Smuzhiyun
1910*4882a593Smuzhiyun vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1911*4882a593Smuzhiyun cfg->vmid = (u16)vmid;
1912*4882a593Smuzhiyun cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1913*4882a593Smuzhiyun cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1914*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1915*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1916*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1917*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1918*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1919*4882a593Smuzhiyun FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1920*4882a593Smuzhiyun return 0;
1921*4882a593Smuzhiyun }
1922*4882a593Smuzhiyun
1923*4882a593Smuzhiyun static int arm_smmu_domain_finalise(struct iommu_domain *domain,
1924*4882a593Smuzhiyun struct arm_smmu_master *master)
1925*4882a593Smuzhiyun {
1926*4882a593Smuzhiyun int ret;
1927*4882a593Smuzhiyun unsigned long ias, oas;
1928*4882a593Smuzhiyun enum io_pgtable_fmt fmt;
1929*4882a593Smuzhiyun struct io_pgtable_cfg pgtbl_cfg;
1930*4882a593Smuzhiyun struct io_pgtable_ops *pgtbl_ops;
1931*4882a593Smuzhiyun int (*finalise_stage_fn)(struct arm_smmu_domain *,
1932*4882a593Smuzhiyun struct arm_smmu_master *,
1933*4882a593Smuzhiyun struct io_pgtable_cfg *);
1934*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1935*4882a593Smuzhiyun struct arm_smmu_device *smmu = smmu_domain->smmu;
1936*4882a593Smuzhiyun
1937*4882a593Smuzhiyun if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1938*4882a593Smuzhiyun smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1939*4882a593Smuzhiyun return 0;
1940*4882a593Smuzhiyun }
1941*4882a593Smuzhiyun
1942*4882a593Smuzhiyun /* Restrict the stage to what we can actually support */
1943*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1944*4882a593Smuzhiyun smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1945*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1946*4882a593Smuzhiyun smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1947*4882a593Smuzhiyun
1948*4882a593Smuzhiyun switch (smmu_domain->stage) {
1949*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_S1:
1950*4882a593Smuzhiyun ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1951*4882a593Smuzhiyun ias = min_t(unsigned long, ias, VA_BITS);
1952*4882a593Smuzhiyun oas = smmu->ias;
1953*4882a593Smuzhiyun fmt = ARM_64_LPAE_S1;
1954*4882a593Smuzhiyun finalise_stage_fn = arm_smmu_domain_finalise_s1;
1955*4882a593Smuzhiyun break;
1956*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_NESTED:
1957*4882a593Smuzhiyun case ARM_SMMU_DOMAIN_S2:
1958*4882a593Smuzhiyun ias = smmu->ias;
1959*4882a593Smuzhiyun oas = smmu->oas;
1960*4882a593Smuzhiyun fmt = ARM_64_LPAE_S2;
1961*4882a593Smuzhiyun finalise_stage_fn = arm_smmu_domain_finalise_s2;
1962*4882a593Smuzhiyun break;
1963*4882a593Smuzhiyun default:
1964*4882a593Smuzhiyun return -EINVAL;
1965*4882a593Smuzhiyun }
1966*4882a593Smuzhiyun
1967*4882a593Smuzhiyun pgtbl_cfg = (struct io_pgtable_cfg) {
1968*4882a593Smuzhiyun .pgsize_bitmap = smmu->pgsize_bitmap,
1969*4882a593Smuzhiyun .ias = ias,
1970*4882a593Smuzhiyun .oas = oas,
1971*4882a593Smuzhiyun .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
1972*4882a593Smuzhiyun .tlb = &arm_smmu_flush_ops,
1973*4882a593Smuzhiyun .iommu_dev = smmu->dev,
1974*4882a593Smuzhiyun };
1975*4882a593Smuzhiyun
1976*4882a593Smuzhiyun if (smmu_domain->non_strict)
1977*4882a593Smuzhiyun pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
1978*4882a593Smuzhiyun
1979*4882a593Smuzhiyun pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1980*4882a593Smuzhiyun if (!pgtbl_ops)
1981*4882a593Smuzhiyun return -ENOMEM;
1982*4882a593Smuzhiyun
1983*4882a593Smuzhiyun domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1984*4882a593Smuzhiyun domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1985*4882a593Smuzhiyun domain->geometry.force_aperture = true;
1986*4882a593Smuzhiyun
1987*4882a593Smuzhiyun ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
1988*4882a593Smuzhiyun if (ret < 0) {
1989*4882a593Smuzhiyun free_io_pgtable_ops(pgtbl_ops);
1990*4882a593Smuzhiyun return ret;
1991*4882a593Smuzhiyun }
1992*4882a593Smuzhiyun
1993*4882a593Smuzhiyun smmu_domain->pgtbl_ops = pgtbl_ops;
1994*4882a593Smuzhiyun return 0;
1995*4882a593Smuzhiyun }
1996*4882a593Smuzhiyun
1997*4882a593Smuzhiyun static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1998*4882a593Smuzhiyun {
1999*4882a593Smuzhiyun __le64 *step;
2000*4882a593Smuzhiyun struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2003*4882a593Smuzhiyun struct arm_smmu_strtab_l1_desc *l1_desc;
2004*4882a593Smuzhiyun int idx;
2005*4882a593Smuzhiyun
2006*4882a593Smuzhiyun /* Two-level walk */
2007*4882a593Smuzhiyun idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2008*4882a593Smuzhiyun l1_desc = &cfg->l1_desc[idx];
2009*4882a593Smuzhiyun idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2010*4882a593Smuzhiyun step = &l1_desc->l2ptr[idx];
2011*4882a593Smuzhiyun } else {
2012*4882a593Smuzhiyun /* Simple linear lookup */
2013*4882a593Smuzhiyun step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2014*4882a593Smuzhiyun }
2015*4882a593Smuzhiyun
2016*4882a593Smuzhiyun return step;
2017*4882a593Smuzhiyun }
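/*
 * Lookup example for the two-level case above (assuming the usual
 * STRTAB_SPLIT == 8, STRTAB_L1_DESC_DWORDS == 1 and
 * STRTAB_STE_DWORDS == 8): sid = 0x305 resolves to
 *
 *	L1 index = 0x305 >> 8          = 3
 *	L2 index = (0x305 & 0xff) * 8  = 0x28 dwords
 *
 * i.e. the STE starts at dword 0x28 of the L2 table referenced by
 * cfg->l1_desc[3].
 */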
2018*4882a593Smuzhiyun
2019*4882a593Smuzhiyun static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2020*4882a593Smuzhiyun {
2021*4882a593Smuzhiyun int i, j;
2022*4882a593Smuzhiyun struct arm_smmu_device *smmu = master->smmu;
2023*4882a593Smuzhiyun
2024*4882a593Smuzhiyun for (i = 0; i < master->num_sids; ++i) {
2025*4882a593Smuzhiyun u32 sid = master->sids[i];
2026*4882a593Smuzhiyun __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2027*4882a593Smuzhiyun
2028*4882a593Smuzhiyun /* Bridged PCI devices may end up with duplicated IDs */
2029*4882a593Smuzhiyun for (j = 0; j < i; j++)
2030*4882a593Smuzhiyun if (master->sids[j] == sid)
2031*4882a593Smuzhiyun break;
2032*4882a593Smuzhiyun if (j < i)
2033*4882a593Smuzhiyun continue;
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun arm_smmu_write_strtab_ent(master, sid, step);
2036*4882a593Smuzhiyun }
2037*4882a593Smuzhiyun }
2038*4882a593Smuzhiyun
2039*4882a593Smuzhiyun static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2040*4882a593Smuzhiyun {
2041*4882a593Smuzhiyun struct device *dev = master->dev;
2042*4882a593Smuzhiyun struct arm_smmu_device *smmu = master->smmu;
2043*4882a593Smuzhiyun struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2046*4882a593Smuzhiyun return false;
2047*4882a593Smuzhiyun
2048*4882a593Smuzhiyun if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2049*4882a593Smuzhiyun return false;
2050*4882a593Smuzhiyun
2051*4882a593Smuzhiyun return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2052*4882a593Smuzhiyun }
2053*4882a593Smuzhiyun
2054*4882a593Smuzhiyun static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2055*4882a593Smuzhiyun {
2056*4882a593Smuzhiyun size_t stu;
2057*4882a593Smuzhiyun struct pci_dev *pdev;
2058*4882a593Smuzhiyun struct arm_smmu_device *smmu = master->smmu;
2059*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = master->domain;
2060*4882a593Smuzhiyun
2061*4882a593Smuzhiyun /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2062*4882a593Smuzhiyun if (!master->ats_enabled)
2063*4882a593Smuzhiyun return;
2064*4882a593Smuzhiyun
2065*4882a593Smuzhiyun /* Smallest Translation Unit: log2 of the smallest supported granule */
2066*4882a593Smuzhiyun stu = __ffs(smmu->pgsize_bitmap);
2067*4882a593Smuzhiyun pdev = to_pci_dev(master->dev);
2068*4882a593Smuzhiyun
2069*4882a593Smuzhiyun atomic_inc(&smmu_domain->nr_ats_masters);
2070*4882a593Smuzhiyun arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2071*4882a593Smuzhiyun if (pci_enable_ats(pdev, stu))
2072*4882a593Smuzhiyun dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2073*4882a593Smuzhiyun }
2074*4882a593Smuzhiyun
2075*4882a593Smuzhiyun static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2076*4882a593Smuzhiyun {
2077*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = master->domain;
2078*4882a593Smuzhiyun
2079*4882a593Smuzhiyun if (!master->ats_enabled)
2080*4882a593Smuzhiyun return;
2081*4882a593Smuzhiyun
2082*4882a593Smuzhiyun pci_disable_ats(to_pci_dev(master->dev));
2083*4882a593Smuzhiyun /*
2084*4882a593Smuzhiyun * Ensure ATS is disabled at the endpoint before we issue the
2085*4882a593Smuzhiyun * ATC invalidation via the SMMU.
2086*4882a593Smuzhiyun */
2087*4882a593Smuzhiyun wmb();
2088*4882a593Smuzhiyun arm_smmu_atc_inv_master(master);
2089*4882a593Smuzhiyun atomic_dec(&smmu_domain->nr_ats_masters);
2090*4882a593Smuzhiyun }
2091*4882a593Smuzhiyun
2092*4882a593Smuzhiyun static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2093*4882a593Smuzhiyun {
2094*4882a593Smuzhiyun int ret;
2095*4882a593Smuzhiyun int features;
2096*4882a593Smuzhiyun int num_pasids;
2097*4882a593Smuzhiyun struct pci_dev *pdev;
2098*4882a593Smuzhiyun
2099*4882a593Smuzhiyun if (!dev_is_pci(master->dev))
2100*4882a593Smuzhiyun return -ENODEV;
2101*4882a593Smuzhiyun
2102*4882a593Smuzhiyun pdev = to_pci_dev(master->dev);
2103*4882a593Smuzhiyun
2104*4882a593Smuzhiyun features = pci_pasid_features(pdev);
2105*4882a593Smuzhiyun if (features < 0)
2106*4882a593Smuzhiyun return features;
2107*4882a593Smuzhiyun
2108*4882a593Smuzhiyun num_pasids = pci_max_pasids(pdev);
2109*4882a593Smuzhiyun if (num_pasids <= 0)
2110*4882a593Smuzhiyun return num_pasids;
2111*4882a593Smuzhiyun
2112*4882a593Smuzhiyun ret = pci_enable_pasid(pdev, features);
2113*4882a593Smuzhiyun if (ret) {
2114*4882a593Smuzhiyun dev_err(&pdev->dev, "Failed to enable PASID\n");
2115*4882a593Smuzhiyun return ret;
2116*4882a593Smuzhiyun }
2117*4882a593Smuzhiyun
2118*4882a593Smuzhiyun master->ssid_bits = min_t(u8, ilog2(num_pasids),
2119*4882a593Smuzhiyun master->smmu->ssid_bits);
2120*4882a593Smuzhiyun return 0;
2121*4882a593Smuzhiyun }
2122*4882a593Smuzhiyun
2123*4882a593Smuzhiyun static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2124*4882a593Smuzhiyun {
2125*4882a593Smuzhiyun struct pci_dev *pdev;
2126*4882a593Smuzhiyun
2127*4882a593Smuzhiyun if (!dev_is_pci(master->dev))
2128*4882a593Smuzhiyun return;
2129*4882a593Smuzhiyun
2130*4882a593Smuzhiyun pdev = to_pci_dev(master->dev);
2131*4882a593Smuzhiyun
2132*4882a593Smuzhiyun if (!pdev->pasid_enabled)
2133*4882a593Smuzhiyun return;
2134*4882a593Smuzhiyun
2135*4882a593Smuzhiyun master->ssid_bits = 0;
2136*4882a593Smuzhiyun pci_disable_pasid(pdev);
2137*4882a593Smuzhiyun }
2138*4882a593Smuzhiyun
2139*4882a593Smuzhiyun static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2140*4882a593Smuzhiyun {
2141*4882a593Smuzhiyun unsigned long flags;
2142*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = master->domain;
2143*4882a593Smuzhiyun
2144*4882a593Smuzhiyun if (!smmu_domain)
2145*4882a593Smuzhiyun return;
2146*4882a593Smuzhiyun
2147*4882a593Smuzhiyun arm_smmu_disable_ats(master);
2148*4882a593Smuzhiyun
2149*4882a593Smuzhiyun spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2150*4882a593Smuzhiyun list_del(&master->domain_head);
2151*4882a593Smuzhiyun spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2152*4882a593Smuzhiyun
2153*4882a593Smuzhiyun master->domain = NULL;
2154*4882a593Smuzhiyun master->ats_enabled = false;
2155*4882a593Smuzhiyun arm_smmu_install_ste_for_dev(master);
2156*4882a593Smuzhiyun }
2157*4882a593Smuzhiyun
2158*4882a593Smuzhiyun static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2159*4882a593Smuzhiyun {
2160*4882a593Smuzhiyun int ret = 0;
2161*4882a593Smuzhiyun unsigned long flags;
2162*4882a593Smuzhiyun struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2163*4882a593Smuzhiyun struct arm_smmu_device *smmu;
2164*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2165*4882a593Smuzhiyun struct arm_smmu_master *master;
2166*4882a593Smuzhiyun
2167*4882a593Smuzhiyun if (!fwspec)
2168*4882a593Smuzhiyun return -ENOENT;
2169*4882a593Smuzhiyun
2170*4882a593Smuzhiyun master = dev_iommu_priv_get(dev);
2171*4882a593Smuzhiyun smmu = master->smmu;
2172*4882a593Smuzhiyun
2173*4882a593Smuzhiyun /*
2174*4882a593Smuzhiyun * Checking that SVA is disabled ensures that this device isn't bound to
2175*4882a593Smuzhiyun * any mm, and can be safely detached from its old domain. Bonds cannot
2176*4882a593Smuzhiyun * be removed concurrently since we're holding the group mutex.
2177*4882a593Smuzhiyun */
2178*4882a593Smuzhiyun if (arm_smmu_master_sva_enabled(master)) {
2179*4882a593Smuzhiyun dev_err(dev, "cannot attach - SVA enabled\n");
2180*4882a593Smuzhiyun return -EBUSY;
2181*4882a593Smuzhiyun }
2182*4882a593Smuzhiyun
2183*4882a593Smuzhiyun arm_smmu_detach_dev(master);
2184*4882a593Smuzhiyun
2185*4882a593Smuzhiyun mutex_lock(&smmu_domain->init_mutex);
2186*4882a593Smuzhiyun
2187*4882a593Smuzhiyun if (!smmu_domain->smmu) {
2188*4882a593Smuzhiyun smmu_domain->smmu = smmu;
2189*4882a593Smuzhiyun ret = arm_smmu_domain_finalise(domain, master);
2190*4882a593Smuzhiyun if (ret) {
2191*4882a593Smuzhiyun smmu_domain->smmu = NULL;
2192*4882a593Smuzhiyun goto out_unlock;
2193*4882a593Smuzhiyun }
2194*4882a593Smuzhiyun } else if (smmu_domain->smmu != smmu) {
2195*4882a593Smuzhiyun dev_err(dev,
2196*4882a593Smuzhiyun "cannot attach to SMMU %s (upstream of %s)\n",
2197*4882a593Smuzhiyun dev_name(smmu_domain->smmu->dev),
2198*4882a593Smuzhiyun dev_name(smmu->dev));
2199*4882a593Smuzhiyun ret = -ENXIO;
2200*4882a593Smuzhiyun goto out_unlock;
2201*4882a593Smuzhiyun } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2202*4882a593Smuzhiyun master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2203*4882a593Smuzhiyun dev_err(dev,
2204*4882a593Smuzhiyun "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2205*4882a593Smuzhiyun smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2206*4882a593Smuzhiyun ret = -EINVAL;
2207*4882a593Smuzhiyun goto out_unlock;
2208*4882a593Smuzhiyun }
2209*4882a593Smuzhiyun
2210*4882a593Smuzhiyun master->domain = smmu_domain;
2211*4882a593Smuzhiyun
2212*4882a593Smuzhiyun if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2213*4882a593Smuzhiyun master->ats_enabled = arm_smmu_ats_supported(master);
2214*4882a593Smuzhiyun
2215*4882a593Smuzhiyun arm_smmu_install_ste_for_dev(master);
2216*4882a593Smuzhiyun
2217*4882a593Smuzhiyun spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2218*4882a593Smuzhiyun list_add(&master->domain_head, &smmu_domain->devices);
2219*4882a593Smuzhiyun spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2220*4882a593Smuzhiyun
2221*4882a593Smuzhiyun arm_smmu_enable_ats(master);
2222*4882a593Smuzhiyun
2223*4882a593Smuzhiyun out_unlock:
2224*4882a593Smuzhiyun mutex_unlock(&smmu_domain->init_mutex);
2225*4882a593Smuzhiyun return ret;
2226*4882a593Smuzhiyun }
2227*4882a593Smuzhiyun
2228*4882a593Smuzhiyun static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2229*4882a593Smuzhiyun phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2230*4882a593Smuzhiyun {
2231*4882a593Smuzhiyun struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2232*4882a593Smuzhiyun
2233*4882a593Smuzhiyun if (!ops)
2234*4882a593Smuzhiyun return -ENODEV;
2235*4882a593Smuzhiyun
2236*4882a593Smuzhiyun return ops->map(ops, iova, paddr, size, prot, gfp);
2237*4882a593Smuzhiyun }
2238*4882a593Smuzhiyun
2239*4882a593Smuzhiyun static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2240*4882a593Smuzhiyun size_t size, struct iommu_iotlb_gather *gather)
2241*4882a593Smuzhiyun {
2242*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2243*4882a593Smuzhiyun struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2244*4882a593Smuzhiyun
2245*4882a593Smuzhiyun if (!ops)
2246*4882a593Smuzhiyun return 0;
2247*4882a593Smuzhiyun
2248*4882a593Smuzhiyun return ops->unmap(ops, iova, size, gather);
2249*4882a593Smuzhiyun }
2250*4882a593Smuzhiyun
2251*4882a593Smuzhiyun static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2252*4882a593Smuzhiyun {
2253*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2254*4882a593Smuzhiyun
2255*4882a593Smuzhiyun if (smmu_domain->smmu)
2256*4882a593Smuzhiyun arm_smmu_tlb_inv_context(smmu_domain);
2257*4882a593Smuzhiyun }
2258*4882a593Smuzhiyun
2259*4882a593Smuzhiyun static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2260*4882a593Smuzhiyun struct iommu_iotlb_gather *gather)
2261*4882a593Smuzhiyun {
2262*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2263*4882a593Smuzhiyun
2264*4882a593Smuzhiyun arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1,
2265*4882a593Smuzhiyun gather->pgsize, true, smmu_domain);
2266*4882a593Smuzhiyun }
2267*4882a593Smuzhiyun
2268*4882a593Smuzhiyun static phys_addr_t
2269*4882a593Smuzhiyun arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2270*4882a593Smuzhiyun {
2271*4882a593Smuzhiyun struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2272*4882a593Smuzhiyun
2273*4882a593Smuzhiyun if (domain->type == IOMMU_DOMAIN_IDENTITY)
2274*4882a593Smuzhiyun return iova;
2275*4882a593Smuzhiyun
2276*4882a593Smuzhiyun if (!ops)
2277*4882a593Smuzhiyun return 0;
2278*4882a593Smuzhiyun
2279*4882a593Smuzhiyun return ops->iova_to_phys(ops, iova);
2280*4882a593Smuzhiyun }
2281*4882a593Smuzhiyun
2282*4882a593Smuzhiyun static struct platform_driver arm_smmu_driver;
2283*4882a593Smuzhiyun
2284*4882a593Smuzhiyun static
2285*4882a593Smuzhiyun struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2286*4882a593Smuzhiyun {
2287*4882a593Smuzhiyun struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2288*4882a593Smuzhiyun fwnode);
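	/*
	 * Added note: driver_find_device_by_fwnode() takes a reference on the
	 * device; drop it straight away since only the driver data is needed.
	 */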
2289*4882a593Smuzhiyun put_device(dev);
2290*4882a593Smuzhiyun return dev ? dev_get_drvdata(dev) : NULL;
2291*4882a593Smuzhiyun }
2292*4882a593Smuzhiyun
2293*4882a593Smuzhiyun static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2294*4882a593Smuzhiyun {
2295*4882a593Smuzhiyun unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2296*4882a593Smuzhiyun
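	/*
	 * Added note: a two-level stream table covers num_l1_ents L2 tables of
	 * 1 << STRTAB_SPLIT entries each, so scale the limit accordingly.
	 */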
2297*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2298*4882a593Smuzhiyun limit *= 1UL << STRTAB_SPLIT;
2299*4882a593Smuzhiyun
2300*4882a593Smuzhiyun return sid < limit;
2301*4882a593Smuzhiyun }
2302*4882a593Smuzhiyun
2303*4882a593Smuzhiyun static struct iommu_ops arm_smmu_ops;
2304*4882a593Smuzhiyun
2305*4882a593Smuzhiyun static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2306*4882a593Smuzhiyun {
2307*4882a593Smuzhiyun int i, ret;
2308*4882a593Smuzhiyun struct arm_smmu_device *smmu;
2309*4882a593Smuzhiyun struct arm_smmu_master *master;
2310*4882a593Smuzhiyun struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2311*4882a593Smuzhiyun
2312*4882a593Smuzhiyun if (!fwspec || fwspec->ops != &arm_smmu_ops)
2313*4882a593Smuzhiyun return ERR_PTR(-ENODEV);
2314*4882a593Smuzhiyun
2315*4882a593Smuzhiyun if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2316*4882a593Smuzhiyun return ERR_PTR(-EBUSY);
2317*4882a593Smuzhiyun
2318*4882a593Smuzhiyun smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2319*4882a593Smuzhiyun if (!smmu)
2320*4882a593Smuzhiyun return ERR_PTR(-ENODEV);
2321*4882a593Smuzhiyun
2322*4882a593Smuzhiyun master = kzalloc(sizeof(*master), GFP_KERNEL);
2323*4882a593Smuzhiyun if (!master)
2324*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
2325*4882a593Smuzhiyun
2326*4882a593Smuzhiyun master->dev = dev;
2327*4882a593Smuzhiyun master->smmu = smmu;
2328*4882a593Smuzhiyun master->sids = fwspec->ids;
2329*4882a593Smuzhiyun master->num_sids = fwspec->num_ids;
2330*4882a593Smuzhiyun INIT_LIST_HEAD(&master->bonds);
2331*4882a593Smuzhiyun dev_iommu_priv_set(dev, master);
2332*4882a593Smuzhiyun
2333*4882a593Smuzhiyun /* Check the SIDs are in range of the SMMU and our stream table */
2334*4882a593Smuzhiyun for (i = 0; i < master->num_sids; i++) {
2335*4882a593Smuzhiyun u32 sid = master->sids[i];
2336*4882a593Smuzhiyun
2337*4882a593Smuzhiyun if (!arm_smmu_sid_in_range(smmu, sid)) {
2338*4882a593Smuzhiyun ret = -ERANGE;
2339*4882a593Smuzhiyun goto err_free_master;
2340*4882a593Smuzhiyun }
2341*4882a593Smuzhiyun
2342*4882a593Smuzhiyun /* Ensure l2 strtab is initialised */
2343*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2344*4882a593Smuzhiyun ret = arm_smmu_init_l2_strtab(smmu, sid);
2345*4882a593Smuzhiyun if (ret)
2346*4882a593Smuzhiyun goto err_free_master;
2347*4882a593Smuzhiyun }
2348*4882a593Smuzhiyun }
2349*4882a593Smuzhiyun
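	/*
	 * Added note: the usable SSID width is whichever is smaller of what the
	 * SMMU supports and what firmware reported for the endpoint.
	 */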
2350*4882a593Smuzhiyun master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2351*4882a593Smuzhiyun
2352*4882a593Smuzhiyun /*
2353*4882a593Smuzhiyun * Note that PASID must be enabled before, and disabled after ATS:
2354*4882a593Smuzhiyun * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2355*4882a593Smuzhiyun *
2356*4882a593Smuzhiyun * Behavior is undefined if this bit is Set and the value of the PASID
2357*4882a593Smuzhiyun * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2358*4882a593Smuzhiyun * are changed.
2359*4882a593Smuzhiyun */
2360*4882a593Smuzhiyun arm_smmu_enable_pasid(master);
2361*4882a593Smuzhiyun
2362*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2363*4882a593Smuzhiyun master->ssid_bits = min_t(u8, master->ssid_bits,
2364*4882a593Smuzhiyun CTXDESC_LINEAR_CDMAX);
2365*4882a593Smuzhiyun
2366*4882a593Smuzhiyun return &smmu->iommu;
2367*4882a593Smuzhiyun
2368*4882a593Smuzhiyun err_free_master:
2369*4882a593Smuzhiyun kfree(master);
2370*4882a593Smuzhiyun dev_iommu_priv_set(dev, NULL);
2371*4882a593Smuzhiyun return ERR_PTR(ret);
2372*4882a593Smuzhiyun }
2373*4882a593Smuzhiyun
2374*4882a593Smuzhiyun static void arm_smmu_release_device(struct device *dev)
2375*4882a593Smuzhiyun {
2376*4882a593Smuzhiyun struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2377*4882a593Smuzhiyun struct arm_smmu_master *master;
2378*4882a593Smuzhiyun
2379*4882a593Smuzhiyun if (!fwspec || fwspec->ops != &arm_smmu_ops)
2380*4882a593Smuzhiyun return;
2381*4882a593Smuzhiyun
2382*4882a593Smuzhiyun master = dev_iommu_priv_get(dev);
2383*4882a593Smuzhiyun WARN_ON(arm_smmu_master_sva_enabled(master));
2384*4882a593Smuzhiyun arm_smmu_detach_dev(master);
2385*4882a593Smuzhiyun arm_smmu_disable_pasid(master);
2386*4882a593Smuzhiyun kfree(master);
2387*4882a593Smuzhiyun iommu_fwspec_free(dev);
2388*4882a593Smuzhiyun }
2389*4882a593Smuzhiyun
2390*4882a593Smuzhiyun static struct iommu_group *arm_smmu_device_group(struct device *dev)
2391*4882a593Smuzhiyun {
2392*4882a593Smuzhiyun struct iommu_group *group;
2393*4882a593Smuzhiyun
2394*4882a593Smuzhiyun /*
2395*4882a593Smuzhiyun * We don't support devices sharing stream IDs other than PCI RID
2396*4882a593Smuzhiyun * aliases, since the necessary ID-to-device lookup becomes rather
2397*4882a593Smuzhiyun * impractical given a potential sparse 32-bit stream ID space.
2398*4882a593Smuzhiyun */
2399*4882a593Smuzhiyun if (dev_is_pci(dev))
2400*4882a593Smuzhiyun group = pci_device_group(dev);
2401*4882a593Smuzhiyun else
2402*4882a593Smuzhiyun group = generic_device_group(dev);
2403*4882a593Smuzhiyun
2404*4882a593Smuzhiyun return group;
2405*4882a593Smuzhiyun }
2406*4882a593Smuzhiyun
2407*4882a593Smuzhiyun static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
2408*4882a593Smuzhiyun enum iommu_attr attr, void *data)
2409*4882a593Smuzhiyun {
2410*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2411*4882a593Smuzhiyun
2412*4882a593Smuzhiyun switch (domain->type) {
2413*4882a593Smuzhiyun case IOMMU_DOMAIN_UNMANAGED:
2414*4882a593Smuzhiyun switch (attr) {
2415*4882a593Smuzhiyun case DOMAIN_ATTR_NESTING:
2416*4882a593Smuzhiyun *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
2417*4882a593Smuzhiyun return 0;
2418*4882a593Smuzhiyun default:
2419*4882a593Smuzhiyun return -ENODEV;
2420*4882a593Smuzhiyun }
2421*4882a593Smuzhiyun break;
2422*4882a593Smuzhiyun case IOMMU_DOMAIN_DMA:
2423*4882a593Smuzhiyun switch (attr) {
2424*4882a593Smuzhiyun case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2425*4882a593Smuzhiyun *(int *)data = smmu_domain->non_strict;
2426*4882a593Smuzhiyun return 0;
2427*4882a593Smuzhiyun default:
2428*4882a593Smuzhiyun return -ENODEV;
2429*4882a593Smuzhiyun }
2430*4882a593Smuzhiyun break;
2431*4882a593Smuzhiyun default:
2432*4882a593Smuzhiyun return -EINVAL;
2433*4882a593Smuzhiyun }
2434*4882a593Smuzhiyun }
2435*4882a593Smuzhiyun
2436*4882a593Smuzhiyun static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
2437*4882a593Smuzhiyun enum iommu_attr attr, void *data)
2438*4882a593Smuzhiyun {
2439*4882a593Smuzhiyun int ret = 0;
2440*4882a593Smuzhiyun struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2441*4882a593Smuzhiyun
2442*4882a593Smuzhiyun mutex_lock(&smmu_domain->init_mutex);
2443*4882a593Smuzhiyun
2444*4882a593Smuzhiyun switch (domain->type) {
2445*4882a593Smuzhiyun case IOMMU_DOMAIN_UNMANAGED:
2446*4882a593Smuzhiyun switch (attr) {
2447*4882a593Smuzhiyun case DOMAIN_ATTR_NESTING:
2448*4882a593Smuzhiyun if (smmu_domain->smmu) {
2449*4882a593Smuzhiyun ret = -EPERM;
2450*4882a593Smuzhiyun goto out_unlock;
2451*4882a593Smuzhiyun }
2452*4882a593Smuzhiyun
2453*4882a593Smuzhiyun if (*(int *)data)
2454*4882a593Smuzhiyun smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
2455*4882a593Smuzhiyun else
2456*4882a593Smuzhiyun smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2457*4882a593Smuzhiyun break;
2458*4882a593Smuzhiyun default:
2459*4882a593Smuzhiyun ret = -ENODEV;
2460*4882a593Smuzhiyun }
2461*4882a593Smuzhiyun break;
2462*4882a593Smuzhiyun case IOMMU_DOMAIN_DMA:
2463*4882a593Smuzhiyun switch(attr) {
2464*4882a593Smuzhiyun case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
2465*4882a593Smuzhiyun smmu_domain->non_strict = *(int *)data;
2466*4882a593Smuzhiyun break;
2467*4882a593Smuzhiyun default:
2468*4882a593Smuzhiyun ret = -ENODEV;
2469*4882a593Smuzhiyun }
2470*4882a593Smuzhiyun break;
2471*4882a593Smuzhiyun default:
2472*4882a593Smuzhiyun ret = -EINVAL;
2473*4882a593Smuzhiyun }
2474*4882a593Smuzhiyun
2475*4882a593Smuzhiyun out_unlock:
2476*4882a593Smuzhiyun mutex_unlock(&smmu_domain->init_mutex);
2477*4882a593Smuzhiyun return ret;
2478*4882a593Smuzhiyun }
2479*4882a593Smuzhiyun
2480*4882a593Smuzhiyun static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2481*4882a593Smuzhiyun {
2482*4882a593Smuzhiyun return iommu_fwspec_add_ids(dev, args->args, 1);
2483*4882a593Smuzhiyun }
2484*4882a593Smuzhiyun
2485*4882a593Smuzhiyun static void arm_smmu_get_resv_regions(struct device *dev,
2486*4882a593Smuzhiyun struct list_head *head)
2487*4882a593Smuzhiyun {
2488*4882a593Smuzhiyun struct iommu_resv_region *region;
2489*4882a593Smuzhiyun int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2490*4882a593Smuzhiyun
2491*4882a593Smuzhiyun region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2492*4882a593Smuzhiyun prot, IOMMU_RESV_SW_MSI);
2493*4882a593Smuzhiyun if (!region)
2494*4882a593Smuzhiyun return;
2495*4882a593Smuzhiyun
2496*4882a593Smuzhiyun 	list_add_tail(&region->list, head);
2497*4882a593Smuzhiyun
2498*4882a593Smuzhiyun iommu_dma_get_resv_regions(dev, head);
2499*4882a593Smuzhiyun }
2500*4882a593Smuzhiyun
2501*4882a593Smuzhiyun static bool arm_smmu_dev_has_feature(struct device *dev,
2502*4882a593Smuzhiyun enum iommu_dev_features feat)
2503*4882a593Smuzhiyun {
2504*4882a593Smuzhiyun struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2505*4882a593Smuzhiyun
2506*4882a593Smuzhiyun if (!master)
2507*4882a593Smuzhiyun return false;
2508*4882a593Smuzhiyun
2509*4882a593Smuzhiyun switch (feat) {
2510*4882a593Smuzhiyun case IOMMU_DEV_FEAT_SVA:
2511*4882a593Smuzhiyun return arm_smmu_master_sva_supported(master);
2512*4882a593Smuzhiyun default:
2513*4882a593Smuzhiyun return false;
2514*4882a593Smuzhiyun }
2515*4882a593Smuzhiyun }
2516*4882a593Smuzhiyun
2517*4882a593Smuzhiyun static bool arm_smmu_dev_feature_enabled(struct device *dev,
2518*4882a593Smuzhiyun enum iommu_dev_features feat)
2519*4882a593Smuzhiyun {
2520*4882a593Smuzhiyun struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2521*4882a593Smuzhiyun
2522*4882a593Smuzhiyun if (!master)
2523*4882a593Smuzhiyun return false;
2524*4882a593Smuzhiyun
2525*4882a593Smuzhiyun switch (feat) {
2526*4882a593Smuzhiyun case IOMMU_DEV_FEAT_SVA:
2527*4882a593Smuzhiyun return arm_smmu_master_sva_enabled(master);
2528*4882a593Smuzhiyun default:
2529*4882a593Smuzhiyun return false;
2530*4882a593Smuzhiyun }
2531*4882a593Smuzhiyun }
2532*4882a593Smuzhiyun
2533*4882a593Smuzhiyun static int arm_smmu_dev_enable_feature(struct device *dev,
2534*4882a593Smuzhiyun enum iommu_dev_features feat)
2535*4882a593Smuzhiyun {
2536*4882a593Smuzhiyun if (!arm_smmu_dev_has_feature(dev, feat))
2537*4882a593Smuzhiyun return -ENODEV;
2538*4882a593Smuzhiyun
2539*4882a593Smuzhiyun if (arm_smmu_dev_feature_enabled(dev, feat))
2540*4882a593Smuzhiyun return -EBUSY;
2541*4882a593Smuzhiyun
2542*4882a593Smuzhiyun switch (feat) {
2543*4882a593Smuzhiyun case IOMMU_DEV_FEAT_SVA:
2544*4882a593Smuzhiyun return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
2545*4882a593Smuzhiyun default:
2546*4882a593Smuzhiyun return -EINVAL;
2547*4882a593Smuzhiyun }
2548*4882a593Smuzhiyun }
2549*4882a593Smuzhiyun
2550*4882a593Smuzhiyun static int arm_smmu_dev_disable_feature(struct device *dev,
2551*4882a593Smuzhiyun enum iommu_dev_features feat)
2552*4882a593Smuzhiyun {
2553*4882a593Smuzhiyun if (!arm_smmu_dev_feature_enabled(dev, feat))
2554*4882a593Smuzhiyun return -EINVAL;
2555*4882a593Smuzhiyun
2556*4882a593Smuzhiyun switch (feat) {
2557*4882a593Smuzhiyun case IOMMU_DEV_FEAT_SVA:
2558*4882a593Smuzhiyun return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
2559*4882a593Smuzhiyun default:
2560*4882a593Smuzhiyun return -EINVAL;
2561*4882a593Smuzhiyun }
2562*4882a593Smuzhiyun }
2563*4882a593Smuzhiyun
2564*4882a593Smuzhiyun static struct iommu_ops arm_smmu_ops = {
2565*4882a593Smuzhiyun .capable = arm_smmu_capable,
2566*4882a593Smuzhiyun .domain_alloc = arm_smmu_domain_alloc,
2567*4882a593Smuzhiyun .domain_free = arm_smmu_domain_free,
2568*4882a593Smuzhiyun .attach_dev = arm_smmu_attach_dev,
2569*4882a593Smuzhiyun .map = arm_smmu_map,
2570*4882a593Smuzhiyun .unmap = arm_smmu_unmap,
2571*4882a593Smuzhiyun .flush_iotlb_all = arm_smmu_flush_iotlb_all,
2572*4882a593Smuzhiyun .iotlb_sync = arm_smmu_iotlb_sync,
2573*4882a593Smuzhiyun .iova_to_phys = arm_smmu_iova_to_phys,
2574*4882a593Smuzhiyun .probe_device = arm_smmu_probe_device,
2575*4882a593Smuzhiyun .release_device = arm_smmu_release_device,
2576*4882a593Smuzhiyun .device_group = arm_smmu_device_group,
2577*4882a593Smuzhiyun .domain_get_attr = arm_smmu_domain_get_attr,
2578*4882a593Smuzhiyun .domain_set_attr = arm_smmu_domain_set_attr,
2579*4882a593Smuzhiyun .of_xlate = arm_smmu_of_xlate,
2580*4882a593Smuzhiyun .get_resv_regions = arm_smmu_get_resv_regions,
2581*4882a593Smuzhiyun .put_resv_regions = generic_iommu_put_resv_regions,
2582*4882a593Smuzhiyun .dev_has_feat = arm_smmu_dev_has_feature,
2583*4882a593Smuzhiyun .dev_feat_enabled = arm_smmu_dev_feature_enabled,
2584*4882a593Smuzhiyun .dev_enable_feat = arm_smmu_dev_enable_feature,
2585*4882a593Smuzhiyun .dev_disable_feat = arm_smmu_dev_disable_feature,
2586*4882a593Smuzhiyun .pgsize_bitmap = -1UL, /* Restricted during device attach */
2587*4882a593Smuzhiyun };
2588*4882a593Smuzhiyun
2589*4882a593Smuzhiyun /* Probing and initialisation functions */
2590*4882a593Smuzhiyun static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2591*4882a593Smuzhiyun struct arm_smmu_queue *q,
2592*4882a593Smuzhiyun unsigned long prod_off,
2593*4882a593Smuzhiyun unsigned long cons_off,
2594*4882a593Smuzhiyun size_t dwords, const char *name)
2595*4882a593Smuzhiyun {
2596*4882a593Smuzhiyun size_t qsz;
2597*4882a593Smuzhiyun
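	/*
	 * Added note: try the size the hardware advertises and halve it on
	 * allocation failure, stopping once the queue no longer fills a page.
	 */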
2598*4882a593Smuzhiyun do {
2599*4882a593Smuzhiyun qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2600*4882a593Smuzhiyun q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2601*4882a593Smuzhiyun GFP_KERNEL);
2602*4882a593Smuzhiyun if (q->base || qsz < PAGE_SIZE)
2603*4882a593Smuzhiyun break;
2604*4882a593Smuzhiyun
2605*4882a593Smuzhiyun q->llq.max_n_shift--;
2606*4882a593Smuzhiyun } while (1);
2607*4882a593Smuzhiyun
2608*4882a593Smuzhiyun if (!q->base) {
2609*4882a593Smuzhiyun dev_err(smmu->dev,
2610*4882a593Smuzhiyun "failed to allocate queue (0x%zx bytes) for %s\n",
2611*4882a593Smuzhiyun qsz, name);
2612*4882a593Smuzhiyun return -ENOMEM;
2613*4882a593Smuzhiyun }
2614*4882a593Smuzhiyun
2615*4882a593Smuzhiyun if (!WARN_ON(q->base_dma & (qsz - 1))) {
2616*4882a593Smuzhiyun dev_info(smmu->dev, "allocated %u entries for %s\n",
2617*4882a593Smuzhiyun 1 << q->llq.max_n_shift, name);
2618*4882a593Smuzhiyun }
2619*4882a593Smuzhiyun
2620*4882a593Smuzhiyun q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2621*4882a593Smuzhiyun q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2622*4882a593Smuzhiyun q->ent_dwords = dwords;
2623*4882a593Smuzhiyun
2624*4882a593Smuzhiyun q->q_base = Q_BASE_RWA;
2625*4882a593Smuzhiyun q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2626*4882a593Smuzhiyun q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2627*4882a593Smuzhiyun
2628*4882a593Smuzhiyun q->llq.prod = q->llq.cons = 0;
2629*4882a593Smuzhiyun return 0;
2630*4882a593Smuzhiyun }
2631*4882a593Smuzhiyun
2632*4882a593Smuzhiyun static void arm_smmu_cmdq_free_bitmap(void *data)
2633*4882a593Smuzhiyun {
2634*4882a593Smuzhiyun unsigned long *bitmap = data;
2635*4882a593Smuzhiyun bitmap_free(bitmap);
2636*4882a593Smuzhiyun }
2637*4882a593Smuzhiyun
2638*4882a593Smuzhiyun static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2639*4882a593Smuzhiyun {
2640*4882a593Smuzhiyun int ret = 0;
2641*4882a593Smuzhiyun struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2642*4882a593Smuzhiyun unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2643*4882a593Smuzhiyun atomic_long_t *bitmap;
2644*4882a593Smuzhiyun
2645*4882a593Smuzhiyun atomic_set(&cmdq->owner_prod, 0);
2646*4882a593Smuzhiyun atomic_set(&cmdq->lock, 0);
2647*4882a593Smuzhiyun
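	/*
	 * Added note: one bit per command queue entry; the lock-free insertion
	 * path appears to use this map to track which slots hold fully-written
	 * commands.
	 */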
2648*4882a593Smuzhiyun bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
2649*4882a593Smuzhiyun if (!bitmap) {
2650*4882a593Smuzhiyun dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
2651*4882a593Smuzhiyun ret = -ENOMEM;
2652*4882a593Smuzhiyun } else {
2653*4882a593Smuzhiyun cmdq->valid_map = bitmap;
2654*4882a593Smuzhiyun devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
2655*4882a593Smuzhiyun }
2656*4882a593Smuzhiyun
2657*4882a593Smuzhiyun return ret;
2658*4882a593Smuzhiyun }
2659*4882a593Smuzhiyun
2660*4882a593Smuzhiyun static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2661*4882a593Smuzhiyun {
2662*4882a593Smuzhiyun int ret;
2663*4882a593Smuzhiyun
2664*4882a593Smuzhiyun /* cmdq */
2665*4882a593Smuzhiyun ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2666*4882a593Smuzhiyun ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
2667*4882a593Smuzhiyun "cmdq");
2668*4882a593Smuzhiyun if (ret)
2669*4882a593Smuzhiyun return ret;
2670*4882a593Smuzhiyun
2671*4882a593Smuzhiyun ret = arm_smmu_cmdq_init(smmu);
2672*4882a593Smuzhiyun if (ret)
2673*4882a593Smuzhiyun return ret;
2674*4882a593Smuzhiyun
2675*4882a593Smuzhiyun /* evtq */
2676*4882a593Smuzhiyun ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2677*4882a593Smuzhiyun ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
2678*4882a593Smuzhiyun "evtq");
2679*4882a593Smuzhiyun if (ret)
2680*4882a593Smuzhiyun return ret;
2681*4882a593Smuzhiyun
2682*4882a593Smuzhiyun /* priq */
2683*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2684*4882a593Smuzhiyun return 0;
2685*4882a593Smuzhiyun
2686*4882a593Smuzhiyun return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2687*4882a593Smuzhiyun ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
2688*4882a593Smuzhiyun "priq");
2689*4882a593Smuzhiyun }
2690*4882a593Smuzhiyun
2691*4882a593Smuzhiyun static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2692*4882a593Smuzhiyun {
2693*4882a593Smuzhiyun unsigned int i;
2694*4882a593Smuzhiyun struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2695*4882a593Smuzhiyun size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2696*4882a593Smuzhiyun void *strtab = smmu->strtab_cfg.strtab;
2697*4882a593Smuzhiyun
2698*4882a593Smuzhiyun cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2699*4882a593Smuzhiyun if (!cfg->l1_desc) {
2700*4882a593Smuzhiyun dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2701*4882a593Smuzhiyun return -ENOMEM;
2702*4882a593Smuzhiyun }
2703*4882a593Smuzhiyun
2704*4882a593Smuzhiyun for (i = 0; i < cfg->num_l1_ents; ++i) {
2705*4882a593Smuzhiyun arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2706*4882a593Smuzhiyun strtab += STRTAB_L1_DESC_DWORDS << 3;
2707*4882a593Smuzhiyun }
2708*4882a593Smuzhiyun
2709*4882a593Smuzhiyun return 0;
2710*4882a593Smuzhiyun }
2711*4882a593Smuzhiyun
2712*4882a593Smuzhiyun static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2713*4882a593Smuzhiyun {
2714*4882a593Smuzhiyun void *strtab;
2715*4882a593Smuzhiyun u64 reg;
2716*4882a593Smuzhiyun u32 size, l1size;
2717*4882a593Smuzhiyun struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2718*4882a593Smuzhiyun
2719*4882a593Smuzhiyun /* Calculate the L1 size, capped to the SIDSIZE. */
2720*4882a593Smuzhiyun size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2721*4882a593Smuzhiyun size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2722*4882a593Smuzhiyun cfg->num_l1_ents = 1 << size;
2723*4882a593Smuzhiyun
2724*4882a593Smuzhiyun size += STRTAB_SPLIT;
2725*4882a593Smuzhiyun if (size < smmu->sid_bits)
2726*4882a593Smuzhiyun dev_warn(smmu->dev,
2727*4882a593Smuzhiyun "2-level strtab only covers %u/%u bits of SID\n",
2728*4882a593Smuzhiyun size, smmu->sid_bits);
2729*4882a593Smuzhiyun
2730*4882a593Smuzhiyun l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2731*4882a593Smuzhiyun strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2732*4882a593Smuzhiyun GFP_KERNEL);
2733*4882a593Smuzhiyun if (!strtab) {
2734*4882a593Smuzhiyun dev_err(smmu->dev,
2735*4882a593Smuzhiyun "failed to allocate l1 stream table (%u bytes)\n",
2736*4882a593Smuzhiyun l1size);
2737*4882a593Smuzhiyun return -ENOMEM;
2738*4882a593Smuzhiyun }
2739*4882a593Smuzhiyun cfg->strtab = strtab;
2740*4882a593Smuzhiyun
2741*4882a593Smuzhiyun /* Configure strtab_base_cfg for 2 levels */
2742*4882a593Smuzhiyun reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2743*4882a593Smuzhiyun reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2744*4882a593Smuzhiyun reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2745*4882a593Smuzhiyun cfg->strtab_base_cfg = reg;
2746*4882a593Smuzhiyun
2747*4882a593Smuzhiyun return arm_smmu_init_l1_strtab(smmu);
2748*4882a593Smuzhiyun }
2749*4882a593Smuzhiyun
2750*4882a593Smuzhiyun static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2751*4882a593Smuzhiyun {
2752*4882a593Smuzhiyun void *strtab;
2753*4882a593Smuzhiyun u64 reg;
2754*4882a593Smuzhiyun u32 size;
2755*4882a593Smuzhiyun struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2756*4882a593Smuzhiyun
2757*4882a593Smuzhiyun size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2758*4882a593Smuzhiyun strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2759*4882a593Smuzhiyun GFP_KERNEL);
2760*4882a593Smuzhiyun if (!strtab) {
2761*4882a593Smuzhiyun dev_err(smmu->dev,
2762*4882a593Smuzhiyun "failed to allocate linear stream table (%u bytes)\n",
2763*4882a593Smuzhiyun size);
2764*4882a593Smuzhiyun return -ENOMEM;
2765*4882a593Smuzhiyun }
2766*4882a593Smuzhiyun cfg->strtab = strtab;
2767*4882a593Smuzhiyun cfg->num_l1_ents = 1 << smmu->sid_bits;
2768*4882a593Smuzhiyun
2769*4882a593Smuzhiyun /* Configure strtab_base_cfg for a linear table covering all SIDs */
2770*4882a593Smuzhiyun reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2771*4882a593Smuzhiyun reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2772*4882a593Smuzhiyun cfg->strtab_base_cfg = reg;
2773*4882a593Smuzhiyun
2774*4882a593Smuzhiyun arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2775*4882a593Smuzhiyun return 0;
2776*4882a593Smuzhiyun }
2777*4882a593Smuzhiyun
2778*4882a593Smuzhiyun static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2779*4882a593Smuzhiyun {
2780*4882a593Smuzhiyun u64 reg;
2781*4882a593Smuzhiyun int ret;
2782*4882a593Smuzhiyun
2783*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2784*4882a593Smuzhiyun ret = arm_smmu_init_strtab_2lvl(smmu);
2785*4882a593Smuzhiyun else
2786*4882a593Smuzhiyun ret = arm_smmu_init_strtab_linear(smmu);
2787*4882a593Smuzhiyun
2788*4882a593Smuzhiyun if (ret)
2789*4882a593Smuzhiyun return ret;
2790*4882a593Smuzhiyun
2791*4882a593Smuzhiyun /* Set the strtab base address */
2792*4882a593Smuzhiyun reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2793*4882a593Smuzhiyun reg |= STRTAB_BASE_RA;
2794*4882a593Smuzhiyun smmu->strtab_cfg.strtab_base = reg;
2795*4882a593Smuzhiyun
2796*4882a593Smuzhiyun /* Allocate the first VMID for stage-2 bypass STEs */
2797*4882a593Smuzhiyun set_bit(0, smmu->vmid_map);
2798*4882a593Smuzhiyun return 0;
2799*4882a593Smuzhiyun }
2800*4882a593Smuzhiyun
2801*4882a593Smuzhiyun static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2802*4882a593Smuzhiyun {
2803*4882a593Smuzhiyun int ret;
2804*4882a593Smuzhiyun
2805*4882a593Smuzhiyun ret = arm_smmu_init_queues(smmu);
2806*4882a593Smuzhiyun if (ret)
2807*4882a593Smuzhiyun return ret;
2808*4882a593Smuzhiyun
2809*4882a593Smuzhiyun return arm_smmu_init_strtab(smmu);
2810*4882a593Smuzhiyun }
2811*4882a593Smuzhiyun
2812*4882a593Smuzhiyun static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2813*4882a593Smuzhiyun unsigned int reg_off, unsigned int ack_off)
2814*4882a593Smuzhiyun {
2815*4882a593Smuzhiyun u32 reg;
2816*4882a593Smuzhiyun
2817*4882a593Smuzhiyun writel_relaxed(val, smmu->base + reg_off);
2818*4882a593Smuzhiyun return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2819*4882a593Smuzhiyun 1, ARM_SMMU_POLL_TIMEOUT_US);
2820*4882a593Smuzhiyun }
2821*4882a593Smuzhiyun
2822*4882a593Smuzhiyun /* GBPA is "special" */
2823*4882a593Smuzhiyun static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2824*4882a593Smuzhiyun {
2825*4882a593Smuzhiyun int ret;
2826*4882a593Smuzhiyun u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2827*4882a593Smuzhiyun
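	/*
	 * Added note: wait for any previous update to finish, write the new
	 * value with GBPA_UPDATE set, then poll for GBPA_UPDATE to clear again
	 * to confirm the write took effect.
	 */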
2828*4882a593Smuzhiyun ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2829*4882a593Smuzhiyun 1, ARM_SMMU_POLL_TIMEOUT_US);
2830*4882a593Smuzhiyun if (ret)
2831*4882a593Smuzhiyun return ret;
2832*4882a593Smuzhiyun
2833*4882a593Smuzhiyun reg &= ~clr;
2834*4882a593Smuzhiyun reg |= set;
2835*4882a593Smuzhiyun writel_relaxed(reg | GBPA_UPDATE, gbpa);
2836*4882a593Smuzhiyun ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2837*4882a593Smuzhiyun 1, ARM_SMMU_POLL_TIMEOUT_US);
2838*4882a593Smuzhiyun
2839*4882a593Smuzhiyun if (ret)
2840*4882a593Smuzhiyun dev_err(smmu->dev, "GBPA not responding to update\n");
2841*4882a593Smuzhiyun return ret;
2842*4882a593Smuzhiyun }
2843*4882a593Smuzhiyun
2844*4882a593Smuzhiyun static void arm_smmu_free_msis(void *data)
2845*4882a593Smuzhiyun {
2846*4882a593Smuzhiyun struct device *dev = data;
2847*4882a593Smuzhiyun platform_msi_domain_free_irqs(dev);
2848*4882a593Smuzhiyun }
2849*4882a593Smuzhiyun
2850*4882a593Smuzhiyun static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2851*4882a593Smuzhiyun {
2852*4882a593Smuzhiyun phys_addr_t doorbell;
2853*4882a593Smuzhiyun struct device *dev = msi_desc_to_dev(desc);
2854*4882a593Smuzhiyun struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2855*4882a593Smuzhiyun phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2856*4882a593Smuzhiyun
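	/*
	 * Added note: program the doorbell address (CFG0), payload (CFG1) and
	 * memory attributes (CFG2) for this vector.
	 */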
2857*4882a593Smuzhiyun doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2858*4882a593Smuzhiyun doorbell &= MSI_CFG0_ADDR_MASK;
2859*4882a593Smuzhiyun
2860*4882a593Smuzhiyun writeq_relaxed(doorbell, smmu->base + cfg[0]);
2861*4882a593Smuzhiyun writel_relaxed(msg->data, smmu->base + cfg[1]);
2862*4882a593Smuzhiyun writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2863*4882a593Smuzhiyun }
2864*4882a593Smuzhiyun
2865*4882a593Smuzhiyun static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2866*4882a593Smuzhiyun {
2867*4882a593Smuzhiyun struct msi_desc *desc;
2868*4882a593Smuzhiyun int ret, nvec = ARM_SMMU_MAX_MSIS;
2869*4882a593Smuzhiyun struct device *dev = smmu->dev;
2870*4882a593Smuzhiyun
2871*4882a593Smuzhiyun /* Clear the MSI address regs */
2872*4882a593Smuzhiyun writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2873*4882a593Smuzhiyun writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2874*4882a593Smuzhiyun
2875*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_PRI)
2876*4882a593Smuzhiyun writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2877*4882a593Smuzhiyun else
2878*4882a593Smuzhiyun nvec--;
2879*4882a593Smuzhiyun
2880*4882a593Smuzhiyun if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2881*4882a593Smuzhiyun return;
2882*4882a593Smuzhiyun
2883*4882a593Smuzhiyun if (!dev->msi_domain) {
2884*4882a593Smuzhiyun dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2885*4882a593Smuzhiyun return;
2886*4882a593Smuzhiyun }
2887*4882a593Smuzhiyun
2888*4882a593Smuzhiyun /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2889*4882a593Smuzhiyun ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2890*4882a593Smuzhiyun if (ret) {
2891*4882a593Smuzhiyun dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2892*4882a593Smuzhiyun return;
2893*4882a593Smuzhiyun }
2894*4882a593Smuzhiyun
2895*4882a593Smuzhiyun for_each_msi_entry(desc, dev) {
2896*4882a593Smuzhiyun switch (desc->platform.msi_index) {
2897*4882a593Smuzhiyun case EVTQ_MSI_INDEX:
2898*4882a593Smuzhiyun smmu->evtq.q.irq = desc->irq;
2899*4882a593Smuzhiyun break;
2900*4882a593Smuzhiyun case GERROR_MSI_INDEX:
2901*4882a593Smuzhiyun smmu->gerr_irq = desc->irq;
2902*4882a593Smuzhiyun break;
2903*4882a593Smuzhiyun case PRIQ_MSI_INDEX:
2904*4882a593Smuzhiyun smmu->priq.q.irq = desc->irq;
2905*4882a593Smuzhiyun break;
2906*4882a593Smuzhiyun default: /* Unknown */
2907*4882a593Smuzhiyun continue;
2908*4882a593Smuzhiyun }
2909*4882a593Smuzhiyun }
2910*4882a593Smuzhiyun
2911*4882a593Smuzhiyun /* Add callback to free MSIs on teardown */
2912*4882a593Smuzhiyun devm_add_action(dev, arm_smmu_free_msis, dev);
2913*4882a593Smuzhiyun }
2914*4882a593Smuzhiyun
2915*4882a593Smuzhiyun static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2916*4882a593Smuzhiyun {
2917*4882a593Smuzhiyun int irq, ret;
2918*4882a593Smuzhiyun
2919*4882a593Smuzhiyun arm_smmu_setup_msis(smmu);
2920*4882a593Smuzhiyun
2921*4882a593Smuzhiyun /* Request interrupt lines */
2922*4882a593Smuzhiyun irq = smmu->evtq.q.irq;
2923*4882a593Smuzhiyun if (irq) {
2924*4882a593Smuzhiyun ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2925*4882a593Smuzhiyun arm_smmu_evtq_thread,
2926*4882a593Smuzhiyun IRQF_ONESHOT,
2927*4882a593Smuzhiyun "arm-smmu-v3-evtq", smmu);
2928*4882a593Smuzhiyun if (ret < 0)
2929*4882a593Smuzhiyun dev_warn(smmu->dev, "failed to enable evtq irq\n");
2930*4882a593Smuzhiyun } else {
2931*4882a593Smuzhiyun dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2932*4882a593Smuzhiyun }
2933*4882a593Smuzhiyun
2934*4882a593Smuzhiyun irq = smmu->gerr_irq;
2935*4882a593Smuzhiyun if (irq) {
2936*4882a593Smuzhiyun ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2937*4882a593Smuzhiyun 0, "arm-smmu-v3-gerror", smmu);
2938*4882a593Smuzhiyun if (ret < 0)
2939*4882a593Smuzhiyun dev_warn(smmu->dev, "failed to enable gerror irq\n");
2940*4882a593Smuzhiyun } else {
2941*4882a593Smuzhiyun dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2942*4882a593Smuzhiyun }
2943*4882a593Smuzhiyun
2944*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_PRI) {
2945*4882a593Smuzhiyun irq = smmu->priq.q.irq;
2946*4882a593Smuzhiyun if (irq) {
2947*4882a593Smuzhiyun ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2948*4882a593Smuzhiyun arm_smmu_priq_thread,
2949*4882a593Smuzhiyun IRQF_ONESHOT,
2950*4882a593Smuzhiyun "arm-smmu-v3-priq",
2951*4882a593Smuzhiyun smmu);
2952*4882a593Smuzhiyun if (ret < 0)
2953*4882a593Smuzhiyun dev_warn(smmu->dev,
2954*4882a593Smuzhiyun "failed to enable priq irq\n");
2955*4882a593Smuzhiyun } else {
2956*4882a593Smuzhiyun dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2957*4882a593Smuzhiyun }
2958*4882a593Smuzhiyun }
2959*4882a593Smuzhiyun }
2960*4882a593Smuzhiyun
2961*4882a593Smuzhiyun static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2962*4882a593Smuzhiyun {
2963*4882a593Smuzhiyun int ret, irq;
2964*4882a593Smuzhiyun u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2965*4882a593Smuzhiyun
2966*4882a593Smuzhiyun /* Disable IRQs first */
2967*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2968*4882a593Smuzhiyun ARM_SMMU_IRQ_CTRLACK);
2969*4882a593Smuzhiyun if (ret) {
2970*4882a593Smuzhiyun dev_err(smmu->dev, "failed to disable irqs\n");
2971*4882a593Smuzhiyun return ret;
2972*4882a593Smuzhiyun }
2973*4882a593Smuzhiyun
2974*4882a593Smuzhiyun irq = smmu->combined_irq;
2975*4882a593Smuzhiyun if (irq) {
2976*4882a593Smuzhiyun /*
2977*4882a593Smuzhiyun * Cavium ThunderX2 implementation doesn't support unique irq
2978*4882a593Smuzhiyun * lines. Use a single irq line for all the SMMUv3 interrupts.
2979*4882a593Smuzhiyun */
2980*4882a593Smuzhiyun ret = devm_request_threaded_irq(smmu->dev, irq,
2981*4882a593Smuzhiyun arm_smmu_combined_irq_handler,
2982*4882a593Smuzhiyun arm_smmu_combined_irq_thread,
2983*4882a593Smuzhiyun IRQF_ONESHOT,
2984*4882a593Smuzhiyun "arm-smmu-v3-combined-irq", smmu);
2985*4882a593Smuzhiyun if (ret < 0)
2986*4882a593Smuzhiyun dev_warn(smmu->dev, "failed to enable combined irq\n");
2987*4882a593Smuzhiyun } else
2988*4882a593Smuzhiyun arm_smmu_setup_unique_irqs(smmu);
2989*4882a593Smuzhiyun
2990*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_PRI)
2991*4882a593Smuzhiyun irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2992*4882a593Smuzhiyun
2993*4882a593Smuzhiyun /* Enable interrupt generation on the SMMU */
2994*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2995*4882a593Smuzhiyun ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2996*4882a593Smuzhiyun if (ret)
2997*4882a593Smuzhiyun dev_warn(smmu->dev, "failed to enable irqs\n");
2998*4882a593Smuzhiyun
2999*4882a593Smuzhiyun return 0;
3000*4882a593Smuzhiyun }
3001*4882a593Smuzhiyun
3002*4882a593Smuzhiyun static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3003*4882a593Smuzhiyun {
3004*4882a593Smuzhiyun int ret;
3005*4882a593Smuzhiyun
3006*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3007*4882a593Smuzhiyun if (ret)
3008*4882a593Smuzhiyun dev_err(smmu->dev, "failed to clear cr0\n");
3009*4882a593Smuzhiyun
3010*4882a593Smuzhiyun return ret;
3011*4882a593Smuzhiyun }
3012*4882a593Smuzhiyun
3013*4882a593Smuzhiyun static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3014*4882a593Smuzhiyun {
3015*4882a593Smuzhiyun int ret;
3016*4882a593Smuzhiyun u32 reg, enables;
3017*4882a593Smuzhiyun struct arm_smmu_cmdq_ent cmd;
3018*4882a593Smuzhiyun
3019*4882a593Smuzhiyun /* Clear CR0 and sync (disables SMMU and queue processing) */
3020*4882a593Smuzhiyun reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3021*4882a593Smuzhiyun if (reg & CR0_SMMUEN) {
3022*4882a593Smuzhiyun dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3023*4882a593Smuzhiyun WARN_ON(is_kdump_kernel() && !disable_bypass);
3024*4882a593Smuzhiyun arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3025*4882a593Smuzhiyun }
3026*4882a593Smuzhiyun
3027*4882a593Smuzhiyun ret = arm_smmu_device_disable(smmu);
3028*4882a593Smuzhiyun if (ret)
3029*4882a593Smuzhiyun return ret;
3030*4882a593Smuzhiyun
3031*4882a593Smuzhiyun /* CR1 (table and queue memory attributes) */
3032*4882a593Smuzhiyun reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3033*4882a593Smuzhiyun FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3034*4882a593Smuzhiyun FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3035*4882a593Smuzhiyun FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3036*4882a593Smuzhiyun FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3037*4882a593Smuzhiyun FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3038*4882a593Smuzhiyun writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3039*4882a593Smuzhiyun
3040*4882a593Smuzhiyun /* CR2 (random crap) */
3041*4882a593Smuzhiyun reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3042*4882a593Smuzhiyun writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3043*4882a593Smuzhiyun
3044*4882a593Smuzhiyun /* Stream table */
3045*4882a593Smuzhiyun writeq_relaxed(smmu->strtab_cfg.strtab_base,
3046*4882a593Smuzhiyun smmu->base + ARM_SMMU_STRTAB_BASE);
3047*4882a593Smuzhiyun writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3048*4882a593Smuzhiyun smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3049*4882a593Smuzhiyun
3050*4882a593Smuzhiyun /* Command queue */
3051*4882a593Smuzhiyun writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3052*4882a593Smuzhiyun writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3053*4882a593Smuzhiyun writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3054*4882a593Smuzhiyun
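	/*
	 * Added note: queues are enabled one at a time; each step ORs its bit
	 * into 'enables' and rewrites CR0 so previously enabled queues stay on.
	 */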
3055*4882a593Smuzhiyun enables = CR0_CMDQEN;
3056*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3057*4882a593Smuzhiyun ARM_SMMU_CR0ACK);
3058*4882a593Smuzhiyun if (ret) {
3059*4882a593Smuzhiyun dev_err(smmu->dev, "failed to enable command queue\n");
3060*4882a593Smuzhiyun return ret;
3061*4882a593Smuzhiyun }
3062*4882a593Smuzhiyun
3063*4882a593Smuzhiyun /* Invalidate any cached configuration */
3064*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_CFGI_ALL;
3065*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3066*4882a593Smuzhiyun arm_smmu_cmdq_issue_sync(smmu);
3067*4882a593Smuzhiyun
3068*4882a593Smuzhiyun /* Invalidate any stale TLB entries */
3069*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_HYP) {
3070*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3071*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3072*4882a593Smuzhiyun }
3073*4882a593Smuzhiyun
3074*4882a593Smuzhiyun cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3075*4882a593Smuzhiyun arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3076*4882a593Smuzhiyun arm_smmu_cmdq_issue_sync(smmu);
3077*4882a593Smuzhiyun
3078*4882a593Smuzhiyun /* Event queue */
3079*4882a593Smuzhiyun writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3080*4882a593Smuzhiyun writel_relaxed(smmu->evtq.q.llq.prod,
3081*4882a593Smuzhiyun arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3082*4882a593Smuzhiyun writel_relaxed(smmu->evtq.q.llq.cons,
3083*4882a593Smuzhiyun arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3084*4882a593Smuzhiyun
3085*4882a593Smuzhiyun enables |= CR0_EVTQEN;
3086*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3087*4882a593Smuzhiyun ARM_SMMU_CR0ACK);
3088*4882a593Smuzhiyun if (ret) {
3089*4882a593Smuzhiyun dev_err(smmu->dev, "failed to enable event queue\n");
3090*4882a593Smuzhiyun return ret;
3091*4882a593Smuzhiyun }
3092*4882a593Smuzhiyun
3093*4882a593Smuzhiyun /* PRI queue */
3094*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_PRI) {
3095*4882a593Smuzhiyun writeq_relaxed(smmu->priq.q.q_base,
3096*4882a593Smuzhiyun smmu->base + ARM_SMMU_PRIQ_BASE);
3097*4882a593Smuzhiyun writel_relaxed(smmu->priq.q.llq.prod,
3098*4882a593Smuzhiyun arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3099*4882a593Smuzhiyun writel_relaxed(smmu->priq.q.llq.cons,
3100*4882a593Smuzhiyun arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3101*4882a593Smuzhiyun
3102*4882a593Smuzhiyun enables |= CR0_PRIQEN;
3103*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3104*4882a593Smuzhiyun ARM_SMMU_CR0ACK);
3105*4882a593Smuzhiyun if (ret) {
3106*4882a593Smuzhiyun dev_err(smmu->dev, "failed to enable PRI queue\n");
3107*4882a593Smuzhiyun return ret;
3108*4882a593Smuzhiyun }
3109*4882a593Smuzhiyun }
3110*4882a593Smuzhiyun
3111*4882a593Smuzhiyun if (smmu->features & ARM_SMMU_FEAT_ATS) {
3112*4882a593Smuzhiyun enables |= CR0_ATSCHK;
3113*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3114*4882a593Smuzhiyun ARM_SMMU_CR0ACK);
3115*4882a593Smuzhiyun if (ret) {
3116*4882a593Smuzhiyun dev_err(smmu->dev, "failed to enable ATS check\n");
3117*4882a593Smuzhiyun return ret;
3118*4882a593Smuzhiyun }
3119*4882a593Smuzhiyun }
3120*4882a593Smuzhiyun
3121*4882a593Smuzhiyun ret = arm_smmu_setup_irqs(smmu);
3122*4882a593Smuzhiyun if (ret) {
3123*4882a593Smuzhiyun dev_err(smmu->dev, "failed to setup irqs\n");
3124*4882a593Smuzhiyun return ret;
3125*4882a593Smuzhiyun }
3126*4882a593Smuzhiyun
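	/*
	 * Added note: a kdump kernel keeps the event and PRI queues disabled,
	 * presumably because it has no use for fault reports from devices left
	 * running by the crashed kernel.
	 */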
3127*4882a593Smuzhiyun if (is_kdump_kernel())
3128*4882a593Smuzhiyun enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3129*4882a593Smuzhiyun
3130*4882a593Smuzhiyun /* Enable the SMMU interface, or ensure bypass */
3131*4882a593Smuzhiyun if (!bypass || disable_bypass) {
3132*4882a593Smuzhiyun enables |= CR0_SMMUEN;
3133*4882a593Smuzhiyun } else {
3134*4882a593Smuzhiyun ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3135*4882a593Smuzhiyun if (ret)
3136*4882a593Smuzhiyun return ret;
3137*4882a593Smuzhiyun }
3138*4882a593Smuzhiyun ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3139*4882a593Smuzhiyun ARM_SMMU_CR0ACK);
3140*4882a593Smuzhiyun if (ret) {
3141*4882a593Smuzhiyun dev_err(smmu->dev, "failed to enable SMMU interface\n");
3142*4882a593Smuzhiyun return ret;
3143*4882a593Smuzhiyun }
3144*4882a593Smuzhiyun
3145*4882a593Smuzhiyun return 0;
3146*4882a593Smuzhiyun }
3147*4882a593Smuzhiyun
3148*4882a593Smuzhiyun static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3149*4882a593Smuzhiyun {
3150*4882a593Smuzhiyun u32 reg;
3151*4882a593Smuzhiyun bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3152*4882a593Smuzhiyun
3153*4882a593Smuzhiyun /* IDR0 */
3154*4882a593Smuzhiyun reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3155*4882a593Smuzhiyun
3156*4882a593Smuzhiyun /* 2-level structures */
3157*4882a593Smuzhiyun if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3158*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3159*4882a593Smuzhiyun
3160*4882a593Smuzhiyun if (reg & IDR0_CD2L)
3161*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3162*4882a593Smuzhiyun
3163*4882a593Smuzhiyun /*
3164*4882a593Smuzhiyun * Translation table endianness.
3165*4882a593Smuzhiyun * We currently require the same endianness as the CPU, but this
3166*4882a593Smuzhiyun * could be changed later by adding a new IO_PGTABLE_QUIRK.
3167*4882a593Smuzhiyun */
3168*4882a593Smuzhiyun switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3169*4882a593Smuzhiyun case IDR0_TTENDIAN_MIXED:
3170*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3171*4882a593Smuzhiyun break;
3172*4882a593Smuzhiyun #ifdef __BIG_ENDIAN
3173*4882a593Smuzhiyun case IDR0_TTENDIAN_BE:
3174*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_TT_BE;
3175*4882a593Smuzhiyun break;
3176*4882a593Smuzhiyun #else
3177*4882a593Smuzhiyun case IDR0_TTENDIAN_LE:
3178*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_TT_LE;
3179*4882a593Smuzhiyun break;
3180*4882a593Smuzhiyun #endif
3181*4882a593Smuzhiyun default:
3182*4882a593Smuzhiyun dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3183*4882a593Smuzhiyun return -ENXIO;
3184*4882a593Smuzhiyun }
3185*4882a593Smuzhiyun
3186*4882a593Smuzhiyun /* Boolean feature flags */
3187*4882a593Smuzhiyun if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3188*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_PRI;
3189*4882a593Smuzhiyun
3190*4882a593Smuzhiyun if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3191*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_ATS;
3192*4882a593Smuzhiyun
3193*4882a593Smuzhiyun if (reg & IDR0_SEV)
3194*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_SEV;
3195*4882a593Smuzhiyun
3196*4882a593Smuzhiyun if (reg & IDR0_MSI) {
3197*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_MSI;
3198*4882a593Smuzhiyun if (coherent && !disable_msipolling)
3199*4882a593Smuzhiyun smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3200*4882a593Smuzhiyun }
3201*4882a593Smuzhiyun
3202*4882a593Smuzhiyun if (reg & IDR0_HYP)
3203*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_HYP;
3204*4882a593Smuzhiyun
3205*4882a593Smuzhiyun /*
3206*4882a593Smuzhiyun * The coherency feature as set by FW is used in preference to the ID
3207*4882a593Smuzhiyun * register, but warn on mismatch.
3208*4882a593Smuzhiyun */
3209*4882a593Smuzhiyun if (!!(reg & IDR0_COHACC) != coherent)
3210*4882a593Smuzhiyun dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3211*4882a593Smuzhiyun coherent ? "true" : "false");
3212*4882a593Smuzhiyun
3213*4882a593Smuzhiyun switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3214*4882a593Smuzhiyun case IDR0_STALL_MODEL_FORCE:
3215*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3216*4882a593Smuzhiyun fallthrough;
3217*4882a593Smuzhiyun case IDR0_STALL_MODEL_STALL:
3218*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_STALLS;
3219*4882a593Smuzhiyun }
3220*4882a593Smuzhiyun
3221*4882a593Smuzhiyun if (reg & IDR0_S1P)
3222*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3223*4882a593Smuzhiyun
3224*4882a593Smuzhiyun if (reg & IDR0_S2P)
3225*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3226*4882a593Smuzhiyun
3227*4882a593Smuzhiyun if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3228*4882a593Smuzhiyun dev_err(smmu->dev, "no translation support!\n");
3229*4882a593Smuzhiyun return -ENXIO;
3230*4882a593Smuzhiyun }
3231*4882a593Smuzhiyun
3232*4882a593Smuzhiyun /* We only support the AArch64 table format at present */
3233*4882a593Smuzhiyun switch (FIELD_GET(IDR0_TTF, reg)) {
3234*4882a593Smuzhiyun case IDR0_TTF_AARCH32_64:
3235*4882a593Smuzhiyun smmu->ias = 40;
3236*4882a593Smuzhiyun fallthrough;
3237*4882a593Smuzhiyun case IDR0_TTF_AARCH64:
3238*4882a593Smuzhiyun break;
3239*4882a593Smuzhiyun default:
3240*4882a593Smuzhiyun dev_err(smmu->dev, "AArch64 table format not supported!\n");
3241*4882a593Smuzhiyun return -ENXIO;
3242*4882a593Smuzhiyun }
3243*4882a593Smuzhiyun
3244*4882a593Smuzhiyun /* ASID/VMID sizes */
3245*4882a593Smuzhiyun smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3246*4882a593Smuzhiyun smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3247*4882a593Smuzhiyun
3248*4882a593Smuzhiyun /* IDR1 */
3249*4882a593Smuzhiyun reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3250*4882a593Smuzhiyun if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3251*4882a593Smuzhiyun dev_err(smmu->dev, "embedded implementation not supported\n");
3252*4882a593Smuzhiyun return -ENXIO;
3253*4882a593Smuzhiyun }
3254*4882a593Smuzhiyun
3255*4882a593Smuzhiyun /* Queue sizes, capped to ensure natural alignment */
3256*4882a593Smuzhiyun smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3257*4882a593Smuzhiyun FIELD_GET(IDR1_CMDQS, reg));
3258*4882a593Smuzhiyun if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3259*4882a593Smuzhiyun /*
3260*4882a593Smuzhiyun * We don't support splitting up batches, so one batch of
3261*4882a593Smuzhiyun * commands plus an extra sync needs to fit inside the command
3262*4882a593Smuzhiyun * queue. There's also no way we can handle the weird alignment
3263*4882a593Smuzhiyun * restrictions on the base pointer for a unit-length queue.
3264*4882a593Smuzhiyun */
3265*4882a593Smuzhiyun dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3266*4882a593Smuzhiyun CMDQ_BATCH_ENTRIES);
3267*4882a593Smuzhiyun return -ENXIO;
3268*4882a593Smuzhiyun }
3269*4882a593Smuzhiyun
3270*4882a593Smuzhiyun smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3271*4882a593Smuzhiyun FIELD_GET(IDR1_EVTQS, reg));
3272*4882a593Smuzhiyun smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3273*4882a593Smuzhiyun FIELD_GET(IDR1_PRIQS, reg));
3274*4882a593Smuzhiyun
3275*4882a593Smuzhiyun /* SID/SSID sizes */
3276*4882a593Smuzhiyun smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3277*4882a593Smuzhiyun smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3278*4882a593Smuzhiyun
3279*4882a593Smuzhiyun /*
3280*4882a593Smuzhiyun * If the SMMU supports fewer bits than would fill a single L2 stream
3281*4882a593Smuzhiyun * table, use a linear table instead.
3282*4882a593Smuzhiyun */
3283*4882a593Smuzhiyun if (smmu->sid_bits <= STRTAB_SPLIT)
3284*4882a593Smuzhiyun smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3285*4882a593Smuzhiyun
3286*4882a593Smuzhiyun /* IDR3 */
3287*4882a593Smuzhiyun reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3288*4882a593Smuzhiyun if (FIELD_GET(IDR3_RIL, reg))
3289*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3290*4882a593Smuzhiyun
3291*4882a593Smuzhiyun /* IDR5 */
3292*4882a593Smuzhiyun reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3293*4882a593Smuzhiyun
3294*4882a593Smuzhiyun /* Maximum number of outstanding stalls */
3295*4882a593Smuzhiyun smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3296*4882a593Smuzhiyun
3297*4882a593Smuzhiyun /* Page sizes */
3298*4882a593Smuzhiyun if (reg & IDR5_GRAN64K)
3299*4882a593Smuzhiyun smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3300*4882a593Smuzhiyun if (reg & IDR5_GRAN16K)
3301*4882a593Smuzhiyun smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3302*4882a593Smuzhiyun if (reg & IDR5_GRAN4K)
3303*4882a593Smuzhiyun smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3304*4882a593Smuzhiyun
3305*4882a593Smuzhiyun /* Input address size */
3306*4882a593Smuzhiyun if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3307*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_VAX;
3308*4882a593Smuzhiyun
3309*4882a593Smuzhiyun /* Output address size */
3310*4882a593Smuzhiyun switch (FIELD_GET(IDR5_OAS, reg)) {
3311*4882a593Smuzhiyun case IDR5_OAS_32_BIT:
3312*4882a593Smuzhiyun smmu->oas = 32;
3313*4882a593Smuzhiyun break;
3314*4882a593Smuzhiyun case IDR5_OAS_36_BIT:
3315*4882a593Smuzhiyun smmu->oas = 36;
3316*4882a593Smuzhiyun break;
3317*4882a593Smuzhiyun case IDR5_OAS_40_BIT:
3318*4882a593Smuzhiyun smmu->oas = 40;
3319*4882a593Smuzhiyun break;
3320*4882a593Smuzhiyun case IDR5_OAS_42_BIT:
3321*4882a593Smuzhiyun smmu->oas = 42;
3322*4882a593Smuzhiyun break;
3323*4882a593Smuzhiyun case IDR5_OAS_44_BIT:
3324*4882a593Smuzhiyun smmu->oas = 44;
3325*4882a593Smuzhiyun break;
3326*4882a593Smuzhiyun case IDR5_OAS_52_BIT:
3327*4882a593Smuzhiyun smmu->oas = 52;
3328*4882a593Smuzhiyun smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3329*4882a593Smuzhiyun break;
3330*4882a593Smuzhiyun default:
3331*4882a593Smuzhiyun dev_info(smmu->dev,
3332*4882a593Smuzhiyun "unknown output address size. Truncating to 48-bit\n");
3333*4882a593Smuzhiyun fallthrough;
3334*4882a593Smuzhiyun case IDR5_OAS_48_BIT:
3335*4882a593Smuzhiyun smmu->oas = 48;
3336*4882a593Smuzhiyun }
3337*4882a593Smuzhiyun
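	/*
	 * Added note: arm_smmu_ops.pgsize_bitmap starts out as all-ones; the
	 * first SMMU probed sets it, and later SMMUs can only add page sizes.
	 */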
3338*4882a593Smuzhiyun if (arm_smmu_ops.pgsize_bitmap == -1UL)
3339*4882a593Smuzhiyun arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3340*4882a593Smuzhiyun else
3341*4882a593Smuzhiyun arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3342*4882a593Smuzhiyun
3343*4882a593Smuzhiyun /* Set the DMA mask for our table walker */
3344*4882a593Smuzhiyun if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3345*4882a593Smuzhiyun dev_warn(smmu->dev,
3346*4882a593Smuzhiyun "failed to set DMA mask for table walker\n");
3347*4882a593Smuzhiyun
3348*4882a593Smuzhiyun smmu->ias = max(smmu->ias, smmu->oas);
3349*4882a593Smuzhiyun
3350*4882a593Smuzhiyun if (arm_smmu_sva_supported(smmu))
3351*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_SVA;
3352*4882a593Smuzhiyun
3353*4882a593Smuzhiyun dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3354*4882a593Smuzhiyun smmu->ias, smmu->oas, smmu->features);
3355*4882a593Smuzhiyun return 0;
3356*4882a593Smuzhiyun }
3357*4882a593Smuzhiyun
3358*4882a593Smuzhiyun #ifdef CONFIG_ACPI
3359*4882a593Smuzhiyun static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3360*4882a593Smuzhiyun {
3361*4882a593Smuzhiyun switch (model) {
3362*4882a593Smuzhiyun case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3363*4882a593Smuzhiyun smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3364*4882a593Smuzhiyun break;
3365*4882a593Smuzhiyun case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3366*4882a593Smuzhiyun smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3367*4882a593Smuzhiyun break;
3368*4882a593Smuzhiyun }
3369*4882a593Smuzhiyun
3370*4882a593Smuzhiyun dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3371*4882a593Smuzhiyun }
3372*4882a593Smuzhiyun
3373*4882a593Smuzhiyun static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3374*4882a593Smuzhiyun struct arm_smmu_device *smmu)
3375*4882a593Smuzhiyun {
3376*4882a593Smuzhiyun struct acpi_iort_smmu_v3 *iort_smmu;
3377*4882a593Smuzhiyun struct device *dev = smmu->dev;
3378*4882a593Smuzhiyun struct acpi_iort_node *node;
3379*4882a593Smuzhiyun
3380*4882a593Smuzhiyun node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3381*4882a593Smuzhiyun
3382*4882a593Smuzhiyun /* Retrieve SMMUv3 specific data */
3383*4882a593Smuzhiyun iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3384*4882a593Smuzhiyun
3385*4882a593Smuzhiyun acpi_smmu_get_options(iort_smmu->model, smmu);
3386*4882a593Smuzhiyun
3387*4882a593Smuzhiyun if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3388*4882a593Smuzhiyun smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3389*4882a593Smuzhiyun
3390*4882a593Smuzhiyun return 0;
3391*4882a593Smuzhiyun }
3392*4882a593Smuzhiyun #else
arm_smmu_device_acpi_probe(struct platform_device * pdev,struct arm_smmu_device * smmu)3393*4882a593Smuzhiyun static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3394*4882a593Smuzhiyun struct arm_smmu_device *smmu)
3395*4882a593Smuzhiyun {
3396*4882a593Smuzhiyun return -ENODEV;
3397*4882a593Smuzhiyun }
3398*4882a593Smuzhiyun #endif

static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}

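/*
 * Purely illustrative (not taken from any particular board) device tree node
 * for an SMMUv3 instance; all the probe above insists on is
 * #iommu-cells = <1>, i.e. a single cell per master carrying the StreamID:
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "gerror";
 *		#iommu-cells = <1>;
 *	};
 */
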
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
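	/*
	 * Implementations flagged ARM_SMMU_OPT_PAGE0_REGS_ONLY (e.g. Cavium
	 * CN99xx) access the page 1 registers through page 0, so only a
	 * single 64K page needs to be claimed.
	 */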
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

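/*
 * Install (or tear down, when ops is NULL) the IOMMU ops on every bus type
 * the SMMU can translate for, unwinding in reverse order on failure.
 */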
static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
{
	int err;

#ifdef CONFIG_PCI
	if (pci_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			return err;
	}
#endif
#ifdef CONFIG_ARM_AMBA
	if (amba_bustype.iommu_ops != ops) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	if (platform_bus_type.iommu_ops != ops) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}

	return 0;

err_reset_amba_ops:
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
	return err;
}

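/*
 * Map (and exclusively claim) just a sub-region of the SMMU MMIO window,
 * leaving the remainder free for other drivers such as the PMCG PMU.
 */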
static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = {
		.flags	= IORESOURCE_MEM,
		.start	= start,
		.end	= start + size - 1,
	};

	return devm_ioremap_resource(dev, &res);
}

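/*
 * Probe sequence: discover the firmware description (DT or IORT), map the
 * register pages, pick up the wired interrupts, read the hardware ID
 * registers, allocate the in-memory structures, reset the device and
 * finally register with the IOMMU core.
 */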
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines */

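	/*
	 * A single "combined" IRQ, if provided, takes precedence over the
	 * individual event queue, PRI queue and global error lines.
	 */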
	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	ret = iommu_device_register(&smmu->iommu);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		return ret;
	}

	return arm_smmu_set_bus_ops(&arm_smmu_ops);
}

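/*
 * Tear down in the reverse order of probe: detach from the buses, unregister
 * from the IOMMU core and then disable the hardware.
 */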
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_set_bus_ops(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);

	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");