// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * arch/powerpc/sysdev/dart_iommu.c
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
 *                    IBM Corporation
 *
 * Based on pSeries_iommu.c:
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 *
 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/ppc-pci.h>

#include "dart.h"

/* DART table address and size */
static u32 *dart_tablebase;
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
static unsigned int __iomem *dart;

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;
static int dart_dirty;
static int dart_is_u4;

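/* DMA offset set for devices on the U4 PCIe interface, which can bypass the DART */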
#define DART_U4_BYPASS_BASE	0x8000000000ull

#define DBG(...)

static DEFINE_SPINLOCK(invalidate_lock);

static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
	unsigned int reg, inv_bit;
	unsigned long limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
	 */

	limit = 0;

	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
retry:
	l = 0;
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
		l++;
	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
			DART_OUT(DART_CNTL, reg);
			goto retry;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U3 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

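/* Invalidate a single DART TLB entry, identified by its bus RPN (U4 only) */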
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}

	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U4 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

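/*
 * Push updated DART entries out of the CPU cache so the cache-incoherent
 * DART hardware sees them.
 */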
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}

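/* iommu_table_ops.flush hook: flush the whole DART TLB if entries were modified */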
static void dart_flush(struct iommu_table *tbl)
{
	mb();
	if (dart_dirty) {
		dart_tlb_invalidate_all();
		dart_dirty = 0;
	}
}

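/* iommu_table_ops.set hook: fill @npages DART entries at @index mapping @uaddr */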
static int dart_build(struct iommu_table *tbl, long index,
		      long npages, unsigned long uaddr,
		      enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int *dp, *orig_dp;
	unsigned int rpn;
	long l;

	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	/* On U3, all memory is contiguous, so we can move this
	 * out of the loop.
	 */
	l = npages;
	while (l--) {
		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;

		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);

		uaddr += DART_PAGE_SIZE;
	}
	dart_cache_sync(orig_dp, npages);

	if (dart_is_u4) {
		rpn = index;
		while (npages--)
			dart_tlb_invalidate_one(rpn++);
	} else {
		dart_dirty = 1;
	}
	return 0;
}


static void dart_free(struct iommu_table *tbl, long index, long npages)
{
	unsigned int *dp, *orig_dp;
	long orig_npages = npages;

	/* We don't worry about flushing the TLB cache. The only drawback of
	 * not doing it is that we won't catch buggy device drivers doing
	 * bad DMAs, but then no 32-bit architecture ever does either.
	 */

	DBG("dart: free at: %lx, %lx\n", index, npages);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	while (npages--)
		*(dp++) = dart_emptyval;

	dart_cache_sync(orig_dp, orig_npages);
}

static void allocate_dart(void)
{
	unsigned long tmp;

	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;

	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
	 * will blow up an entire large page anyway in the kernel mapping.
	 */
	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
						    MEMBLOCK_LOW_LIMIT, SZ_2G,
						    NUMA_NO_NODE);
	if (!dart_tablebase)
		panic("Failed to allocate 16MB below 2GB for DART table\n");

	/* There is no point scanning the DART space for leaks */
	kmemleak_no_scan((void *)dart_tablebase);

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data
	 */
	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
	if (!tmp)
		panic("DART: table allocation failed\n");

	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);

	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}

static int __init dart_init(struct device_node *dart_node)
{
	unsigned int i;
	unsigned long base, size;
	struct resource r;

	/* IOMMU disabled by the user? Bail out */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if requested with iommu=on on the command line.
	 *
	 * 1GB of RAM is picked as the limit because some default devices
	 * (i.e. Airport Extreme) have 30 bit address range limits.
	 */

	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Get DART registers */
	if (of_address_to_resource(dart_node, 0, &r))
		panic("DART: can't get register base ! ");

	/* Map in DART registers */
	dart = ioremap(r.start, resource_size(&r));
	if (dart == NULL)
		panic("DART: Cannot map registers!");

	/* Allocate the DART and dummy page */
	allocate_dart();

	/* Fill initial table */
	for (i = 0; i < dart_tablesize/4; i++)
		dart_tablebase[i] = dart_emptyval;

	/* Push to memory */
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/* Initialize DART with table base and enable it. */
	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	size = dart_tablesize >> DART_PAGE_SHIFT;
	if (dart_is_u4) {
		size &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, base);
		DART_OUT(DART_SIZE_U4, size);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	} else {
		size &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (base << DART_CNTL_U3_BASE_SHIFT) |
			 (size << DART_CNTL_U3_SIZE_SHIFT));
	}

	/* Invalidate DART to get rid of possible stale TLBs */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}

static struct iommu_table_ops iommu_dart_ops = {
	.set = dart_build,
	.clear = dart_free,
	.flush = dart_flush,
};


static void iommu_table_dart_setup(void)
{
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
	/* it_size is in number of entries */
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Initialize the common IOMMU code */
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
	iommu_table_dart.it_ops = &iommu_dart_ops;
	iommu_init_table(&iommu_table_dart, -1, 0, 0);

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}

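/* Set up the shared DART iommu_table the first time a PCI bus is configured */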
static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
	if (!iommu_table_dart_inited) {
		iommu_table_dart_inited = 1;
		iommu_table_dart_setup();
	}
}

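/* Check whether the device hangs off the U4 PCIe interface by walking its device-tree parents */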
static bool dart_device_on_pcie(struct device *dev)
{
	struct device_node *np = of_node_get(dev->of_node);

	while (np) {
		if (of_device_is_compatible(np, "U4-pcie") ||
		    of_device_is_compatible(np, "u4-pcie")) {
			of_node_put(np);
			return true;
		}
		np = of_get_next_parent(np);
	}
	return false;
}

static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
	if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
		dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
	set_iommu_table_base(&dev->dev, &iommu_table_dart);
}

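/*
 * DART bypass is only available on U4, only for devices on the U4 PCIe
 * interface, and only if the device can address at least 40 bits.
 */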
static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
{
	return dart_is_u4 &&
		dart_device_on_pcie(&dev->dev) &&
		mask >= DMA_BIT_MASK(40);
}

void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return; /* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	if (dart_init(dn) != 0) {
		of_node_put(dn);
		return;
	}
	/*
	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
	 * improve performance. However, that only works for devices connected
	 * to the U4's own PCIe interface, not bridged through HyperTransport.
	 * We need the device to support at least 40 bits of addresses.
	 */
	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
	of_node_put(dn);
}

#ifdef CONFIG_PM
static void iommu_dart_restore(void)
{
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
	dart_tlb_invalidate_all();
}

static int __init iommu_init_late_dart(void)
{
	if (!dart_tablebase)
		return 0;

	ppc_md.iommu_restore = iommu_dart_restore;

	return 0;
}

late_initcall(iommu_init_late_dart);
#endif /* CONFIG_PM */