// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * arch/powerpc/sysdev/dart_iommu.c
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
 *                    IBM Corporation
 *
 * Based on pSeries_iommu.c:
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 *
 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/ppc-pci.h>

#include "dart.h"

/* DART table address and size */
static u32 *dart_tablebase;
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
static unsigned int __iomem *dart;

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;
static int dart_dirty;
static int dart_is_u4;

#define DART_U4_BYPASS_BASE	0x8000000000ull

#define DBG(...)

static DEFINE_SPINLOCK(invalidate_lock);

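/*
 * Flush the whole DART TLB: set the chipset-specific FLUSHTLB bit and
 * poll until the hardware clears it, retrying with progressively longer
 * timeouts in case the DART missed the write.
 */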
static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
	unsigned int reg, inv_bit;
	unsigned long limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
	 */

	limit = 0;

	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
retry:
	l = 0;
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
		l++;
	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
			DART_OUT(DART_CNTL, reg);
			goto retry;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U3 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

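/*
 * Invalidate a single DART TLB entry identified by its bus RPN, using
 * the U4 "invalidate one" (IONE) facility. Only used on U4.
 */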
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}

	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U4 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

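/*
 * Push @count DART entries starting at @base out of the CPU caches so
 * that the cache-incoherent DART engine sees up-to-date table contents.
 */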
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}

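/*
 * iommu_table_ops flush hook: perform the deferred full TLB invalidate
 * accumulated by dart_build() on U3.
 */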
static void dart_flush(struct iommu_table *tbl)
{
	mb();
	if (dart_dirty) {
		dart_tlb_invalidate_all();
		dart_dirty = 0;
	}
}

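/* Create @npages DART entries at @index mapping the pages starting at @uaddr. */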
static int dart_build(struct iommu_table *tbl, long index,
		       long npages, unsigned long uaddr,
		       enum dma_data_direction direction,
		       unsigned long attrs)
{
	unsigned int *dp, *orig_dp;
	unsigned int rpn;
	long l;

	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	/* On U3, all memory is contiguous, so we can move this
	 * out of the loop.
	 */
	l = npages;
	while (l--) {
		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;

		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);

		uaddr += DART_PAGE_SIZE;
	}
	dart_cache_sync(orig_dp, npages);

	if (dart_is_u4) {
		rpn = index;
		while (npages--)
			dart_tlb_invalidate_one(rpn++);
	} else {
		dart_dirty = 1;
	}
	return 0;
}

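/* Point @npages DART entries starting at @index back at the dummy page. */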
static void dart_free(struct iommu_table *tbl, long index, long npages)
{
	unsigned int *dp, *orig_dp;
	long orig_npages = npages;

	/* We don't worry about flushing the TLB cache. The only drawback of
	 * not doing it is that we won't catch buggy device drivers doing
	 * bad DMAs, but then no 32-bit architecture ever does either.
	 */

	DBG("dart: free at: %lx, %lx\n", index, npages);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	while (npages--)
		*(dp++) = dart_emptyval;

	dart_cache_sync(orig_dp, orig_npages);
}

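/*
 * Allocate the 16MB-aligned DART table below 2GB, plus the spare page
 * that all empty (invalid) entries are made to point at.
 */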
static void allocate_dart(void)
{
	unsigned long tmp;

	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;

	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
	 * will blow up an entire large page anyway in the kernel mapping.
	 */
	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
					MEMBLOCK_LOW_LIMIT, SZ_2G,
					NUMA_NO_NODE);
	if (!dart_tablebase)
		panic("Failed to allocate 16MB below 2GB for DART table\n");

	/* There is no point scanning the DART space for leaks */
	kmemleak_no_scan((void *)dart_tablebase);

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data.
	 */
	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
	if (!tmp)
		panic("DART: table allocation failed\n");

	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);

	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}

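/*
 * Map the DART registers, allocate the table, fill it with empty
 * entries, then program the base/size registers and enable the DART.
 */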
static int __init dart_init(struct device_node *dart_node)
{
	unsigned int i;
	unsigned long base, size;
	struct resource r;

	/* IOMMU disabled by the user ? bail out */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if requested with iommu=on on cmdline.
	 *
	 * 1GB of RAM is picked as limit because some default devices
	 * (i.e. Airport Extreme) have 30 bit address range limits.
	 */

	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Get DART registers */
	if (of_address_to_resource(dart_node, 0, &r))
		panic("DART: can't get register base ! ");

	/* Map in DART registers */
	dart = ioremap(r.start, resource_size(&r));
	if (dart == NULL)
		panic("DART: Cannot map registers!");

	/* Allocate the DART and dummy page */
	allocate_dart();

	/* Fill initial table */
	for (i = 0; i < dart_tablesize/4; i++)
		dart_tablebase[i] = dart_emptyval;

	/* Push to memory */
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/* Initialize DART with table base and enable it. */
	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	size = dart_tablesize >> DART_PAGE_SHIFT;
	if (dart_is_u4) {
		size &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, base);
		DART_OUT(DART_SIZE_U4, size);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	} else {
		size &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (base << DART_CNTL_U3_BASE_SHIFT) |
			 (size << DART_CNTL_U3_SIZE_SHIFT));
	}

	/* Invalidate DART to get rid of possible stale TLBs */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}

static struct iommu_table_ops iommu_dart_ops = {
	.set = dart_build,
	.clear = dart_free,
	.flush = dart_flush,
};

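/*
 * Describe the DART to the generic IOMMU code: a single table covering
 * the whole 4K-page DART window, with the last entry kept reserved.
 */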
static void iommu_table_dart_setup(void)
{
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
	/* it_size is in number of entries */
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Initialize the common IOMMU code */
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
	iommu_table_dart.it_ops = &iommu_dart_ops;
	iommu_init_table(&iommu_table_dart, -1, 0, 0);

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}

static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
	if (!iommu_table_dart_inited) {
		iommu_table_dart_inited = 1;
		iommu_table_dart_setup();
	}
}

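/*
 * Walk up the device-tree parents looking for the U4 PCIe bridge, to
 * tell devices on the U4's own PCIe interface apart from devices
 * sitting behind HyperTransport.
 */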
static bool dart_device_on_pcie(struct device *dev)
{
	struct device_node *np = of_node_get(dev->of_node);

	while (np) {
		if (of_device_is_compatible(np, "U4-pcie") ||
		    of_device_is_compatible(np, "u4-pcie")) {
			of_node_put(np);
			return true;
		}
		np = of_get_next_parent(np);
	}
	return false;
}

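/*
 * Per-device setup: attach the DART iommu_table and, for devices on the
 * U4 PCIe interface, record the bypass window offset for direct DMA.
 */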
static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
	if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
		dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
	set_iommu_table_base(&dev->dev, &iommu_table_dart);
}

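/*
 * DART bypass (direct DMA) is only offered to devices on the U4 PCIe
 * interface that can address at least 40 bits.
 */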
static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
{
	return dart_is_u4 &&
		dart_device_on_pcie(&dev->dev) &&
		mask >= DMA_BIT_MASK(40);
}

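/*
 * Early boot entry point: locate the DART in the device-tree, bring up
 * the hardware and install the DART DMA ops for PCI.
 */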
void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return;	/* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	if (dart_init(dn) != 0) {
		of_node_put(dn);
		return;
	}
	/*
	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
	 * improve performance.  However, that only works for devices connected
	 * to the U4 own PCIe interface, not bridged through hypertransport.
	 * We need the device to support at least 40 bits of addresses.
	 */
	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
	of_node_put(dn);
}

#ifdef CONFIG_PM
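/* Re-sync the table to memory and flush the TLB after resume. */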
static void iommu_dart_restore(void)
{
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
	dart_tlb_invalidate_all();
}

static int __init iommu_init_late_dart(void)
{
	if (!dart_tablebase)
		return 0;

	ppc_md.iommu_restore = iommu_dart_restore;

	return 0;
}

late_initcall(iommu_init_late_dart);
#endif /* CONFIG_PM */