// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2010
 * by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run in a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) addresses to virtual addresses and vice-versa, and
 * also a mechanism to provide contiguous pages for device driver operations
 * (say DMA operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion. It
 * assumes that pages start at zero and go up to the available memory. To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice-versa. The MFNs are the "real" frame numbers. Furthermore
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>

#include <asm/dma-mapping.h>
#include <asm/xen/page-coherent.h>

#include <trace/events/swiotlb.h>
#define MAX_DMA_BITS 32
/*
 * Used to do a quick range check in swiotlb_tbl_unmap_single and
 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by
 * this API.
 */

static char *xen_io_tlb_start, *xen_io_tlb_end;
static unsigned long xen_io_tlb_nslabs;
/*
 * Quick lookup value of the bus address of the IOTLB.
 */

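/*
 * Translate a guest (pseudo-)physical address into a Xen bus address: look
 * up the backend frame number (bfn) of the Xen page frame containing
 * @paddr and re-attach the offset within that page. Illustrative example
 * (assuming 4KiB Xen pages and a hypothetical mapping): paddr 0x10000042
 * lies in frame 0x10000; if that frame maps to bfn 0x8000, the resulting
 * bus address is (0x8000 << 12) | 0x42 == 0x8000042.
 */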
static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
	unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
	phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

	baddr |= paddr & ~XEN_PAGE_MASK;
	return baddr;
}

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
	return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
					  phys_addr_t baddr)
{
	unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
	phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
			    (baddr & ~XEN_PAGE_MASK);

	return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
					  dma_addr_t dma_addr)
{
	return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}

static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address)
{
	return xen_phys_to_dma(dev, virt_to_phys(address));
}

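/*
 * Check whether the physical range [p, p + size) is backed by machine
 * frames that are contiguous from the device's point of view: return 1 as
 * soon as two neighbouring Xen page frames map to non-consecutive backend
 * frames, 0 otherwise.
 */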
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
	unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
	unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

	next_bfn = pfn_to_bfn(xen_pfn);

	for (i = 1; i < nr_pages; i++)
		if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
			return 1;

	return 0;
}

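/*
 * Decide whether a DMA address points into the bounce buffer set up by
 * xen_swiotlb_init(), by translating it back to a local physical address
 * and comparing it against [xen_io_tlb_start, xen_io_tlb_end).
 */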
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

	/* If the address is outside our domain, it CAN
	 * have the same virtual address as another address
	 * in our domain. Therefore _only_ check addresses within our domain.
	 */
	if (pfn_valid(PFN_DOWN(paddr))) {
		return paddr >= virt_to_phys(xen_io_tlb_start) &&
		       paddr < virt_to_phys(xen_io_tlb_end);
	}
	return 0;
}

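/*
 * Exchange the pages backing the freshly allocated IO TLB for machine
 * pages that are contiguous, one IO_TLB_SEGSIZE-sized segment at a time.
 * The requested address width starts just large enough for one segment
 * and is widened up to MAX_DMA_BITS if the hypervisor cannot satisfy the
 * exchange, before giving up.
 */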
static int
xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
{
	int i, rc;
	int dma_bits;
	dma_addr_t dma_handle;
	phys_addr_t p = virt_to_phys(buf);

	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;

	i = 0;
	do {
		int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE);

		do {
			rc = xen_create_contiguous_region(
				p + (i << IO_TLB_SHIFT),
				get_order(slabs << IO_TLB_SHIFT),
				dma_bits, &dma_handle);
		} while (rc && dma_bits++ < MAX_DMA_BITS);
		if (rc)
			return rc;

		i += slabs;
	} while (i < nslabs);
	return 0;
}
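
/*
 * Pick the IO TLB size: honour a non-zero slab count passed in, otherwise
 * default to 64MB worth of slabs, rounded up to a whole IO_TLB_SEGSIZE.
 * Returns the resulting buffer size in bytes.
 */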
static unsigned long xen_set_nslabs(unsigned long nr_tbl)
{
	if (!nr_tbl) {
		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
	} else
		xen_io_tlb_nslabs = nr_tbl;

	return xen_io_tlb_nslabs << IO_TLB_SHIFT;
}

enum xen_swiotlb_err {
	XEN_SWIOTLB_UNKNOWN = 0,
	XEN_SWIOTLB_ENOMEM,
	XEN_SWIOTLB_EFIXUP
};

static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
{
	switch (err) {
	case XEN_SWIOTLB_ENOMEM:
		return "Cannot allocate Xen-SWIOTLB buffer\n";
	case XEN_SWIOTLB_EFIXUP:
		return "Failed to get contiguous memory for DMA from Xen!\n"
		       "You either: don't have the permissions, do not have"
		       " enough free memory under 4GB, or the hypervisor memory"
		       " is too fragmented!";
	default:
		break;
	}
	return "";
}
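
/*
 * Set up the Xen software IO TLB: allocate the buffer (from memblock when
 * called early in boot, from the page allocator otherwise), swap it for
 * machine-contiguous memory below 4GB via xen_swiotlb_fixup(), and hand it
 * to the generic swiotlb code. On failure the size is halved (down to a
 * 2MB floor) and the whole sequence is retried up to three times.
 */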
int __ref xen_swiotlb_init(int verbose, bool early)
{
	unsigned long bytes, order;
	int rc = -ENOMEM;
	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
	unsigned int repeat = 3;

	xen_io_tlb_nslabs = swiotlb_nr_tbl();
retry:
	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);

	/*
	 * IO TLB memory already allocated. Just use it.
	 */
	if (io_tlb_start != 0) {
		xen_io_tlb_start = phys_to_virt(io_tlb_start);
		goto end;
	}

	/*
	 * Get IO TLB memory from any location.
	 */
	if (early) {
		xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
						  PAGE_SIZE);
		if (!xen_io_tlb_start)
			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
	} else {
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
			xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
			if (xen_io_tlb_start)
				break;
			order--;
		}
		if (order != get_order(bytes)) {
			pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
				(PAGE_SIZE << order) >> 20);
			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
		}
	}
	if (!xen_io_tlb_start) {
		m_ret = XEN_SWIOTLB_ENOMEM;
		goto error;
	}
	/*
	 * And replace that memory with pages under 4GB.
	 */
	rc = xen_swiotlb_fixup(xen_io_tlb_start,
			       bytes,
			       xen_io_tlb_nslabs);
	if (rc) {
		if (early)
			memblock_free(__pa(xen_io_tlb_start),
				      PAGE_ALIGN(bytes));
		else {
			free_pages((unsigned long)xen_io_tlb_start, order);
			xen_io_tlb_start = NULL;
		}
		m_ret = XEN_SWIOTLB_EFIXUP;
		goto error;
	}
	if (early) {
		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
					  verbose))
			panic("Cannot allocate SWIOTLB buffer");
		rc = 0;
	} else
		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);

end:
	xen_io_tlb_end = xen_io_tlb_start + bytes;
	if (!rc)
		swiotlb_set_max_segment(PAGE_SIZE);

	return rc;
error:
	if (repeat--) {
		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
					(xen_io_tlb_nslabs >> 1));
		pr_info("Lowering to %luMB\n",
			(xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
		goto retry;
	}
	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
	if (early)
		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
	else
		free_pages((unsigned long)xen_io_tlb_start, order);
	return rc;
}

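/*
 * Allocate a coherent DMA buffer. The pages come from the arch helper
 * xen_alloc_coherent_pages(); if the resulting machine address range does
 * not fit the device's coherent DMA mask or is not machine-contiguous, it
 * is exchanged for a contiguous region via xen_create_contiguous_region()
 * and the page is marked XenRemapped so the free path can undo the
 * exchange.
 */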
static void *
xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	void *ret;
	int order = get_order(size);
	u64 dma_mask = DMA_BIT_MASK(32);
	phys_addr_t phys;
	dma_addr_t dev_addr;

	/*
	 * Ignore region specifiers - the kernel's idea of
	 * pseudo-phys memory layout has nothing to do with the
	 * machine physical layout. We can't allocate highmem
	 * because we can't return a pointer to it.
	 */
	flags &= ~(__GFP_DMA | __GFP_HIGHMEM);

	/* Convert the size to actually allocated. */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	/* On ARM this function returns an ioremap'ped virtual address for
	 * which virt_to_phys doesn't return the corresponding physical
	 * address. In fact on ARM virt_to_phys only works for kernel direct
	 * mapped RAM. Also see comment below.
	 */
	ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);

	if (!ret)
		return ret;

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	/* At this point dma_handle is the dma address, next we are
	 * going to set it to the machine address.
	 * Do not use virt_to_phys(ret) because on ARM it doesn't correspond
	 * to *dma_handle. */
	phys = dma_to_phys(hwdev, *dma_handle);
	dev_addr = xen_phys_to_dma(hwdev, phys);
	if (((dev_addr + size - 1 <= dma_mask)) &&
	    !range_straddles_page_boundary(phys, size))
		*dma_handle = dev_addr;
	else {
		if (xen_create_contiguous_region(phys, order,
						 fls64(dma_mask), dma_handle) != 0) {
			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
			return NULL;
		}
		*dma_handle = phys_to_dma(hwdev, *dma_handle);
		SetPageXenRemapped(virt_to_page(ret));
	}
	memset(ret, 0, size);
	return ret;
}

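/*
 * Free a buffer obtained from xen_swiotlb_alloc_coherent(). If the pages
 * were exchanged for a contiguous machine region at allocation time (the
 * XenRemapped flag is set), give that region back to the hypervisor first.
 */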
static void
xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
			  dma_addr_t dev_addr, unsigned long attrs)
{
	int order = get_order(size);
	phys_addr_t phys;
	u64 dma_mask = DMA_BIT_MASK(32);
	struct page *page;

	if (hwdev && hwdev->coherent_dma_mask)
		dma_mask = hwdev->coherent_dma_mask;

	/* do not use virt_to_phys because on ARM it doesn't return the
	 * corresponding physical address */
	phys = xen_dma_to_phys(hwdev, dev_addr);

	/* Convert the size to actually allocated. */
	size = 1UL << (order + XEN_PAGE_SHIFT);

	if (is_vmalloc_addr(vaddr))
		page = vmalloc_to_page(vaddr);
	else
		page = virt_to_page(vaddr);

	if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
		     range_straddles_page_boundary(phys, size)) &&
	    TestClearPageXenRemapped(page))
		xen_destroy_contiguous_region(phys, order);

	xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
				attrs);
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode. The
 * DMA address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or xen_swiotlb_dma_sync_single is performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
				       unsigned long offset, size_t size,
				       enum dma_data_direction dir,
				       unsigned long attrs)
{
	phys_addr_t map, phys = page_to_phys(page) + offset;
	dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

	BUG_ON(dir == DMA_NONE);
	/*
	 * If the address happens to be in the device's DMA window,
	 * we can safely return the device addr and not worry about bounce
	 * buffering it.
	 */
	if (dma_capable(dev, dev_addr, size, true) &&
	    !range_straddles_page_boundary(phys, size) &&
	    !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
	    swiotlb_force != SWIOTLB_FORCE)
		goto done;

	/*
	 * Oh well, have to allocate and map a bounce buffer.
	 */
	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);

	map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
		return DMA_MAPPING_ERROR;

	phys = map;
	dev_addr = xen_phys_to_dma(dev, map);

	/*
	 * Ensure that the address returned is DMA'ble
	 */
	if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
		swiotlb_tbl_unmap_single(dev, map, size, size, dir,
					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
		return DMA_MAPPING_ERROR;
	}

done:
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
			arch_sync_dma_for_device(phys, size, dir);
		else
			xen_dma_sync_for_device(dev, dev_addr, size, dir);
	}
	return dev_addr;
}

/*
 * Unmap a single streaming mode DMA translation. The dma_addr and size must
 * match what was provided for in a previous xen_swiotlb_map_page call. All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
				   size_t size, enum dma_data_direction dir,
				   unsigned long attrs)
{
	phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);

	BUG_ON(dir == DMA_NONE);

	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
	}

	/* NOTE: We use dev_addr here, not paddr! */
	if (is_xen_swiotlb_buffer(hwdev, dev_addr))
		swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
}

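/*
 * The two sync helpers below keep CPU caches and the bounce buffer
 * coherent around device accesses: on non-coherent devices they perform
 * the arch (or Xen-assisted) cache maintenance, and when the DMA address
 * lies in the Xen swiotlb they also sync the bounce buffer with the
 * original buffer.
 */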
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
				size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_cpu(paddr, size, dir);
		else
			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
	}

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}

static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
				   size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

	if (is_xen_swiotlb_buffer(dev, dma_addr))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev)) {
		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
			arch_sync_dma_for_device(paddr, size, dir);
		else
			xen_dma_sync_for_device(dev, dma_addr, size, dir);
	}
}

/*
 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
		     enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i)
		xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
				       dir, attrs);
}

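/*
 * Map a scatter-gather list by mapping each entry independently through
 * xen_swiotlb_map_page(); entries are never merged. On error, all entries
 * mapped so far are unwound and 0 is returned.
 */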
static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
		   enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(dir == DMA_NONE);

	for_each_sg(sgl, sg, nelems, i) {
		sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nelems;
out_unmap:
	xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	sg_dma_len(sgl) = 0;
	return 0;
}

static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
			    int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
						sg->length, dir);
	}
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
			       int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nelems, i) {
		xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
						   sg->length, dir);
	}
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly. For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
}

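/*
 * The DMA operations exported by this file. These are typically installed
 * as a device's dma_map_ops by the architecture's Xen setup code when the
 * Xen software IO TLB is in use (a note on expected usage by callers
 * outside this file, not something enforced here).
 */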
const struct dma_map_ops xen_swiotlb_dma_ops = {
	.alloc = xen_swiotlb_alloc_coherent,
	.free = xen_swiotlb_free_coherent,
	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
	.map_sg = xen_swiotlb_map_sg,
	.unmap_sg = xen_swiotlb_unmap_sg,
	.map_page = xen_swiotlb_map_page,
	.unmap_page = xen_swiotlb_unmap_page,
	.dma_supported = xen_swiotlb_dma_supported,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
};