xref: /OK3568_Linux_fs/kernel/arch/arm64/mm/dma-mapping-noalias.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support for uncached DMA mappings.
 * Part of Cortex-A510 erratum 2454944 workaround.
 *
 * Copyright (C) 2022-2023 ARM Ltd.
 * Author: Robin Murphy <robin.murphy@arm.com>
 *	   Activating swiotlb + disabling lazy vunmap: Beata Michalska
 */
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/iommu.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>

/*
 * Bits [58:55] of the translation table descriptor are reserved by the
 * architecture for software use. On the assumption that they are otherwise
 * unused for linear map addresses (which is not actually guaranteed), those
 * bits are repurposed here to track potential cacheable aliases. This is far
 * from perfect, to say the least, but needs must...
 */
#define REFCOUNT_INC BIT(55)
#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf)

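/*
 * pte_set_nc()/pte_clear_nc() keep a small per-page refcount of active
 * non-cacheable users in the software bits above. The first reference switches
 * the linear map entry to Normal-NC and the last one restores Normal-Tagged;
 * the attribute change goes through a transient invalid PTE plus TLB
 * invalidation (effectively a break-before-make sequence), while the cmpxchg
 * loop retries if another CPU updates the refcount concurrently.
 */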
static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data)
{
	pteval_t old_pte, new_pte, pte;
	unsigned int refcount;

	pte = pte_val(READ_ONCE(*ptep));
	do {
		/* Avoid racing against the transient invalid state */
		old_pte = pte | PTE_VALID;
		new_pte = old_pte + REFCOUNT_INC;
		refcount = PTE_REFCOUNT(pte);
		if (WARN_ON(refcount == 15))
			return -EINVAL;
		if (refcount == 0) {
			new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID);
			new_pte |= PTE_ATTRINDX(MT_NORMAL_NC);
		}
		pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte);
	} while (pte != old_pte);

	*(unsigned int *)data = refcount;
	if (refcount)
		return 0;

	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID));
	return 0;
}

static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data)
{
	pteval_t old_pte, new_pte, pte;
	unsigned int refcount;

	pte = pte_val(READ_ONCE(*ptep));
	do {
		old_pte = pte | PTE_VALID;
		new_pte = old_pte - REFCOUNT_INC;
		refcount = PTE_REFCOUNT(pte);
		if (WARN_ON(refcount == 0))
			return -EINVAL;
		if (refcount == 1) {
			new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID);
			new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
		}
		pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte);
	} while (pte != old_pte);

	if (refcount > 1)
		return 0;

	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID));
	return 0;
}

static int set_nc(void *addr, size_t size)
{
	unsigned int count;
	int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr,
					       size, pte_set_nc, &count);

	WARN_RATELIMIT(count == 0 && page_mapped(virt_to_page(addr)),
		       "changing linear mapping but cacheable aliases may still exist\n");
	dsb(ishst);
	isb();
	__flush_dcache_area(addr, size);
	return ret;
}

static int clear_nc(void *addr, size_t size)
{
	int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr,
					       size, pte_clear_nc, NULL);
	dsb(ishst);
	isb();
	__inval_dcache_area(addr, size);
	return ret;
}

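/*
 * Core map/unmap helpers: bounce through swiotlb when the caller requests it
 * or when the buffer is not page-aligned, then flip the linear-map alias of
 * the pages to non-cacheable for the lifetime of the mapping. On failure the
 * bounce buffer is released again without any CPU sync.
 */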
static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys,
				       size_t size, enum dma_data_direction dir,
				       unsigned long attrs, bool bounce)
{
	bounce = bounce || (phys | size) & ~PAGE_MASK;
	if (bounce) {
		phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size),
					      dir, attrs);
		if (phys == DMA_MAPPING_ERROR)
			return DMA_MAPPING_ERROR;
	}
	if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)))
		goto out_unmap;

	return phys;
out_unmap:
	if (bounce)
		swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir,
					 attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return DMA_MAPPING_ERROR;
}

static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size,
				  enum dma_data_direction dir, unsigned long attrs)
{
	clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size));
	if (is_swiotlb_buffer(phys))
		swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs);
}

static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys,
					    size_t size, enum dma_data_direction dir)
{
	if (is_swiotlb_buffer(phys))
		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
	else
		arch_sync_dma_for_device(phys, size, dir);
}

static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys,
					 size_t size, enum dma_data_direction dir)
{
	if (is_swiotlb_buffer(phys))
		swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
	else
		arch_sync_dma_for_cpu(phys, size, dir);
}

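/*
 * dma_map_ops used when the device would otherwise rely on dma-direct:
 * DMA_TO_DEVICE streaming mappings only need a cache clean, everything else
 * goes through the remap/bounce helpers above.
 */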
static void *arm64_noalias_alloc(struct device *dev, size_t size,
				 dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	size = PAGE_ALIGN(size);
	page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & ~__GFP_ZERO);
	if (!page)
		return NULL;

	ret = page_address(page);
	if (set_nc(ret, size)) {
		dma_direct_free_pages(dev, size, page, *dma_addr, 0);
		return NULL;
	}
	return ret;
}

static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t dma_addr, unsigned long attrs)
{
	size = PAGE_ALIGN(size);
	clear_nc(cpu_addr, size);
	dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0);
}

static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page,
					 unsigned long offset, size_t size,
					 enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true);

	if (!bounce && dir == DMA_TO_DEVICE) {
		arch_sync_dma_for_device(phys, size, dir);
		return phys_to_dma(dev, phys);
	}

	bounce = bounce || page_mapped(page);
	phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce);
	if (phys == DMA_MAPPING_ERROR)
		return DMA_MAPPING_ERROR;

	return phys_to_dma(dev, phys);
}

static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr,
				     size_t size, enum dma_data_direction dir,
				     unsigned long attrs)
{
	if (dir == DMA_TO_DEVICE)
		return;
	__arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs);
}

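/* unmap_sg is defined ahead of map_sg so the latter can use it to unwind partial mappings */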
static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents,
				   enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (dir == DMA_TO_DEVICE)
		return;
	for_each_sg(sgl, sg, nents, i)
		__arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address),
				      sg->length, dir, attrs);
}

static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
				enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset,
							 sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg->dma_length = sg->length;
	}

	return nents;

out_unmap:
	arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}

static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr,
						 size_t size, enum dma_data_direction dir)
{
	__arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir);
}

static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
					      size_t size, enum dma_data_direction dir)
{
	__arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
}

static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
					     int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
}

static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
					  int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir);
}

static const struct dma_map_ops arm64_noalias_ops = {
	.alloc = arm64_noalias_alloc,
	.free = arm64_noalias_free,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
	.mmap = dma_common_mmap,
	.get_sgtable = dma_common_get_sgtable,
	.map_page = arm64_noalias_map_page,
	.unmap_page = arm64_noalias_unmap_page,
	.map_sg = arm64_noalias_map_sg,
	.unmap_sg = arm64_noalias_unmap_sg,
	.sync_single_for_cpu = arm64_noalias_sync_single_for_cpu,
	.sync_single_for_device = arm64_noalias_sync_single_for_device,
	.sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu,
	.sync_sg_for_device = arm64_noalias_sync_sg_for_device,
	.dma_supported = dma_direct_supported,
	.get_required_mask = dma_direct_get_required_mask,
	.max_mapping_size = swiotlb_max_mapping_size,
};

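/*
 * When a device is already behind iommu-dma, these ops wrap the original
 * iommu_dma_ops (captured in arm64_noalias_setup_dma_ops() below): buffers are
 * bounced/remapped here first, and IOVA management is then delegated to the
 * wrapped ops with CPU syncs skipped.
 */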
#ifdef CONFIG_IOMMU_DMA
static const struct dma_map_ops *iommu_dma_ops;

static void *arm64_iommu_alloc(struct device *dev, size_t size,
			       dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs)
{
	struct page **pages;
	void *ret;
	int i;

	size = PAGE_ALIGN(size);
	if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
		ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp);
		return ret ? page_address(ret) : NULL;
	}

	ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs);
	if (ret) {
		pages = dma_common_find_pages(ret);
		for (i = 0; i < size / PAGE_SIZE; i++)
			if (set_nc(page_address(pages[i]), PAGE_SIZE))
				goto err;
	}
	return ret;

err:
	while (i--)
		clear_nc(page_address(pages[i]), PAGE_SIZE);
	iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs);
	return NULL;
}

static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr,
			     dma_addr_t dma_addr, unsigned long attrs)
{
	struct page **pages = dma_common_find_pages(cpu_addr);
	int i;

	size = PAGE_ALIGN(size);
	if (!pages)
		return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0);

	for (i = 0; i < size / PAGE_SIZE; i++)
		clear_nc(page_address(pages[i]), PAGE_SIZE);
	iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs);
}

static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page,
				       unsigned long offset, size_t size,
				       enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t ret;

	if (dir == DMA_TO_DEVICE)
		return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs);

	phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page));
	if (phys == DMA_MAPPING_ERROR)
		return DMA_MAPPING_ERROR;

	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
	ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys),
				      size, dir, attrs);
	if (ret == DMA_MAPPING_ERROR)
		__arm64_noalias_unmap(dev, phys, size, dir, attrs);
	return ret;
}

static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
				   enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys;

	if (dir == DMA_TO_DEVICE)
		return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs);

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);
	iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	__arm64_noalias_unmap(dev, phys, size, dir, attrs);
}

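/*
 * For non-TO_DEVICE scatterlists, each segment may be replaced by a bounce
 * buffer, so the bounced physical addresses are temporarily substituted into
 * the scatterlist for the underlying IOMMU mapping and the original pages are
 * restored from orig_phys[] once the IOVAs have been set up.
 */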
static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
			      enum dma_data_direction dir, unsigned long attrs)
{
	int i, ret;
	struct scatterlist *sg;
	phys_addr_t *orig_phys;

	if (dir == DMA_TO_DEVICE)
		return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs);

	orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC);
	if (!orig_phys)
		return 0;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t phys = sg_phys(sg);
		/*
		 * Note we do not have the page_mapped() check here, since
		 * bouncing plays complete havoc with dma-buf imports. Those
		 * may well be mapped in userspace, but we hope and pray that
		 * it's via dma_mmap_attrs() so any such mappings are safely
		 * non-cacheable. DO NOT allow a block device or other similar
		 * scatterlist user to get here (disable IOMMUs if necessary),
		 * since we can't mitigate for both conflicting use-cases.
		 */
		phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false);
		if (phys == DMA_MAPPING_ERROR)
			goto out_unmap;

		orig_phys[i] = sg_phys(sg);
		sg_assign_page(sg, phys_to_page(phys));
		sg->offset = offset_in_page(phys);
	}
	ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	if (ret <= 0)
		goto out_unmap;

	for_each_sg(sgl, sg, nents, i) {
		sg_assign_page(sg, phys_to_page(orig_phys[i]));
		sg->offset = offset_in_page(orig_phys[i]);
	}

	kfree(orig_phys);
	return ret;

out_unmap:
	for_each_sg(sgl, sg, nents, i) {
		__arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs);
		sg_assign_page(sg, phys_to_page(orig_phys[i]));
		sg->offset = offset_in_page(orig_phys[i]);
	}
	kfree(orig_phys);
	return 0;
}

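/*
 * On unmap/sync the scatterlist only carries the (possibly merged) DMA
 * segments, so walk the IOVA space segment by segment and translate each
 * CPU-sized chunk back through the IOMMU domain to find the physical pages
 * whose linear-map attributes need restoring or syncing.
 */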
static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents,
				 enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain;
	struct scatterlist *sg, *tmp;
	dma_addr_t iova;
	int i;

	if (dir == DMA_TO_DEVICE)
		return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs);

	domain = iommu_get_dma_domain(dev);
	iova = sgl->dma_address;
	tmp = sgl;
	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t phys = iommu_iova_to_phys(domain, iova);

		__arm64_noalias_unmap(dev, phys, sg->length, dir, attrs);
		iova += sg->length;
		if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
			tmp = sg_next(tmp);
			iova = tmp->dma_address;
		}
	}
	iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
}

static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr,
					       size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);

	__arm64_noalias_sync_for_device(dev, phys, size, dir);
}

static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
					    size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);

	__arm64_noalias_sync_for_cpu(dev, phys, size, dir);
}

static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
					   int nents, enum dma_data_direction dir)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct scatterlist *sg, *tmp = sgl;
	dma_addr_t iova = sgl->dma_address;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t phys = iommu_iova_to_phys(domain, iova);

		__arm64_noalias_sync_for_device(dev, phys, sg->length, dir);
		iova += sg->length;
		if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
			tmp = sg_next(tmp);
			iova = tmp->dma_address;
		}
	}
}

static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
					int nents, enum dma_data_direction dir)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct scatterlist *sg, *tmp = sgl;
	dma_addr_t iova = sgl->dma_address;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t phys = iommu_iova_to_phys(domain, iova);

		__arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir);
		iova += sg->length;
		if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
			tmp = sg_next(tmp);
			iova = tmp->dma_address;
		}
	}
}

static struct dma_map_ops arm64_iommu_ops = {
	.alloc = arm64_iommu_alloc,
	.free = arm64_iommu_free,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
	.map_page = arm64_iommu_map_page,
	.unmap_page = arm64_iommu_unmap_page,
	.map_sg = arm64_iommu_map_sg,
	.unmap_sg = arm64_iommu_unmap_sg,
	.sync_single_for_cpu = arm64_iommu_sync_single_for_cpu,
	.sync_single_for_device = arm64_iommu_sync_single_for_device,
	.sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu,
	.sync_sg_for_device = arm64_iommu_sync_sg_for_device,
};

#endif /* CONFIG_IOMMU_DMA */

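/*
 * Ensure swiotlb is initialised so bouncing is possible, and flush then
 * disable lazy vunmap so that stale vmalloc aliases of DMA pages are not left
 * lingering with cacheable attributes.
 */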
static inline void arm64_noalias_prepare(void)
{
	if (!is_swiotlb_active())
		swiotlb_late_init_with_default_size(swiotlb_size_or_default());
	if (lazy_vunmap_enable) {
		lazy_vunmap_enable = false;
		vm_unmap_aliases();
	}
}

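/*
 * Install the workaround ops for a non-coherent device: devices with no
 * existing dma_map_ops get arm64_noalias_ops, while devices already using
 * iommu-dma are switched to arm64_iommu_ops, with the original ops captured
 * once in iommu_dma_ops so the pass-through callbacks can be filled in.
 */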
void arm64_noalias_setup_dma_ops(struct device *dev)
{
	if (dev_is_dma_coherent(dev))
		return;

	dev_info(dev, "applying no-alias DMA workaround\n");
	if (!dev->dma_ops) {
		dev->dma_ops = &arm64_noalias_ops;
		goto done;
	}

	if (IS_ENABLED(CONFIG_IOMMU_DMA)) {
		const struct dma_map_ops *ops = dev->dma_ops;

		dev->dma_ops = &arm64_iommu_ops;
		if (iommu_dma_ops)
			goto done;

		/* Capture the original iommu-dma ops before they are replaced */
		iommu_dma_ops = ops;
		arm64_iommu_ops.mmap = iommu_dma_ops->mmap;
		arm64_iommu_ops.get_sgtable = iommu_dma_ops->get_sgtable;
		arm64_iommu_ops.map_resource = iommu_dma_ops->map_resource;
		arm64_iommu_ops.unmap_resource = iommu_dma_ops->unmap_resource;
		arm64_iommu_ops.get_merge_boundary = iommu_dma_ops->get_merge_boundary;
	}
done:
	arm64_noalias_prepare();
}
EXPORT_SYMBOL_GPL(arm64_noalias_setup_dma_ops);