xref: /OK3568_Linux_fs/kernel/drivers/gpu/host1x/job.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Tegra host1x Job
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (c) 2010-2015, NVIDIA Corporation.
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/dma-mapping.h>
9*4882a593Smuzhiyun #include <linux/err.h>
10*4882a593Smuzhiyun #include <linux/host1x.h>
11*4882a593Smuzhiyun #include <linux/iommu.h>
12*4882a593Smuzhiyun #include <linux/kref.h>
13*4882a593Smuzhiyun #include <linux/module.h>
14*4882a593Smuzhiyun #include <linux/scatterlist.h>
15*4882a593Smuzhiyun #include <linux/slab.h>
16*4882a593Smuzhiyun #include <linux/vmalloc.h>
17*4882a593Smuzhiyun #include <trace/events/host1x.h>
18*4882a593Smuzhiyun 
19*4882a593Smuzhiyun #include "channel.h"
20*4882a593Smuzhiyun #include "dev.h"
21*4882a593Smuzhiyun #include "job.h"
22*4882a593Smuzhiyun #include "syncpt.h"
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
25*4882a593Smuzhiyun 
/*
 * Allocate a job together with all of its trailing arrays (relocations,
 * unpin records, gathers and the physical-address table) in a single
 * kzalloc() so that one kfree() in job_free() releases everything.
 *
 * Returns the new job with one reference held, or NULL on failure or
 * if the requested sizes would overflow the allocation size.
 */
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs)
{
	unsigned int num_unpins = num_relocs;
	struct host1x_job *job;
	u64 total;
	void *p;

	/* command buffers are only pinned when the firewall is disabled */
	if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
		num_unpins += num_cmdbufs;

	/* compute the footprint in 64 bits so overflow can be detected */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* carve the trailing arrays out of the single allocation */
	p = (void *)job + sizeof(struct host1x_job);
	job->relocs = num_relocs ? p : NULL;
	p += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? p : NULL;
	p += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->gathers = num_cmdbufs ? p : NULL;
	p += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? p : NULL;

	/* reloc addresses come first, gather addresses follow them */
	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
70*4882a593Smuzhiyun 
host1x_job_get(struct host1x_job * job)71*4882a593Smuzhiyun struct host1x_job *host1x_job_get(struct host1x_job *job)
72*4882a593Smuzhiyun {
73*4882a593Smuzhiyun 	kref_get(&job->ref);
74*4882a593Smuzhiyun 	return job;
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun EXPORT_SYMBOL(host1x_job_get);
77*4882a593Smuzhiyun 
job_free(struct kref * ref)78*4882a593Smuzhiyun static void job_free(struct kref *ref)
79*4882a593Smuzhiyun {
80*4882a593Smuzhiyun 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun 	kfree(job);
83*4882a593Smuzhiyun }
84*4882a593Smuzhiyun 
host1x_job_put(struct host1x_job * job)85*4882a593Smuzhiyun void host1x_job_put(struct host1x_job *job)
86*4882a593Smuzhiyun {
87*4882a593Smuzhiyun 	kref_put(&job->ref, job_free);
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun EXPORT_SYMBOL(host1x_job_put);
90*4882a593Smuzhiyun 
host1x_job_add_gather(struct host1x_job * job,struct host1x_bo * bo,unsigned int words,unsigned int offset)91*4882a593Smuzhiyun void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
92*4882a593Smuzhiyun 			   unsigned int words, unsigned int offset)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun 	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun 	gather->words = words;
97*4882a593Smuzhiyun 	gather->bo = bo;
98*4882a593Smuzhiyun 	gather->offset = offset;
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	job->num_gathers++;
101*4882a593Smuzhiyun }
102*4882a593Smuzhiyun EXPORT_SYMBOL(host1x_job_add_gather);
103*4882a593Smuzhiyun 
/*
 * Pin every buffer object referenced by @job and resolve its DMA
 * address, recording each successful pin in job->unpins so that
 * host1x_job_unpin() can undo the work on failure or teardown.
 *
 * NOTE(review): the return type is unsigned int but negative errno
 * values are returned through it; the caller assigns the result to a
 * signed int so this works, but the signature is misleading — confirm
 * against later upstream versions, which use int here.
 */
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	struct iommu_domain *domain;
	unsigned int i;
	int err;

	domain = iommu_get_domain_for_dev(dev);
	job->num_unpins = 0;

	/* First pass: pin every relocation target for the client device. */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		dma_addr_t phys_addr, *phys;
		struct sg_table *sgt;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the client device is not attached to an IOMMU, the
		 * physical address of the buffer object can be used.
		 *
		 * Similarly, when an IOMMU domain is shared between all
		 * host1x clients, the IOVA is already available, so no
		 * need to map the buffer object again.
		 *
		 * XXX Note that this isn't always safe to do because it
		 * relies on an assumption that no cache maintenance is
		 * needed on the buffer objects.
		 */
		if (!domain || client->group)
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto unpin;
		}

		/* a NULL sgt means phys_addr was filled in directly */
		if (sgt) {
			unsigned long mask = HOST1X_RELOC_READ |
					     HOST1X_RELOC_WRITE;
			enum dma_data_direction dir;

			/* derive the DMA direction from the reloc flags */
			switch (reloc->flags & mask) {
			case HOST1X_RELOC_READ:
				dir = DMA_TO_DEVICE;
				break;

			case HOST1X_RELOC_WRITE:
				dir = DMA_FROM_DEVICE;
				break;

			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
				dir = DMA_BIDIRECTIONAL;
				break;

			default:
				err = -EINVAL;
				goto unpin;
			}

			err = dma_map_sgtable(dev, sgt, dir, 0);
			if (err)
				goto unpin;

			/*
			 * NOTE(review): on the two error paths just above,
			 * the bo reference and pin taken earlier in this
			 * iteration are not yet recorded in job->unpins, so
			 * host1x_job_unpin() cannot release them — this
			 * looks like a leak; verify against upstream fixes.
			 */
			job->unpins[job->num_unpins].dev = dev;
			job->unpins[job->num_unpins].dir = dir;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	/*
	 * We will copy gathers BO content later, so there is no need to
	 * hold and pin them.
	 */
	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
		return 0;

	/* Second pass: pin each gather for the host1x device itself. */
	for (i = 0; i < job->num_gathers; i++) {
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		dma_addr_t *phys;
		unsigned int j;

		g = &job->gathers[i];
		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		/**
		 * If the host1x is not attached to an IOMMU, there is no need
		 * to map the buffer object for the host1x, since the physical
		 * address can simply be used.
		 */
		if (!iommu_get_domain_for_dev(host->dev))
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(host->dev, g->bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto put;
		}

		if (host->domain) {
			/* map the gather into host1x's private IOVA space */
			for_each_sgtable_sg(sgt, sg, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			/*
			 * iommu_map_sgtable() returns the number of bytes
			 * mapped here, so zero indicates failure.
			 */
			err = iommu_map_sgtable(host->domain,
					iova_dma_addr(&host->iova, alloc),
					sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			/* a non-zero size marks this entry for iommu_unmap() */
			job->unpins[job->num_unpins].size = gather_size;
			phys_addr = iova_dma_addr(&host->iova, alloc);
		} else if (sgt) {
			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
			if (err)
				goto put;

			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
			job->unpins[job->num_unpins].dev = host->dev;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->gather_addr_phys[i] = phys_addr;

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}
278*4882a593Smuzhiyun 
do_relocs(struct host1x_job * job,struct host1x_job_gather * g)279*4882a593Smuzhiyun static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
280*4882a593Smuzhiyun {
281*4882a593Smuzhiyun 	void *cmdbuf_addr = NULL;
282*4882a593Smuzhiyun 	struct host1x_bo *cmdbuf = g->bo;
283*4882a593Smuzhiyun 	unsigned int i;
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun 	/* pin & patch the relocs for one gather */
286*4882a593Smuzhiyun 	for (i = 0; i < job->num_relocs; i++) {
287*4882a593Smuzhiyun 		struct host1x_reloc *reloc = &job->relocs[i];
288*4882a593Smuzhiyun 		u32 reloc_addr = (job->reloc_addr_phys[i] +
289*4882a593Smuzhiyun 				  reloc->target.offset) >> reloc->shift;
290*4882a593Smuzhiyun 		u32 *target;
291*4882a593Smuzhiyun 
292*4882a593Smuzhiyun 		/* skip all other gathers */
293*4882a593Smuzhiyun 		if (cmdbuf != reloc->cmdbuf.bo)
294*4882a593Smuzhiyun 			continue;
295*4882a593Smuzhiyun 
296*4882a593Smuzhiyun 		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
297*4882a593Smuzhiyun 			target = (u32 *)job->gather_copy_mapped +
298*4882a593Smuzhiyun 					reloc->cmdbuf.offset / sizeof(u32) +
299*4882a593Smuzhiyun 						g->offset / sizeof(u32);
300*4882a593Smuzhiyun 			goto patch_reloc;
301*4882a593Smuzhiyun 		}
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 		if (!cmdbuf_addr) {
304*4882a593Smuzhiyun 			cmdbuf_addr = host1x_bo_mmap(cmdbuf);
305*4882a593Smuzhiyun 
306*4882a593Smuzhiyun 			if (unlikely(!cmdbuf_addr)) {
307*4882a593Smuzhiyun 				pr_err("Could not map cmdbuf for relocation\n");
308*4882a593Smuzhiyun 				return -ENOMEM;
309*4882a593Smuzhiyun 			}
310*4882a593Smuzhiyun 		}
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 		target = cmdbuf_addr + reloc->cmdbuf.offset;
313*4882a593Smuzhiyun patch_reloc:
314*4882a593Smuzhiyun 		*target = reloc_addr;
315*4882a593Smuzhiyun 	}
316*4882a593Smuzhiyun 
317*4882a593Smuzhiyun 	if (cmdbuf_addr)
318*4882a593Smuzhiyun 		host1x_bo_munmap(cmdbuf, cmdbuf_addr);
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun 	return 0;
321*4882a593Smuzhiyun }
322*4882a593Smuzhiyun 
check_reloc(struct host1x_reloc * reloc,struct host1x_bo * cmdbuf,unsigned int offset)323*4882a593Smuzhiyun static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
324*4882a593Smuzhiyun 			unsigned int offset)
325*4882a593Smuzhiyun {
326*4882a593Smuzhiyun 	offset *= sizeof(u32);
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
329*4882a593Smuzhiyun 		return false;
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun 	/* relocation shift value validation isn't implemented yet */
332*4882a593Smuzhiyun 	if (reloc->shift)
333*4882a593Smuzhiyun 		return false;
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 	return true;
336*4882a593Smuzhiyun }
337*4882a593Smuzhiyun 
/*
 * State for the software command-stream firewall; one instance is used
 * per job while validating the copied gathers in copy_gathers().
 */
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	/* relocations not yet consumed by address-register writes */
	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	/* gather currently being scanned and the word offset within it */
	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;	/* words remaining in the current gather */
	u32 class;	/* engine class currently selected */
	u32 reg;	/* first register targeted by the current opcode */
	u32 mask;	/* register mask decoded from the current opcode */
	u32 count;	/* word count decoded from the current opcode */
};
354*4882a593Smuzhiyun 
check_register(struct host1x_firewall * fw,unsigned long offset)355*4882a593Smuzhiyun static int check_register(struct host1x_firewall *fw, unsigned long offset)
356*4882a593Smuzhiyun {
357*4882a593Smuzhiyun 	if (!fw->job->is_addr_reg)
358*4882a593Smuzhiyun 		return 0;
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
361*4882a593Smuzhiyun 		if (!fw->num_relocs)
362*4882a593Smuzhiyun 			return -EINVAL;
363*4882a593Smuzhiyun 
364*4882a593Smuzhiyun 		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
365*4882a593Smuzhiyun 			return -EINVAL;
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 		fw->num_relocs--;
368*4882a593Smuzhiyun 		fw->reloc++;
369*4882a593Smuzhiyun 	}
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 	return 0;
372*4882a593Smuzhiyun }
373*4882a593Smuzhiyun 
/* Check that the class selected by a SETCL opcode is allowed for this job. */
static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (fw->job->is_valid_class) {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;

		return 0;
	}

	/* without a per-client hook only the job's own class is allowed */
	return fw->class == class ? 0 : -EINVAL;
}
386*4882a593Smuzhiyun 
check_mask(struct host1x_firewall * fw)387*4882a593Smuzhiyun static int check_mask(struct host1x_firewall *fw)
388*4882a593Smuzhiyun {
389*4882a593Smuzhiyun 	u32 mask = fw->mask;
390*4882a593Smuzhiyun 	u32 reg = fw->reg;
391*4882a593Smuzhiyun 	int ret;
392*4882a593Smuzhiyun 
393*4882a593Smuzhiyun 	while (mask) {
394*4882a593Smuzhiyun 		if (fw->words == 0)
395*4882a593Smuzhiyun 			return -EINVAL;
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun 		if (mask & 1) {
398*4882a593Smuzhiyun 			ret = check_register(fw, reg);
399*4882a593Smuzhiyun 			if (ret < 0)
400*4882a593Smuzhiyun 				return ret;
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 			fw->words--;
403*4882a593Smuzhiyun 			fw->offset++;
404*4882a593Smuzhiyun 		}
405*4882a593Smuzhiyun 		mask >>= 1;
406*4882a593Smuzhiyun 		reg++;
407*4882a593Smuzhiyun 	}
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	return 0;
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun 
check_incr(struct host1x_firewall * fw)412*4882a593Smuzhiyun static int check_incr(struct host1x_firewall *fw)
413*4882a593Smuzhiyun {
414*4882a593Smuzhiyun 	u32 count = fw->count;
415*4882a593Smuzhiyun 	u32 reg = fw->reg;
416*4882a593Smuzhiyun 	int ret;
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun 	while (count) {
419*4882a593Smuzhiyun 		if (fw->words == 0)
420*4882a593Smuzhiyun 			return -EINVAL;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 		ret = check_register(fw, reg);
423*4882a593Smuzhiyun 		if (ret < 0)
424*4882a593Smuzhiyun 			return ret;
425*4882a593Smuzhiyun 
426*4882a593Smuzhiyun 		reg++;
427*4882a593Smuzhiyun 		fw->words--;
428*4882a593Smuzhiyun 		fw->offset++;
429*4882a593Smuzhiyun 		count--;
430*4882a593Smuzhiyun 	}
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun 	return 0;
433*4882a593Smuzhiyun }
434*4882a593Smuzhiyun 
check_nonincr(struct host1x_firewall * fw)435*4882a593Smuzhiyun static int check_nonincr(struct host1x_firewall *fw)
436*4882a593Smuzhiyun {
437*4882a593Smuzhiyun 	u32 count = fw->count;
438*4882a593Smuzhiyun 	int ret;
439*4882a593Smuzhiyun 
440*4882a593Smuzhiyun 	while (count) {
441*4882a593Smuzhiyun 		if (fw->words == 0)
442*4882a593Smuzhiyun 			return -EINVAL;
443*4882a593Smuzhiyun 
444*4882a593Smuzhiyun 		ret = check_register(fw, fw->reg);
445*4882a593Smuzhiyun 		if (ret < 0)
446*4882a593Smuzhiyun 			return ret;
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun 		fw->words--;
449*4882a593Smuzhiyun 		fw->offset++;
450*4882a593Smuzhiyun 		count--;
451*4882a593Smuzhiyun 	}
452*4882a593Smuzhiyun 
453*4882a593Smuzhiyun 	return 0;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun 
/*
 * Walk one copied gather word by word, decoding host1x opcodes and
 * validating every register write through the firewall helpers.
 *
 * Returns 0 if the whole stream is acceptable, a negative errno
 * otherwise.
 */
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		/* the opcode word itself is consumed here */
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			/* set class, with an optional masked register write */
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			/* count writes to consecutive registers */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			/* count writes to a single register */
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			/* writes selected by a 16-bit register mask */
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
		case 14:
			/* accepted with no payload words to validate —
			 * presumably the IMM and EXTEND opcodes; confirm
			 * against the host1x opcode documentation */
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}
523*4882a593Smuzhiyun 
/*
 * Firewall path: copy every gather into one contiguous, kernel-owned
 * write-combined buffer and validate the copied command streams, so
 * userspace cannot modify them after validation.
 *
 * Returns 0 on success or a negative errno. Once gather_copy_size is
 * set, the copy buffer is released by host1x_job_unpin() even on the
 * error returns below.
 */
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	/* total size of all gathers, in bytes */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher priority pools first,
	 * as awaiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher priority allocation failed, try the generic-blocking */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}
588*4882a593Smuzhiyun 
/*
 * Pin all memory used by @job and patch the gathers and relocations so
 * the command streams reference final DMA addresses.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * pinned so far is released again via host1x_job_unpin().
 */
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	/* with the firewall, work on a validated kernel-private copy */
	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		/* propagate the base to later gathers sharing the same bo */
		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	/* ensure all patched words are visible before hardware reads them */
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);
638*4882a593Smuzhiyun 
/*
 * Undo host1x_job_pin(): unmap and unpin every buffer recorded in
 * job->unpins and release the firewall's gather copy, if any. Safe to
 * call on a partially pinned job (num_unpins bounds the loop).
 */
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];
		/* entries pinned via host->domain have no per-unpin device */
		struct device *dev = unpin->dev ?: host->dev;
		struct sg_table *sgt = unpin->sgt;

		/* gathers mapped into the private IOVA space (size != 0 was
		 * set in pin_job()) must be unmapped from the IOMMU first */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
		    unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				iova_pfn(&host->iova, job->addr_phys[i]));
		}

		/* only DMA-API mappings (unpin->dev set) need unmapping */
		if (unpin->dev && sgt)
			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);

		host1x_bo_unpin(dev, unpin->bo, sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);
671*4882a593Smuzhiyun 
672*4882a593Smuzhiyun /*
673*4882a593Smuzhiyun  * Debug routine used to dump job entries
674*4882a593Smuzhiyun  */
host1x_job_dump(struct device * dev,struct host1x_job * job)675*4882a593Smuzhiyun void host1x_job_dump(struct device *dev, struct host1x_job *job)
676*4882a593Smuzhiyun {
677*4882a593Smuzhiyun 	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
678*4882a593Smuzhiyun 	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
679*4882a593Smuzhiyun 	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
680*4882a593Smuzhiyun 	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
681*4882a593Smuzhiyun 	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
682*4882a593Smuzhiyun 	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
683*4882a593Smuzhiyun }
684