// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	u64 total;
	void *mem;

	if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
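
/*
 * Example usage (illustrative sketch only; assumes @channel is an already
 * open host1x channel, error paths elided):
 *
 *	struct host1x_job *job;
 *
 *	job = host1x_job_alloc(channel, 1, 0);
 *	if (!job)
 *		return -ENOMEM;
 *	...
 *	host1x_job_put(job);
 */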

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);
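
/*
 * Jobs are reference counted: host1x_job_alloc() returns a job holding a
 * single reference, host1x_job_get() takes an additional one, and the job
 * is freed once the last reference is dropped with host1x_job_put().
 */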

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
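
/*
 * Example (illustrative only; @bo stands for a command buffer BO, and the
 * total number of gathers added must stay within the num_cmdbufs that was
 * passed to host1x_job_alloc()):
 *
 *	host1x_job_add_gather(job, bo, num_words, 0);
 */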

static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	struct iommu_domain *domain;
	unsigned int i;
	int err;

	domain = iommu_get_domain_for_dev(dev);
	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		dma_addr_t phys_addr, *phys;
		struct sg_table *sgt;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the client device is not attached to an IOMMU, the
		 * physical address of the buffer object can be used.
		 *
		 * Similarly, when an IOMMU domain is shared between all
		 * host1x clients, the IOVA is already available, so no
		 * need to map the buffer object again.
		 *
		 * XXX Note that this isn't always safe to do because it
		 * relies on an assumption that no cache maintenance is
		 * needed on the buffer objects.
		 */
		if (!domain || client->group)
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto unpin;
		}

		if (sgt) {
			unsigned long mask = HOST1X_RELOC_READ |
					     HOST1X_RELOC_WRITE;
			enum dma_data_direction dir;

			switch (reloc->flags & mask) {
			case HOST1X_RELOC_READ:
				dir = DMA_TO_DEVICE;
				break;

			case HOST1X_RELOC_WRITE:
				dir = DMA_FROM_DEVICE;
				break;

			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
				dir = DMA_BIDIRECTIONAL;
				break;

			default:
				err = -EINVAL;
				goto unpin;
			}

			err = dma_map_sgtable(dev, sgt, dir, 0);
			if (err)
				goto unpin;

			job->unpins[job->num_unpins].dev = dev;
			job->unpins[job->num_unpins].dir = dir;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	/*
	 * We will copy the contents of the gather BOs later, so there is
	 * no need to hold and pin them here.
	 */
	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
		return 0;

	for (i = 0; i < job->num_gathers; i++) {
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		dma_addr_t *phys;
		unsigned int j;

		g = &job->gathers[i];
		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the host1x is not attached to an IOMMU, there is no need
		 * to map the buffer object for the host1x, since the physical
		 * address can simply be used.
		 */
		if (!iommu_get_domain_for_dev(host->dev))
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(host->dev, g->bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto put;
		}

		if (host->domain) {
			for_each_sgtable_sg(sgt, sg, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

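			/*
			 * Note that iommu_map_sgtable() returns the number
			 * of bytes mapped, so a return value of zero here
			 * means the mapping failed entirely.
			 */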
			err = iommu_map_sgtable(host->domain,
						iova_dma_addr(&host->iova, alloc),
						sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			job->unpins[job->num_unpins].size = gather_size;
			phys_addr = iova_dma_addr(&host->iova, alloc);
		} else if (sgt) {
			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
			if (err)
				goto put;

			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
			job->unpins[job->num_unpins].dev = host->dev;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->gather_addr_phys[i] = phys_addr;

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
					g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

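/*
 * Parser state for the command stream firewall: while walking a copied
 * gather, this tracks the current class, register, write mask and the
 * number of remaining words, as well as the relocations left to match.
 */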
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

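/*
 * Each command stream word carries its opcode in the top nibble; the
 * cases below correspond to the host1x SETCLASS (0), INCR (1),
 * NONINCR (2), MASK (3), IMM (4) and EXTEND (14) opcodes. Anything
 * else, notably GATHER, is rejected by the firewall.
 */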
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;

		case 4:
		case 14:
			break;

		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher-priority pool first,
	 * since waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher-priority allocation failed, try the generic, blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);
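
/*
 * A typical submission pins the job before handing it to the channel and
 * relies on the completion path to unpin it again. Illustrative sketch
 * only, with error handling elided:
 *
 *	err = host1x_job_pin(job, client->dev);
 *	if (err)
 *		return err;
 *
 *	err = host1x_job_submit(job);
 *	if (err) {
 *		host1x_job_unpin(job);
 *		return err;
 *	}
 */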

void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];
		struct device *dev = unpin->dev ?: host->dev;
		struct sg_table *sgt = unpin->sgt;

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
		    unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				  iova_pfn(&host->iova, job->addr_phys[i]));
		}

		if (unpin->dev && sgt)
			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);

		host1x_bo_unpin(dev, unpin->bo, sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}