1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Copyright (C) Rockchip Electronics Co.Ltd
4*4882a593Smuzhiyun * Author: Felix Zeng <felix.zeng@rock-chips.com>
5*4882a593Smuzhiyun */
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include <linux/slab.h>
8*4882a593Smuzhiyun #include <linux/delay.h>
9*4882a593Smuzhiyun #include <linux/sync_file.h>
10*4882a593Smuzhiyun #include <linux/io.h>
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun #include "rknpu_ioctl.h"
13*4882a593Smuzhiyun #include "rknpu_drv.h"
14*4882a593Smuzhiyun #include "rknpu_reset.h"
15*4882a593Smuzhiyun #include "rknpu_gem.h"
16*4882a593Smuzhiyun #include "rknpu_fence.h"
17*4882a593Smuzhiyun #include "rknpu_job.h"
18*4882a593Smuzhiyun #include "rknpu_mem.h"
19*4882a593Smuzhiyun
/*
 * MMIO register accessors. REG_READ()/REG_WRITE() expect a local variable
 * named `rknpu_core_base` (the ioremapped base of the targeted NPU core)
 * to be in scope at the call site.
 */
#define _REG_READ(base, offset) readl(base + (offset))
#define _REG_WRITE(base, value, offset) writel(value, base + (offset))

#define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
#define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)
25*4882a593Smuzhiyun
rknpu_core_index(int core_mask)26*4882a593Smuzhiyun static int rknpu_core_index(int core_mask)
27*4882a593Smuzhiyun {
28*4882a593Smuzhiyun int index = 0;
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun if (core_mask & RKNPU_CORE0_MASK)
31*4882a593Smuzhiyun index = 0;
32*4882a593Smuzhiyun else if (core_mask & RKNPU_CORE1_MASK)
33*4882a593Smuzhiyun index = 1;
34*4882a593Smuzhiyun else if (core_mask & RKNPU_CORE2_MASK)
35*4882a593Smuzhiyun index = 2;
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun return index;
38*4882a593Smuzhiyun }
39*4882a593Smuzhiyun
rknpu_core_mask(int core_index)40*4882a593Smuzhiyun static int rknpu_core_mask(int core_index)
41*4882a593Smuzhiyun {
42*4882a593Smuzhiyun int core_mask = RKNPU_CORE_AUTO_MASK;
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun switch (core_index) {
45*4882a593Smuzhiyun case 0:
46*4882a593Smuzhiyun core_mask = RKNPU_CORE0_MASK;
47*4882a593Smuzhiyun break;
48*4882a593Smuzhiyun case 1:
49*4882a593Smuzhiyun core_mask = RKNPU_CORE1_MASK;
50*4882a593Smuzhiyun break;
51*4882a593Smuzhiyun case 2:
52*4882a593Smuzhiyun core_mask = RKNPU_CORE2_MASK;
53*4882a593Smuzhiyun break;
54*4882a593Smuzhiyun default:
55*4882a593Smuzhiyun break;
56*4882a593Smuzhiyun }
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun return core_mask;
59*4882a593Smuzhiyun }
60*4882a593Smuzhiyun
rknn_get_task_number(struct rknpu_job * job,int core_index)61*4882a593Smuzhiyun static int rknn_get_task_number(struct rknpu_job *job, int core_index)
62*4882a593Smuzhiyun {
63*4882a593Smuzhiyun int task_num = job->args->task_number;
64*4882a593Smuzhiyun
65*4882a593Smuzhiyun if (job->use_core_num == 2)
66*4882a593Smuzhiyun task_num = job->args->subcore_task[core_index].task_number;
67*4882a593Smuzhiyun else if (job->use_core_num == 3)
68*4882a593Smuzhiyun task_num = job->args->subcore_task[core_index + 2].task_number;
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun return task_num;
71*4882a593Smuzhiyun }
72*4882a593Smuzhiyun
rknpu_job_free(struct rknpu_job * job)73*4882a593Smuzhiyun static void rknpu_job_free(struct rknpu_job *job)
74*4882a593Smuzhiyun {
75*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
76*4882a593Smuzhiyun struct rknpu_gem_object *task_obj = NULL;
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun task_obj =
79*4882a593Smuzhiyun (struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr;
80*4882a593Smuzhiyun if (task_obj)
81*4882a593Smuzhiyun rknpu_gem_object_put(&task_obj->base);
82*4882a593Smuzhiyun #endif
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun if (job->fence)
85*4882a593Smuzhiyun dma_fence_put(job->fence);
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun if (job->args_owner)
88*4882a593Smuzhiyun kfree(job->args);
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun kfree(job);
91*4882a593Smuzhiyun }
92*4882a593Smuzhiyun
/* Free @job and all its resources; always returns 0. */
static int rknpu_job_cleanup(struct rknpu_job *job)
{
	rknpu_job_free(job);

	return 0;
}
99*4882a593Smuzhiyun
/* Deferred cleanup handler, run from the system workqueue (schedule_work). */
static void rknpu_job_cleanup_work(struct work_struct *work)
{
	struct rknpu_job *job =
		container_of(work, struct rknpu_job, cleanup_work);

	rknpu_job_cleanup(job);
}
107*4882a593Smuzhiyun
rknpu_job_alloc(struct rknpu_device * rknpu_dev,struct rknpu_submit * args)108*4882a593Smuzhiyun static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev,
109*4882a593Smuzhiyun struct rknpu_submit *args)
110*4882a593Smuzhiyun {
111*4882a593Smuzhiyun struct rknpu_job *job = NULL;
112*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
113*4882a593Smuzhiyun struct rknpu_gem_object *task_obj = NULL;
114*4882a593Smuzhiyun #endif
115*4882a593Smuzhiyun if (rknpu_dev->config->num_irqs == 1)
116*4882a593Smuzhiyun args->core_mask = RKNPU_CORE0_MASK;
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun job = kzalloc(sizeof(*job), GFP_KERNEL);
119*4882a593Smuzhiyun if (!job)
120*4882a593Smuzhiyun return NULL;
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun job->timestamp = ktime_get();
123*4882a593Smuzhiyun job->rknpu_dev = rknpu_dev;
124*4882a593Smuzhiyun job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
125*4882a593Smuzhiyun ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
126*4882a593Smuzhiyun ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
127*4882a593Smuzhiyun atomic_set(&job->run_count, job->use_core_num);
128*4882a593Smuzhiyun atomic_set(&job->interrupt_count, job->use_core_num);
129*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
130*4882a593Smuzhiyun task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
131*4882a593Smuzhiyun if (task_obj)
132*4882a593Smuzhiyun rknpu_gem_object_get(&task_obj->base);
133*4882a593Smuzhiyun #endif
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun if (!(args->flags & RKNPU_JOB_NONBLOCK)) {
136*4882a593Smuzhiyun job->args = args;
137*4882a593Smuzhiyun job->args_owner = false;
138*4882a593Smuzhiyun return job;
139*4882a593Smuzhiyun }
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun job->args = kzalloc(sizeof(*args), GFP_KERNEL);
142*4882a593Smuzhiyun if (!job->args) {
143*4882a593Smuzhiyun kfree(job);
144*4882a593Smuzhiyun return NULL;
145*4882a593Smuzhiyun }
146*4882a593Smuzhiyun *job->args = *args;
147*4882a593Smuzhiyun job->args_owner = true;
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun INIT_WORK(&job->cleanup_work, rknpu_job_cleanup_work);
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun return job;
152*4882a593Smuzhiyun }
153*4882a593Smuzhiyun
/*
 * Block until @job completes, its timeout expires, or a soft reset begins.
 *
 * Returns 0 on success, -ETIMEDOUT when the hardware did not finish within
 * args->timeout ms, and -EINVAL when the job never reached the hardware or
 * finished without RKNPU_JOB_DONE being set.
 */
static inline int rknpu_job_wait(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
	struct rknpu_task *last_task = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	void __iomem *rknpu_core_base = NULL;
	int core_index = rknpu_core_index(job->args->core_mask);
	unsigned long flags;
	int wait_count = 0;
	int ret = -EINVAL;
	int i = 0;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	/*
	 * Re-arm the wait while the job is still sitting in the todo queue
	 * (not yet committed to hardware), but cap it at 3 timeout periods.
	 */
	do {
		ret = wait_event_timeout(subcore_data->job_done_wq,
					 job->flags & RKNPU_JOB_DONE ||
						 rknpu_dev->soft_reseting,
					 msecs_to_jiffies(args->timeout));
		if (++wait_count >= 3)
			break;
	} while (ret == 0 && job->in_queue[core_index]);

	if (job->in_queue[core_index]) {
		/* Never dispatched: unlink from every core queue it is on. */
		spin_lock_irqsave(&rknpu_dev->lock, flags);
		subcore_data->task_num -= rknn_get_task_number(job, core_index);
		if (job->use_core_num == 1) {
			list_del_init(&job->head[core_index]);
			job->in_queue[core_index] = false;
		} else if (job->use_core_num > 1) {
			for (i = 0; i < job->use_core_num; i++) {
				if (job->in_queue[i]) {
					list_del_init(&job->head[i]);
					job->in_queue[i] = false;
				}
			}
		}
		spin_unlock_irqrestore(&rknpu_dev->lock, flags);
		return ret < 0 ? ret : -EINVAL;
	}

	last_task = job->last_task;
	if (!last_task)
		return ret < 0 ? ret : -EINVAL;

	/* Propagate the irq status captured by the interrupt handler. */
	last_task->int_status = job->int_status[core_index];

	if (ret <= 0) {
		/* Timed out: report how many tasks the PC unit completed. */
		args->task_counter = 0;
		rknpu_core_base = rknpu_dev->base[core_index];
		if (args->flags & RKNPU_JOB_PC) {
			uint32_t task_status = REG_READ(
				rknpu_dev->config->pc_task_status_offset);
			args->task_counter =
				(task_status &
				 rknpu_dev->config->pc_task_number_mask);
		}

		LOG_ERROR(
			"failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
			args->task_counter, args->flags, ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));

		return ret < 0 ? ret : -ETIMEDOUT;
	}

	if (!(job->flags & RKNPU_JOB_DONE))
		return -EINVAL;

	/* Full success: every submitted task ran. */
	args->task_counter = args->task_number;

	return 0;
}
228*4882a593Smuzhiyun
/*
 * Program the PC (program-counter) task chain of core @core_index and kick
 * it off: select this core's slice of the task list, write command-buffer
 * address/amount, interrupt masks and task count, then pulse PC_OP_EN.
 *
 * Returns 0 on success, -EINVAL if no task object was supplied.
 */
static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj =
		(struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
#endif
#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
	struct rknpu_mem_object *task_obj =
		(struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr;
#endif
	struct rknpu_task *task_base = NULL;
	struct rknpu_task *first_task = NULL;
	struct rknpu_task *last_task = NULL;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	int task_start = args->task_start;
	int task_end = args->task_start + args->task_number - 1;
	int task_number = args->task_number;
	int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
	int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
	int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
	int i = 0;

	if (!task_obj)
		return -EINVAL;

	if (rknpu_dev->config->num_irqs > 1) {
		/*
		 * NOTE(review): magic writes to 0x1004/0x3004 — presumably
		 * per-core interrupt/routing setup; confirm against the TRM.
		 */
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (i == core_index) {
				REG_WRITE((0xe + 0x10000000 * i), 0x1004);
				REG_WRITE((0xe + 0x10000000 * i), 0x3004);
			}
		}

		/*
		 * Pick this core's task window. Entries 0-1 of subcore_task
		 * serve 1- and 2-core jobs, entries 2-4 serve 3-core jobs.
		 * The use_core_num == 1 and == 2 branches are identical.
		 */
		if (job->use_core_num == 1) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 2) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 3) {
			task_start =
				args->subcore_task[core_index + 2].task_start;
			task_end =
				args->subcore_task[core_index + 2].task_start +
				args->subcore_task[core_index + 2].task_number -
				1;
			task_number =
				args->subcore_task[core_index + 2].task_number;
		}
	}

	task_base = task_obj->kv_addr;

	first_task = &task_base[task_start];
	last_task = &task_base[task_end];

	/* Command buffer address of the first task. */
	REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);

	/* Register-config amount, rounded up by the per-SoC scale factor. */
	REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
		   pc_data_amount_scale - 1) /
			  pc_data_amount_scale -
			  1,
		  RKNPU_OFFSET_PC_DATA_AMOUNT);

	/* Expect the last task's interrupt; clear any stale first-task bits. */
	REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK);

	REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR);

	/* Task count plus control bits (0x6 | ping-pong enable). */
	REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number,
		  RKNPU_OFFSET_PC_TASK_CONTROL);

	REG_WRITE(args->task_base_addr, RKNPU_OFFSET_PC_DMA_BASE_ADDR);

	job->first_task = first_task;
	job->last_task = last_task;
	job->int_mask[core_index] = last_task->int_mask;

	/* Pulse PC_OP_EN to start execution. */
	REG_WRITE(0x1, RKNPU_OFFSET_PC_OP_EN);
	REG_WRITE(0x0, RKNPU_OFFSET_PC_OP_EN);

	return 0;
}
321*4882a593Smuzhiyun
rknpu_job_commit(struct rknpu_job * job,int core_index)322*4882a593Smuzhiyun static int rknpu_job_commit(struct rknpu_job *job, int core_index)
323*4882a593Smuzhiyun {
324*4882a593Smuzhiyun struct rknpu_device *rknpu_dev = job->rknpu_dev;
325*4882a593Smuzhiyun struct rknpu_submit *args = job->args;
326*4882a593Smuzhiyun void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun // switch to slave mode
329*4882a593Smuzhiyun REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun if (!(args->flags & RKNPU_JOB_PC))
332*4882a593Smuzhiyun return -EINVAL;
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun return rknpu_job_commit_pc(job, core_index);
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun
/*
 * If core @core_index is idle, pop the next job from its todo list; once
 * every core the job needs has claimed it (run_count drops to zero), the
 * last claimer commits it to all masked cores.
 */
static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
{
	struct rknpu_job *job = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;

	/* Dispatch is suspended while a soft reset is in progress. */
	if (rknpu_dev->soft_reseting)
		return;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);

	/* Core busy or nothing queued: nothing to do. */
	if (subcore_data->job || list_empty(&subcore_data->todo_list)) {
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		return;
	}

	job = list_first_entry(&subcore_data->todo_list, struct rknpu_job,
			       head[core_index]);

	list_del_init(&job->head[core_index]);
	job->in_queue[core_index] = false;
	subcore_data->job = job;
	/* Start of hardware occupancy, used for busy-time accounting. */
	job->hw_recoder_time = ktime_get();
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	/* Last core to dequeue the job programs all of its cores. */
	if (atomic_dec_and_test(&job->run_count)) {
		if (job->args->core_mask & RKNPU_CORE0_MASK)
			job->ret = rknpu_job_commit(job, 0);
		if (job->args->core_mask & RKNPU_CORE1_MASK)
			job->ret = rknpu_job_commit(job, 1);
		if (job->args->core_mask & RKNPU_CORE2_MASK)
			job->ret = rknpu_job_commit(job, 2);
	}
}
373*4882a593Smuzhiyun
/*
 * Per-core completion: detach @job from core @core_index, account its busy
 * time, and when the last expected interrupt arrives mark the job done,
 * signal its fence/waiters, then try to start the next queued job.
 */
static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	subcore_data->job = NULL;
	subcore_data->task_num -= rknn_get_task_number(job, core_index);
	subcore_data->timer.busy_time +=
		ktime_us_delta(now, job->hw_recoder_time);
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	/* Only the last core's interrupt completes the job. */
	if (atomic_dec_and_test(&job->interrupt_count)) {
		int use_core_num = job->use_core_num;

		job->flags |= RKNPU_JOB_DONE;
		job->ret = ret;

		if (job->fence)
			dma_fence_signal(job->fence);

		/* Async jobs free themselves from the workqueue. */
		if (job->flags & RKNPU_JOB_ASYNC)
			schedule_work(&job->cleanup_work);

		/* Multi-core waiters all sleep on core 0's waitqueue. */
		if (use_core_num > 1)
			wake_up(&(&rknpu_dev->subcore_datas[0])->job_done_wq);
		else
			wake_up(&subcore_data->job_done_wq);
	}

	rknpu_job_next(rknpu_dev, core_index);
}
410*4882a593Smuzhiyun
/*
 * Queue @job on the todo list of every core in its mask and poke those
 * cores. In auto mode a single core is chosen first: cores are ordered by
 * pending task count and the first idle one (or the least-loaded if all are
 * busy) wins.
 */
static void rknpu_job_schedule(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0, core_index = 0;
	unsigned long flags;
	int task_num_list[3] = { 0, 1, 2 };
	int tmp = 0;

	if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
		/* Sort core indices 0-2 by ascending pending task count. */
		if (rknpu_dev->subcore_datas[0].task_num >
		    rknpu_dev->subcore_datas[1].task_num) {
			tmp = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		}
		if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
		    rknpu_dev->subcore_datas[2].task_num) {
			/* Core 2 is the lightest: rotate it to the front. */
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		} else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
			   rknpu_dev->subcore_datas[2].task_num) {
			/* Core 2 belongs in the middle slot. */
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = tmp;
		}
		/* First idle core in load order, else the least loaded. */
		if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
			core_index = task_num_list[0];
		else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
			core_index = task_num_list[1];
		else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
			core_index = task_num_list[2];
		else
			core_index = task_num_list[0];

		/* Collapse the job to the single chosen core. */
		job->args->core_mask = rknpu_core_mask(core_index);
		job->use_core_num = 1;
		atomic_set(&job->run_count, job->use_core_num);
		atomic_set(&job->interrupt_count, job->use_core_num);
	}

	/* Enqueue on every selected core under the irq lock. */
	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			list_add_tail(&job->head[i], &subcore_data->todo_list);
			subcore_data->task_num += rknn_get_task_number(job, i);
			job->in_queue[i] = true;
		}
	}
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	/* Kick each selected core to pick the job up if it is idle. */
	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i))
			rknpu_job_next(rknpu_dev, i);
	}
}
470*4882a593Smuzhiyun
/*
 * Tear down a job that failed or timed out: detach it from any core whose
 * interrupt never arrived, soft-reset the NPU on timeout (after dumping
 * per-core irq state), log the failure, and free the job.
 */
static void rknpu_job_abort(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	int i = 0;

	/* Grace period for a late interrupt before tearing down. */
	msleep(100);

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
			/* Only detach cores that never entered the irq. */
			if (job == subcore_data->job && !job->irq_entry[i]) {
				subcore_data->job = NULL;
				subcore_data->task_num -=
					rknn_get_task_number(job, i);
			}
			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		}
	}

	if (job->ret == -ETIMEDOUT) {
		LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
		/* Dump per-core irq/task state before resetting. */
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (job->args->core_mask & rknpu_core_mask(i)) {
				void __iomem *rknpu_core_base =
					rknpu_dev->base[i];
				LOG_ERROR(
					"\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
					i, REG_READ(RKNPU_OFFSET_INT_STATUS),
					REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
					job->int_mask[i],
					(REG_READ(
						 rknpu_dev->config
							 ->pc_task_status_offset) &
					 rknpu_dev->config->pc_task_number_mask),
					ktime_to_us(ktime_sub(ktime_get(),
							      job->timestamp)));
			}
		}
		rknpu_soft_reset(rknpu_dev);
	} else {
		LOG_ERROR(
			"job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
			job->flags, job->ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
	}

	rknpu_job_cleanup(job);
}
522*4882a593Smuzhiyun
/*
 * Widen @status so each 2-bit group in bits 0-11 becomes fully set when
 * either of its bits is set; bits above 11 are dropped. The result is
 * compared against a job's expected interrupt mask, tolerating either bit
 * of a pair having fired.
 */
static inline uint32_t rknpu_fuzz_status(uint32_t status)
{
	uint32_t fuzz_status = 0;
	uint32_t group_mask;

	for (group_mask = 0x3; group_mask <= 0xc00; group_mask <<= 2) {
		if (status & group_mask)
			fuzz_status |= group_mask;
	}

	return fuzz_status;
}
547*4882a593Smuzhiyun
/*
 * Common interrupt bottom for all cores: validate the irq status against
 * the mask programmed for the current job, acknowledge the interrupt, and
 * complete (or reject) the job. Always returns IRQ_HANDLED.
 */
static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index)
{
	struct rknpu_device *rknpu_dev = data;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	struct rknpu_subcore_data *subcore_data = NULL;
	struct rknpu_job *job = NULL;
	uint32_t status = 0;
	unsigned long flags;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	job = subcore_data->job;
	if (!job) {
		/* Spurious irq (job already gone): ack and redispatch. */
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		rknpu_job_next(rknpu_dev, core_index);
		return IRQ_HANDLED;
	}
	/* Mark irq received so the abort path won't detach this core. */
	job->irq_entry[core_index] = true;
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	status = REG_READ(RKNPU_OFFSET_INT_STATUS);

	job->int_status[core_index] = status;

	/* Reject interrupts that don't match the programmed mask. */
	if (rknpu_fuzz_status(status) != job->int_mask[core_index]) {
		LOG_ERROR(
			"invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
			status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
			job->int_mask[core_index],
			(REG_READ(rknpu_dev->config->pc_task_status_offset) &
			 rknpu_dev->config->pc_task_number_mask));
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		return IRQ_HANDLED;
	}

	REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);

	rknpu_job_done(job, 0, core_index);

	return IRQ_HANDLED;
}
591*4882a593Smuzhiyun
/* IRQ entry point for NPU core 0. */
irqreturn_t rknpu_core0_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 0);
}
596*4882a593Smuzhiyun
/* IRQ entry point for NPU core 1. */
irqreturn_t rknpu_core1_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 1);
}
601*4882a593Smuzhiyun
/* IRQ entry point for NPU core 2. */
irqreturn_t rknpu_core2_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 2);
}
606*4882a593Smuzhiyun
/*
 * For each core in @core_mask whose current job exceeded its timeout:
 * soft-reset the NPU, then drain that core's queue, scheduling deferred
 * cleanup for the timed-out job and every job still waiting on the list.
 */
static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev,
				    int core_mask)
{
	struct rknpu_job *job = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0;

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			job = subcore_data->job;
			if (job &&
			    ktime_to_ms(ktime_sub(now, job->timestamp)) >=
				    job->args->timeout) {
				rknpu_soft_reset(rknpu_dev);

				spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
				subcore_data->job = NULL;
				spin_unlock_irqrestore(&rknpu_dev->irq_lock,
						       flags);

				/*
				 * Drain: free the running job, then pop and
				 * free every job left in this core's queue.
				 */
				do {
					schedule_work(&job->cleanup_work);

					spin_lock_irqsave(&rknpu_dev->irq_lock,
							  flags);

					if (!list_empty(
						    &subcore_data->todo_list)) {
						job = list_first_entry(
							&subcore_data->todo_list,
							struct rknpu_job,
							head[i]);
						list_del_init(&job->head[i]);
						job->in_queue[i] = false;
					} else {
						job = NULL;
					}

					spin_unlock_irqrestore(
						&rknpu_dev->irq_lock, flags);
				} while (job);
			}
		}
	}
}
655*4882a593Smuzhiyun
rknpu_submit(struct rknpu_device * rknpu_dev,struct rknpu_submit * args)656*4882a593Smuzhiyun static int rknpu_submit(struct rknpu_device *rknpu_dev,
657*4882a593Smuzhiyun struct rknpu_submit *args)
658*4882a593Smuzhiyun {
659*4882a593Smuzhiyun struct rknpu_job *job = NULL;
660*4882a593Smuzhiyun int ret = -EINVAL;
661*4882a593Smuzhiyun
662*4882a593Smuzhiyun if (args->task_number == 0) {
663*4882a593Smuzhiyun LOG_ERROR("invalid rknpu task number!\n");
664*4882a593Smuzhiyun return -EINVAL;
665*4882a593Smuzhiyun }
666*4882a593Smuzhiyun
667*4882a593Smuzhiyun job = rknpu_job_alloc(rknpu_dev, args);
668*4882a593Smuzhiyun if (!job) {
669*4882a593Smuzhiyun LOG_ERROR("failed to allocate rknpu job!\n");
670*4882a593Smuzhiyun return -ENOMEM;
671*4882a593Smuzhiyun }
672*4882a593Smuzhiyun
673*4882a593Smuzhiyun if (args->flags & RKNPU_JOB_FENCE_IN) {
674*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
675*4882a593Smuzhiyun struct dma_fence *in_fence;
676*4882a593Smuzhiyun
677*4882a593Smuzhiyun in_fence = sync_file_get_fence(args->fence_fd);
678*4882a593Smuzhiyun
679*4882a593Smuzhiyun if (!in_fence) {
680*4882a593Smuzhiyun LOG_ERROR("invalid fence in fd, fd: %d\n",
681*4882a593Smuzhiyun args->fence_fd);
682*4882a593Smuzhiyun return -EINVAL;
683*4882a593Smuzhiyun }
684*4882a593Smuzhiyun args->fence_fd = -1;
685*4882a593Smuzhiyun
686*4882a593Smuzhiyun /*
687*4882a593Smuzhiyun * Wait if the fence is from a foreign context, or if the fence
688*4882a593Smuzhiyun * array contains any fence from a foreign context.
689*4882a593Smuzhiyun */
690*4882a593Smuzhiyun ret = 0;
691*4882a593Smuzhiyun if (!dma_fence_match_context(in_fence,
692*4882a593Smuzhiyun rknpu_dev->fence_ctx->context))
693*4882a593Smuzhiyun ret = dma_fence_wait_timeout(in_fence, true,
694*4882a593Smuzhiyun args->timeout);
695*4882a593Smuzhiyun dma_fence_put(in_fence);
696*4882a593Smuzhiyun if (ret < 0) {
697*4882a593Smuzhiyun if (ret != -ERESTARTSYS)
698*4882a593Smuzhiyun LOG_ERROR("Error (%d) waiting for fence!\n",
699*4882a593Smuzhiyun ret);
700*4882a593Smuzhiyun
701*4882a593Smuzhiyun return ret;
702*4882a593Smuzhiyun }
703*4882a593Smuzhiyun #else
704*4882a593Smuzhiyun LOG_ERROR(
705*4882a593Smuzhiyun "failed to use rknpu fence, please enable rknpu fence config!\n");
706*4882a593Smuzhiyun rknpu_job_free(job);
707*4882a593Smuzhiyun return -EINVAL;
708*4882a593Smuzhiyun #endif
709*4882a593Smuzhiyun }
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun if (args->flags & RKNPU_JOB_FENCE_OUT) {
712*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
713*4882a593Smuzhiyun ret = rknpu_fence_alloc(job);
714*4882a593Smuzhiyun if (ret) {
715*4882a593Smuzhiyun rknpu_job_free(job);
716*4882a593Smuzhiyun return ret;
717*4882a593Smuzhiyun }
718*4882a593Smuzhiyun job->args->fence_fd = rknpu_fence_get_fd(job);
719*4882a593Smuzhiyun args->fence_fd = job->args->fence_fd;
720*4882a593Smuzhiyun #else
721*4882a593Smuzhiyun LOG_ERROR(
722*4882a593Smuzhiyun "failed to use rknpu fence, please enable rknpu fence config!\n");
723*4882a593Smuzhiyun rknpu_job_free(job);
724*4882a593Smuzhiyun return -EINVAL;
725*4882a593Smuzhiyun #endif
726*4882a593Smuzhiyun }
727*4882a593Smuzhiyun
728*4882a593Smuzhiyun if (args->flags & RKNPU_JOB_NONBLOCK) {
729*4882a593Smuzhiyun job->flags |= RKNPU_JOB_ASYNC;
730*4882a593Smuzhiyun rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask);
731*4882a593Smuzhiyun rknpu_job_schedule(job);
732*4882a593Smuzhiyun ret = job->ret;
733*4882a593Smuzhiyun if (ret) {
734*4882a593Smuzhiyun rknpu_job_abort(job);
735*4882a593Smuzhiyun return ret;
736*4882a593Smuzhiyun }
737*4882a593Smuzhiyun } else {
738*4882a593Smuzhiyun rknpu_job_schedule(job);
739*4882a593Smuzhiyun if (args->flags & RKNPU_JOB_PC)
740*4882a593Smuzhiyun job->ret = rknpu_job_wait(job);
741*4882a593Smuzhiyun
742*4882a593Smuzhiyun args->task_counter = job->args->task_counter;
743*4882a593Smuzhiyun ret = job->ret;
744*4882a593Smuzhiyun if (!ret)
745*4882a593Smuzhiyun rknpu_job_cleanup(job);
746*4882a593Smuzhiyun else
747*4882a593Smuzhiyun rknpu_job_abort(job);
748*4882a593Smuzhiyun }
749*4882a593Smuzhiyun
750*4882a593Smuzhiyun return ret;
751*4882a593Smuzhiyun }
752*4882a593Smuzhiyun
753*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
rknpu_submit_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)754*4882a593Smuzhiyun int rknpu_submit_ioctl(struct drm_device *dev, void *data,
755*4882a593Smuzhiyun struct drm_file *file_priv)
756*4882a593Smuzhiyun {
757*4882a593Smuzhiyun struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);
758*4882a593Smuzhiyun struct rknpu_submit *args = data;
759*4882a593Smuzhiyun
760*4882a593Smuzhiyun return rknpu_submit(rknpu_dev, args);
761*4882a593Smuzhiyun }
762*4882a593Smuzhiyun #endif
763*4882a593Smuzhiyun
764*4882a593Smuzhiyun #ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
rknpu_submit_ioctl(struct rknpu_device * rknpu_dev,unsigned long data)765*4882a593Smuzhiyun int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
766*4882a593Smuzhiyun {
767*4882a593Smuzhiyun struct rknpu_submit args;
768*4882a593Smuzhiyun int ret = -EINVAL;
769*4882a593Smuzhiyun
770*4882a593Smuzhiyun if (unlikely(copy_from_user(&args, (struct rknpu_submit *)data,
771*4882a593Smuzhiyun sizeof(struct rknpu_submit)))) {
772*4882a593Smuzhiyun LOG_ERROR("%s: copy_from_user failed\n", __func__);
773*4882a593Smuzhiyun ret = -EFAULT;
774*4882a593Smuzhiyun return ret;
775*4882a593Smuzhiyun }
776*4882a593Smuzhiyun
777*4882a593Smuzhiyun ret = rknpu_submit(rknpu_dev, &args);
778*4882a593Smuzhiyun
779*4882a593Smuzhiyun if (unlikely(copy_to_user((struct rknpu_submit *)data, &args,
780*4882a593Smuzhiyun sizeof(struct rknpu_submit)))) {
781*4882a593Smuzhiyun LOG_ERROR("%s: copy_to_user failed\n", __func__);
782*4882a593Smuzhiyun ret = -EFAULT;
783*4882a593Smuzhiyun return ret;
784*4882a593Smuzhiyun }
785*4882a593Smuzhiyun
786*4882a593Smuzhiyun return ret;
787*4882a593Smuzhiyun }
788*4882a593Smuzhiyun #endif
789*4882a593Smuzhiyun
rknpu_get_hw_version(struct rknpu_device * rknpu_dev,uint32_t * version)790*4882a593Smuzhiyun int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version)
791*4882a593Smuzhiyun {
792*4882a593Smuzhiyun void __iomem *rknpu_core_base = rknpu_dev->base[0];
793*4882a593Smuzhiyun
794*4882a593Smuzhiyun if (version == NULL)
795*4882a593Smuzhiyun return -EINVAL;
796*4882a593Smuzhiyun
797*4882a593Smuzhiyun *version = REG_READ(RKNPU_OFFSET_VERSION) +
798*4882a593Smuzhiyun (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);
799*4882a593Smuzhiyun
800*4882a593Smuzhiyun return 0;
801*4882a593Smuzhiyun }
802*4882a593Smuzhiyun
rknpu_get_bw_priority(struct rknpu_device * rknpu_dev,uint32_t * priority,uint32_t * expect,uint32_t * tw)803*4882a593Smuzhiyun int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority,
804*4882a593Smuzhiyun uint32_t *expect, uint32_t *tw)
805*4882a593Smuzhiyun {
806*4882a593Smuzhiyun void __iomem *base = rknpu_dev->bw_priority_base;
807*4882a593Smuzhiyun
808*4882a593Smuzhiyun if (!rknpu_dev->config->bw_enable) {
809*4882a593Smuzhiyun LOG_WARN("Get bw_priority is not supported on this device!\n");
810*4882a593Smuzhiyun return 0;
811*4882a593Smuzhiyun }
812*4882a593Smuzhiyun
813*4882a593Smuzhiyun if (!base)
814*4882a593Smuzhiyun return -EINVAL;
815*4882a593Smuzhiyun
816*4882a593Smuzhiyun spin_lock(&rknpu_dev->lock);
817*4882a593Smuzhiyun
818*4882a593Smuzhiyun if (priority != NULL)
819*4882a593Smuzhiyun *priority = _REG_READ(base, 0x0);
820*4882a593Smuzhiyun
821*4882a593Smuzhiyun if (expect != NULL)
822*4882a593Smuzhiyun *expect = _REG_READ(base, 0x8);
823*4882a593Smuzhiyun
824*4882a593Smuzhiyun if (tw != NULL)
825*4882a593Smuzhiyun *tw = _REG_READ(base, 0xc);
826*4882a593Smuzhiyun
827*4882a593Smuzhiyun spin_unlock(&rknpu_dev->lock);
828*4882a593Smuzhiyun
829*4882a593Smuzhiyun return 0;
830*4882a593Smuzhiyun }
831*4882a593Smuzhiyun
rknpu_set_bw_priority(struct rknpu_device * rknpu_dev,uint32_t priority,uint32_t expect,uint32_t tw)832*4882a593Smuzhiyun int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority,
833*4882a593Smuzhiyun uint32_t expect, uint32_t tw)
834*4882a593Smuzhiyun {
835*4882a593Smuzhiyun void __iomem *base = rknpu_dev->bw_priority_base;
836*4882a593Smuzhiyun
837*4882a593Smuzhiyun if (!rknpu_dev->config->bw_enable) {
838*4882a593Smuzhiyun LOG_WARN("Set bw_priority is not supported on this device!\n");
839*4882a593Smuzhiyun return 0;
840*4882a593Smuzhiyun }
841*4882a593Smuzhiyun
842*4882a593Smuzhiyun if (!base)
843*4882a593Smuzhiyun return -EINVAL;
844*4882a593Smuzhiyun
845*4882a593Smuzhiyun spin_lock(&rknpu_dev->lock);
846*4882a593Smuzhiyun
847*4882a593Smuzhiyun if (priority != 0)
848*4882a593Smuzhiyun _REG_WRITE(base, priority, 0x0);
849*4882a593Smuzhiyun
850*4882a593Smuzhiyun if (expect != 0)
851*4882a593Smuzhiyun _REG_WRITE(base, expect, 0x8);
852*4882a593Smuzhiyun
853*4882a593Smuzhiyun if (tw != 0)
854*4882a593Smuzhiyun _REG_WRITE(base, tw, 0xc);
855*4882a593Smuzhiyun
856*4882a593Smuzhiyun spin_unlock(&rknpu_dev->lock);
857*4882a593Smuzhiyun
858*4882a593Smuzhiyun return 0;
859*4882a593Smuzhiyun }
860*4882a593Smuzhiyun
rknpu_clear_rw_amount(struct rknpu_device * rknpu_dev)861*4882a593Smuzhiyun int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
862*4882a593Smuzhiyun {
863*4882a593Smuzhiyun void __iomem *rknpu_core_base = rknpu_dev->base[0];
864*4882a593Smuzhiyun
865*4882a593Smuzhiyun if (!rknpu_dev->config->bw_enable) {
866*4882a593Smuzhiyun LOG_WARN("Clear rw_amount is not supported on this device!\n");
867*4882a593Smuzhiyun return 0;
868*4882a593Smuzhiyun }
869*4882a593Smuzhiyun
870*4882a593Smuzhiyun spin_lock(&rknpu_dev->lock);
871*4882a593Smuzhiyun
872*4882a593Smuzhiyun if (rknpu_dev->config->pc_dma_ctrl) {
873*4882a593Smuzhiyun uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);
874*4882a593Smuzhiyun
875*4882a593Smuzhiyun REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
876*4882a593Smuzhiyun REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
877*4882a593Smuzhiyun REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
878*4882a593Smuzhiyun REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
879*4882a593Smuzhiyun } else {
880*4882a593Smuzhiyun REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
881*4882a593Smuzhiyun REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
882*4882a593Smuzhiyun }
883*4882a593Smuzhiyun
884*4882a593Smuzhiyun spin_unlock(&rknpu_dev->lock);
885*4882a593Smuzhiyun
886*4882a593Smuzhiyun return 0;
887*4882a593Smuzhiyun }
888*4882a593Smuzhiyun
rknpu_get_rw_amount(struct rknpu_device * rknpu_dev,uint32_t * dt_wr,uint32_t * dt_rd,uint32_t * wd_rd)889*4882a593Smuzhiyun int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr,
890*4882a593Smuzhiyun uint32_t *dt_rd, uint32_t *wd_rd)
891*4882a593Smuzhiyun {
892*4882a593Smuzhiyun void __iomem *rknpu_core_base = rknpu_dev->base[0];
893*4882a593Smuzhiyun int amount_scale = rknpu_dev->config->pc_data_amount_scale;
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun if (!rknpu_dev->config->bw_enable) {
896*4882a593Smuzhiyun LOG_WARN("Get rw_amount is not supported on this device!\n");
897*4882a593Smuzhiyun return 0;
898*4882a593Smuzhiyun }
899*4882a593Smuzhiyun
900*4882a593Smuzhiyun spin_lock(&rknpu_dev->lock);
901*4882a593Smuzhiyun
902*4882a593Smuzhiyun if (dt_wr != NULL)
903*4882a593Smuzhiyun *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale;
904*4882a593Smuzhiyun
905*4882a593Smuzhiyun if (dt_rd != NULL)
906*4882a593Smuzhiyun *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale;
907*4882a593Smuzhiyun
908*4882a593Smuzhiyun if (wd_rd != NULL)
909*4882a593Smuzhiyun *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale;
910*4882a593Smuzhiyun
911*4882a593Smuzhiyun spin_unlock(&rknpu_dev->lock);
912*4882a593Smuzhiyun
913*4882a593Smuzhiyun return 0;
914*4882a593Smuzhiyun }
915*4882a593Smuzhiyun
rknpu_get_total_rw_amount(struct rknpu_device * rknpu_dev,uint32_t * amount)916*4882a593Smuzhiyun int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount)
917*4882a593Smuzhiyun {
918*4882a593Smuzhiyun uint32_t dt_wr = 0;
919*4882a593Smuzhiyun uint32_t dt_rd = 0;
920*4882a593Smuzhiyun uint32_t wd_rd = 0;
921*4882a593Smuzhiyun int ret = -EINVAL;
922*4882a593Smuzhiyun
923*4882a593Smuzhiyun if (!rknpu_dev->config->bw_enable) {
924*4882a593Smuzhiyun LOG_WARN(
925*4882a593Smuzhiyun "Get total_rw_amount is not supported on this device!\n");
926*4882a593Smuzhiyun return 0;
927*4882a593Smuzhiyun }
928*4882a593Smuzhiyun
929*4882a593Smuzhiyun ret = rknpu_get_rw_amount(rknpu_dev, &dt_wr, &dt_rd, &wd_rd);
930*4882a593Smuzhiyun
931*4882a593Smuzhiyun if (amount != NULL)
932*4882a593Smuzhiyun *amount = dt_wr + dt_rd + wd_rd;
933*4882a593Smuzhiyun
934*4882a593Smuzhiyun return ret;
935*4882a593Smuzhiyun }
936