// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Rockchip Electronics Co.Ltd
 * Author: Felix Zeng <felix.zeng@rock-chips.com>
 */

#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sync_file.h>
#include <linux/io.h>

#include "rknpu_ioctl.h"
#include "rknpu_drv.h"
#include "rknpu_reset.h"
#include "rknpu_gem.h"
#include "rknpu_fence.h"
#include "rknpu_job.h"
#include "rknpu_mem.h"

#define _REG_READ(base, offset) readl(base + (offset))
#define _REG_WRITE(base, value, offset) writel(value, base + (offset))

#define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
#define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)

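/* Return the index of the lowest core selected in @core_mask. */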
static int rknpu_core_index(int core_mask)
{
	int index = 0;

	if (core_mask & RKNPU_CORE0_MASK)
		index = 0;
	else if (core_mask & RKNPU_CORE1_MASK)
		index = 1;
	else if (core_mask & RKNPU_CORE2_MASK)
		index = 2;

	return index;
}

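/* Translate a core index back into its core mask bit. */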
static int rknpu_core_mask(int core_index)
{
	int core_mask = RKNPU_CORE_AUTO_MASK;

	switch (core_index) {
	case 0:
		core_mask = RKNPU_CORE0_MASK;
		break;
	case 1:
		core_mask = RKNPU_CORE1_MASK;
		break;
	case 2:
		core_mask = RKNPU_CORE2_MASK;
		break;
	default:
		break;
	}

	return core_mask;
}

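/*
 * Number of tasks this job runs on the given core: single-core jobs use the
 * global task_number, multi-core jobs read the per-subcore split (the
 * three-core split is stored starting at subcore_task[2]).
 */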
static int rknn_get_task_number(struct rknpu_job *job, int core_index)
{
	int task_num = job->args->task_number;

	if (job->use_core_num == 2)
		task_num = job->args->subcore_task[core_index].task_number;
	else if (job->use_core_num == 3)
		task_num = job->args->subcore_task[core_index + 2].task_number;

	return task_num;
}

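/* Drop the task object reference, the fence and the job itself. */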
static void rknpu_job_free(struct rknpu_job *job)
{
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj = NULL;

	task_obj =
		(struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr;
	if (task_obj)
		rknpu_gem_object_put(&task_obj->base);
#endif

	if (job->fence)
		dma_fence_put(job->fence);

	if (job->args_owner)
		kfree(job->args);

	kfree(job);
}

static int rknpu_job_cleanup(struct rknpu_job *job)
{
	rknpu_job_free(job);

	return 0;
}

static void rknpu_job_cleanup_work(struct work_struct *work)
{
	struct rknpu_job *job =
		container_of(work, struct rknpu_job, cleanup_work);

	rknpu_job_cleanup(job);
}

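/*
 * Allocate and initialise a job for @args. use_core_num is the number of
 * bits set in core_mask; run_count and interrupt_count start at that value
 * so commit and completion only happen once every selected core has joined.
 * Non-blocking submissions get their own copy of @args because the caller's
 * buffer is gone by the time the job is cleaned up.
 */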
static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev,
						struct rknpu_submit *args)
{
	struct rknpu_job *job = NULL;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj = NULL;
#endif
	if (rknpu_dev->config->num_irqs == 1)
		args->core_mask = RKNPU_CORE0_MASK;

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return NULL;

	job->timestamp = ktime_get();
	job->rknpu_dev = rknpu_dev;
	job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
			    ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
			    ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
	atomic_set(&job->run_count, job->use_core_num);
	atomic_set(&job->interrupt_count, job->use_core_num);
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
	if (task_obj)
		rknpu_gem_object_get(&task_obj->base);
#endif

	if (!(args->flags & RKNPU_JOB_NONBLOCK)) {
		job->args = args;
		job->args_owner = false;
		return job;
	}

	job->args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!job->args) {
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
		if (task_obj)
			rknpu_gem_object_put(&task_obj->base);
#endif
		kfree(job);
		return NULL;
	}
	*job->args = *args;
	job->args_owner = true;

	INIT_WORK(&job->cleanup_work, rknpu_job_cleanup_work);

	return job;
}

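/*
 * Block until the job completes, the NPU is being soft-reset, or the
 * timeout expires. If the job is still sitting on a todo list after three
 * timed-out waits it is dequeued and the submit fails; on a hardware
 * timeout the programmed-counter task status is read back for the error
 * report.
 */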
static inline int rknpu_job_wait(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
	struct rknpu_task *last_task = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	void __iomem *rknpu_core_base = NULL;
	int core_index = rknpu_core_index(job->args->core_mask);
	unsigned long flags;
	int wait_count = 0;
	int ret = -EINVAL;
	int i = 0;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	do {
		ret = wait_event_timeout(subcore_data->job_done_wq,
					 job->flags & RKNPU_JOB_DONE ||
						 rknpu_dev->soft_reseting,
					 msecs_to_jiffies(args->timeout));
		if (++wait_count >= 3)
			break;
	} while (ret == 0 && job->in_queue[core_index]);

	if (job->in_queue[core_index]) {
		spin_lock_irqsave(&rknpu_dev->lock, flags);
		subcore_data->task_num -= rknn_get_task_number(job, core_index);
		if (job->use_core_num == 1) {
			list_del_init(&job->head[core_index]);
			job->in_queue[core_index] = false;
		} else if (job->use_core_num > 1) {
			for (i = 0; i < job->use_core_num; i++) {
				if (job->in_queue[i]) {
					list_del_init(&job->head[i]);
					job->in_queue[i] = false;
				}
			}
		}
		spin_unlock_irqrestore(&rknpu_dev->lock, flags);
		return ret < 0 ? ret : -EINVAL;
	}

	last_task = job->last_task;
	if (!last_task)
		return ret < 0 ? ret : -EINVAL;

	last_task->int_status = job->int_status[core_index];

	if (ret <= 0) {
		args->task_counter = 0;
		rknpu_core_base = rknpu_dev->base[core_index];
		if (args->flags & RKNPU_JOB_PC) {
			uint32_t task_status = REG_READ(
				rknpu_dev->config->pc_task_status_offset);
			args->task_counter =
				(task_status &
				 rknpu_dev->config->pc_task_number_mask);
		}

		LOG_ERROR(
			"failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
			args->task_counter, args->flags, ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));

		return ret < 0 ? ret : -ETIMEDOUT;
	}

	if (!(job->flags & RKNPU_JOB_DONE))
		return -EINVAL;

	args->task_counter = args->task_number;

	return 0;
}

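/*
 * Program one core for PC (program-counter/command-chain) mode: point the
 * core at the register-command buffer of the first task, set the data
 * amount, interrupt mask and task count, then pulse PC_OP_EN to start.
 * On multi-core parts each core gets its slice of the task list from
 * args->subcore_task[].
 */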
static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj =
		(struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
#endif
#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
	struct rknpu_mem_object *task_obj =
		(struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr;
#endif
	struct rknpu_task *task_base = NULL;
	struct rknpu_task *first_task = NULL;
	struct rknpu_task *last_task = NULL;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	int task_start = args->task_start;
	int task_end = args->task_start + args->task_number - 1;
	int task_number = args->task_number;
	int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
	int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
	int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
	int i = 0;

	if (!task_obj)
		return -EINVAL;

	if (rknpu_dev->config->num_irqs > 1) {
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (i == core_index) {
				REG_WRITE((0xe + 0x10000000 * i), 0x1004);
				REG_WRITE((0xe + 0x10000000 * i), 0x3004);
			}
		}

		if (job->use_core_num == 1) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 2) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 3) {
			task_start =
				args->subcore_task[core_index + 2].task_start;
			task_end =
				args->subcore_task[core_index + 2].task_start +
				args->subcore_task[core_index + 2].task_number -
				1;
			task_number =
				args->subcore_task[core_index + 2].task_number;
		}
	}

	task_base = task_obj->kv_addr;

	first_task = &task_base[task_start];
	last_task = &task_base[task_end];

	REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);

	REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
		   pc_data_amount_scale - 1) /
			  pc_data_amount_scale -
			  1,
		  RKNPU_OFFSET_PC_DATA_AMOUNT);

	REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK);

	REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR);

	REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number,
		  RKNPU_OFFSET_PC_TASK_CONTROL);

	REG_WRITE(args->task_base_addr, RKNPU_OFFSET_PC_DMA_BASE_ADDR);

	job->first_task = first_task;
	job->last_task = last_task;
	job->int_mask[core_index] = last_task->int_mask;

	REG_WRITE(0x1, RKNPU_OFFSET_PC_OP_EN);
	REG_WRITE(0x0, RKNPU_OFFSET_PC_OP_EN);

	return 0;
}

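/* Only PC-mode submissions are supported; plain slave-mode jobs are rejected. */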
static int rknpu_job_commit(struct rknpu_job *job, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];

	// switch to slave mode
	REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);

	if (!(args->flags & RKNPU_JOB_PC))
		return -EINVAL;

	return rknpu_job_commit_pc(job, core_index);
}

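/*
 * If the core is idle, pop the next job from its todo list and commit it.
 * For multi-core jobs the commit is deferred until every selected core has
 * dequeued the job (run_count drops to zero), at which point it is written
 * to all of them at once.
 */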
static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
{
	struct rknpu_job *job = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;

	if (rknpu_dev->soft_reseting)
		return;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);

	if (subcore_data->job || list_empty(&subcore_data->todo_list)) {
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		return;
	}

	job = list_first_entry(&subcore_data->todo_list, struct rknpu_job,
			       head[core_index]);

	list_del_init(&job->head[core_index]);
	job->in_queue[core_index] = false;
	subcore_data->job = job;
	job->hw_recoder_time = ktime_get();
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	if (atomic_dec_and_test(&job->run_count)) {
		if (job->args->core_mask & RKNPU_CORE0_MASK)
			job->ret = rknpu_job_commit(job, 0);
		if (job->args->core_mask & RKNPU_CORE1_MASK)
			job->ret = rknpu_job_commit(job, 1);
		if (job->args->core_mask & RKNPU_CORE2_MASK)
			job->ret = rknpu_job_commit(job, 2);
	}
}

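/*
 * Per-core completion: release the core, account busy time, and once the
 * last selected core has interrupted, mark the job done, signal its fence,
 * wake the waiter (async jobs are cleaned up from the workqueue) and kick
 * the next job on this core.
 */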
static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	subcore_data->job = NULL;
	subcore_data->task_num -= rknn_get_task_number(job, core_index);
	subcore_data->timer.busy_time +=
		ktime_us_delta(now, job->hw_recoder_time);
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	if (atomic_dec_and_test(&job->interrupt_count)) {
		int use_core_num = job->use_core_num;

		job->flags |= RKNPU_JOB_DONE;
		job->ret = ret;

		if (job->fence)
			dma_fence_signal(job->fence);

		if (job->flags & RKNPU_JOB_ASYNC)
			schedule_work(&job->cleanup_work);

		if (use_core_num > 1)
			wake_up(&rknpu_dev->subcore_datas[0].job_done_wq);
		else
			wake_up(&subcore_data->job_done_wq);
	}

	rknpu_job_next(rknpu_dev, core_index);
}

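/*
 * Queue the job on its target core(s). For RKNPU_CORE_AUTO_MASK the three
 * subcores are ordered by their pending task count and the first idle core
 * in that order is picked (falling back to the least loaded one), pinning
 * the job to exactly one core before it goes on the todo list.
 */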
static void rknpu_job_schedule(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0, core_index = 0;
	unsigned long flags;
	int task_num_list[3] = { 0, 1, 2 };
	int tmp = 0;

	if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
		if (rknpu_dev->subcore_datas[0].task_num >
		    rknpu_dev->subcore_datas[1].task_num) {
			tmp = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		}
		if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
		    rknpu_dev->subcore_datas[2].task_num) {
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		} else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
			   rknpu_dev->subcore_datas[2].task_num) {
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = tmp;
		}
		if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
			core_index = task_num_list[0];
		else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
			core_index = task_num_list[1];
		else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
			core_index = task_num_list[2];
		else
			core_index = task_num_list[0];

		job->args->core_mask = rknpu_core_mask(core_index);
		job->use_core_num = 1;
		atomic_set(&job->run_count, job->use_core_num);
		atomic_set(&job->interrupt_count, job->use_core_num);
	}

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			list_add_tail(&job->head[i], &subcore_data->todo_list);
			subcore_data->task_num += rknn_get_task_number(job, i);
			job->in_queue[i] = true;
		}
	}
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i))
			rknpu_job_next(rknpu_dev, i);
	}
}

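/*
 * Called when a job fails or times out: detach it from any core that never
 * saw its interrupt, dump per-core IRQ/task status and soft-reset the NPU
 * on timeout, then free the job.
 */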
static void rknpu_job_abort(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	int i = 0;

	msleep(100);

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
			if (job == subcore_data->job && !job->irq_entry[i]) {
				subcore_data->job = NULL;
				subcore_data->task_num -=
					rknn_get_task_number(job, i);
			}
			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		}
	}

	if (job->ret == -ETIMEDOUT) {
		LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (job->args->core_mask & rknpu_core_mask(i)) {
				void __iomem *rknpu_core_base =
					rknpu_dev->base[i];
				LOG_ERROR(
					"\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
					i, REG_READ(RKNPU_OFFSET_INT_STATUS),
					REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
					job->int_mask[i],
					(REG_READ(
						 rknpu_dev->config
							 ->pc_task_status_offset) &
					 rknpu_dev->config->pc_task_number_mask),
					ktime_to_us(ktime_sub(ktime_get(),
							      job->timestamp)));
			}
		}
		rknpu_soft_reset(rknpu_dev);
	} else {
		LOG_ERROR(
			"job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
			job->flags, job->ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
	}

	rknpu_job_cleanup(job);
}

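/*
 * Widen each two-bit interrupt status group to both of its bits when either
 * is set, so the raw IRQ status can be compared against the expected mask
 * without caring which bit of a pair actually fired.
 */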
static inline uint32_t rknpu_fuzz_status(uint32_t status)
{
	uint32_t fuzz_status = 0;

	if ((status & 0x3) != 0)
		fuzz_status |= 0x3;

	if ((status & 0xc) != 0)
		fuzz_status |= 0xc;

	if ((status & 0x30) != 0)
		fuzz_status |= 0x30;

	if ((status & 0xc0) != 0)
		fuzz_status |= 0xc0;

	if ((status & 0x300) != 0)
		fuzz_status |= 0x300;

	if ((status & 0xc00) != 0)
		fuzz_status |= 0xc00;

	return fuzz_status;
}

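/*
 * Common interrupt handler for all cores: clear the interrupt, validate the
 * status against the mask of the job's last task and, if it matches,
 * complete the job on this core. A spurious interrupt with no active job
 * just clears the status and tries to start the next queued job.
 */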
static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index)
{
	struct rknpu_device *rknpu_dev = data;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	struct rknpu_subcore_data *subcore_data = NULL;
	struct rknpu_job *job = NULL;
	uint32_t status = 0;
	unsigned long flags;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	job = subcore_data->job;
	if (!job) {
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		rknpu_job_next(rknpu_dev, core_index);
		return IRQ_HANDLED;
	}
	job->irq_entry[core_index] = true;
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	status = REG_READ(RKNPU_OFFSET_INT_STATUS);

	job->int_status[core_index] = status;

	if (rknpu_fuzz_status(status) != job->int_mask[core_index]) {
		LOG_ERROR(
			"invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
			status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
			job->int_mask[core_index],
			(REG_READ(rknpu_dev->config->pc_task_status_offset) &
			 rknpu_dev->config->pc_task_number_mask));
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		return IRQ_HANDLED;
	}

	REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);

	rknpu_job_done(job, 0, core_index);

	return IRQ_HANDLED;
}

irqreturn_t rknpu_core0_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 0);
}

irqreturn_t rknpu_core1_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 1);
}

irqreturn_t rknpu_core2_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 2);
}

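/*
 * Before a new non-blocking submit, reap any job that has overrun its
 * timeout on the requested cores: soft-reset the NPU, hand the stuck job to
 * the cleanup workqueue, and drain everything still queued behind it.
 */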
static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev,
				    int core_mask)
{
	struct rknpu_job *job = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0;

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			job = subcore_data->job;
			if (job &&
			    ktime_to_ms(ktime_sub(now, job->timestamp)) >=
				    job->args->timeout) {
				rknpu_soft_reset(rknpu_dev);

				spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
				subcore_data->job = NULL;
				spin_unlock_irqrestore(&rknpu_dev->irq_lock,
						       flags);

				do {
					schedule_work(&job->cleanup_work);

					spin_lock_irqsave(&rknpu_dev->irq_lock,
							  flags);

					if (!list_empty(
						    &subcore_data->todo_list)) {
						job = list_first_entry(
							&subcore_data->todo_list,
							struct rknpu_job,
							head[i]);
						list_del_init(&job->head[i]);
						job->in_queue[i] = false;
					} else {
						job = NULL;
					}

					spin_unlock_irqrestore(
						&rknpu_dev->irq_lock, flags);
				} while (job);
			}
		}
	}
}

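/*
 * Core of the submit ioctl: allocate a job, honour optional in/out fences,
 * then either queue it and return immediately (RKNPU_JOB_NONBLOCK) or queue
 * it and wait for completion, propagating the job's result.
 */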
static int rknpu_submit(struct rknpu_device *rknpu_dev,
			struct rknpu_submit *args)
{
	struct rknpu_job *job = NULL;
	int ret = -EINVAL;

	if (args->task_number == 0) {
		LOG_ERROR("invalid rknpu task number!\n");
		return -EINVAL;
	}

	job = rknpu_job_alloc(rknpu_dev, args);
	if (!job) {
		LOG_ERROR("failed to allocate rknpu job!\n");
		return -ENOMEM;
	}

	if (args->flags & RKNPU_JOB_FENCE_IN) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
		struct dma_fence *in_fence;

		in_fence = sync_file_get_fence(args->fence_fd);

		if (!in_fence) {
			LOG_ERROR("invalid fence in fd, fd: %d\n",
				  args->fence_fd);
			rknpu_job_free(job);
			return -EINVAL;
		}
		args->fence_fd = -1;

		/*
		 * Wait if the fence is from a foreign context, or if the fence
		 * array contains any fence from a foreign context.
		 */
		ret = 0;
		if (!dma_fence_match_context(in_fence,
					     rknpu_dev->fence_ctx->context))
			ret = dma_fence_wait_timeout(in_fence, true,
						     args->timeout);
		dma_fence_put(in_fence);
		if (ret < 0) {
			if (ret != -ERESTARTSYS)
				LOG_ERROR("Error (%d) waiting for fence!\n",
					  ret);

			rknpu_job_free(job);
			return ret;
		}
#else
		LOG_ERROR(
			"failed to use rknpu fence, please enable rknpu fence config!\n");
		rknpu_job_free(job);
		return -EINVAL;
#endif
	}

	if (args->flags & RKNPU_JOB_FENCE_OUT) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
		ret = rknpu_fence_alloc(job);
		if (ret) {
			rknpu_job_free(job);
			return ret;
		}
		job->args->fence_fd = rknpu_fence_get_fd(job);
		args->fence_fd = job->args->fence_fd;
#else
		LOG_ERROR(
			"failed to use rknpu fence, please enable rknpu fence config!\n");
		rknpu_job_free(job);
		return -EINVAL;
#endif
	}

	if (args->flags & RKNPU_JOB_NONBLOCK) {
		job->flags |= RKNPU_JOB_ASYNC;
		rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask);
		rknpu_job_schedule(job);
		ret = job->ret;
		if (ret) {
			rknpu_job_abort(job);
			return ret;
		}
	} else {
		rknpu_job_schedule(job);
		if (args->flags & RKNPU_JOB_PC)
			job->ret = rknpu_job_wait(job);

		args->task_counter = job->args->task_counter;
		ret = job->ret;
		if (!ret)
			rknpu_job_cleanup(job);
		else
			rknpu_job_abort(job);
	}

	return ret;
}

#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
int rknpu_submit_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);
	struct rknpu_submit *args = data;

	return rknpu_submit(rknpu_dev, args);
}
#endif

#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
{
	struct rknpu_submit args;
	int ret = -EINVAL;

	if (unlikely(copy_from_user(&args, (struct rknpu_submit *)data,
				    sizeof(struct rknpu_submit)))) {
		LOG_ERROR("%s: copy_from_user failed\n", __func__);
		ret = -EFAULT;
		return ret;
	}

	ret = rknpu_submit(rknpu_dev, &args);

	if (unlikely(copy_to_user((struct rknpu_submit *)data, &args,
				  sizeof(struct rknpu_submit)))) {
		LOG_ERROR("%s: copy_to_user failed\n", __func__);
		ret = -EFAULT;
		return ret;
	}

	return ret;
}
#endif

int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];

	if (version == NULL)
		return -EINVAL;

	*version = REG_READ(RKNPU_OFFSET_VERSION) +
		   (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);

	return 0;
}

int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority,
			  uint32_t *expect, uint32_t *tw)
{
	void __iomem *base = rknpu_dev->bw_priority_base;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Get bw_priority is not supported on this device!\n");
		return 0;
	}

	if (!base)
		return -EINVAL;

	spin_lock(&rknpu_dev->lock);

	if (priority != NULL)
		*priority = _REG_READ(base, 0x0);

	if (expect != NULL)
		*expect = _REG_READ(base, 0x8);

	if (tw != NULL)
		*tw = _REG_READ(base, 0xc);

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority,
			  uint32_t expect, uint32_t tw)
{
	void __iomem *base = rknpu_dev->bw_priority_base;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Set bw_priority is not supported on this device!\n");
		return 0;
	}

	if (!base)
		return -EINVAL;

	spin_lock(&rknpu_dev->lock);

	if (priority != 0)
		_REG_WRITE(base, priority, 0x0);

	if (expect != 0)
		_REG_WRITE(base, expect, 0x8);

	if (tw != 0)
		_REG_WRITE(base, tw, 0xc);

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

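/*
 * Reset the NPU read/write byte counters. On parts with pc_dma_ctrl the
 * current PC_DATA_ADDR value is saved and restored around the clear
 * sequence.
 */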
int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Clear rw_amount is not supported on this device!\n");
		return 0;
	}

	spin_lock(&rknpu_dev->lock);

	if (rknpu_dev->config->pc_dma_ctrl) {
		uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);

		REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
	} else {
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
	}

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr,
			uint32_t *dt_rd, uint32_t *wd_rd)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];
	int amount_scale = rknpu_dev->config->pc_data_amount_scale;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Get rw_amount is not supported on this device!\n");
		return 0;
	}

	spin_lock(&rknpu_dev->lock);

	if (dt_wr != NULL)
		*dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale;

	if (dt_rd != NULL)
		*dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale;

	if (wd_rd != NULL)
		*wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale;

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount)
{
	uint32_t dt_wr = 0;
	uint32_t dt_rd = 0;
	uint32_t wd_rd = 0;
	int ret = -EINVAL;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN(
			"Get total_rw_amount is not supported on this device!\n");
		return 0;
	}

	ret = rknpu_get_rw_amount(rknpu_dev, &dt_wr, &dt_rd, &wd_rd);

	if (amount != NULL)
		*amount = dt_wr + dt_rd + wd_rd;

	return ret;
}