// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Rockchip Electronics Co.Ltd
 * Author: Felix Zeng <felix.zeng@rock-chips.com>
 */

#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/sync_file.h>
#include <linux/io.h>

#include "rknpu_ioctl.h"
#include "rknpu_drv.h"
#include "rknpu_reset.h"
#include "rknpu_gem.h"
#include "rknpu_fence.h"
#include "rknpu_job.h"
#include "rknpu_mem.h"

#define _REG_READ(base, offset) readl(base + (offset))
#define _REG_WRITE(base, value, offset) writel(value, base + (offset))

#define REG_READ(offset) _REG_READ(rknpu_core_base, offset)
#define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset)

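/*
 * Helpers translating between a core index (0..2) and the per-core
 * RKNPU_COREn_MASK bits carried in rknpu_submit->core_mask.
 */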
static int rknpu_core_index(int core_mask)
{
	int index = 0;

	if (core_mask & RKNPU_CORE0_MASK)
		index = 0;
	else if (core_mask & RKNPU_CORE1_MASK)
		index = 1;
	else if (core_mask & RKNPU_CORE2_MASK)
		index = 2;

	return index;
}

static int rknpu_core_mask(int core_index)
{
	int core_mask = RKNPU_CORE_AUTO_MASK;

	switch (core_index) {
	case 0:
		core_mask = RKNPU_CORE0_MASK;
		break;
	case 1:
		core_mask = RKNPU_CORE1_MASK;
		break;
	case 2:
		core_mask = RKNPU_CORE2_MASK;
		break;
	default:
		break;
	}

	return core_mask;
}

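/*
 * Number of tasks this job runs on a given core: single-core jobs use the
 * global task_number, while multi-core jobs read the per-core split from
 * args->subcore_task[] (three-core jobs index from entry 2 onwards).
 */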
static int rknn_get_task_number(struct rknpu_job *job, int core_index)
{
	int task_num = job->args->task_number;

	if (job->use_core_num == 2)
		task_num = job->args->subcore_task[core_index].task_number;
	else if (job->use_core_num == 3)
		task_num = job->args->subcore_task[core_index + 2].task_number;

	return task_num;
}

static void rknpu_job_free(struct rknpu_job *job)
{
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj = NULL;

	task_obj =
		(struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr;
	if (task_obj)
		rknpu_gem_object_put(&task_obj->base);
#endif

	if (job->fence)
		dma_fence_put(job->fence);

	if (job->args_owner)
		kfree(job->args);

	kfree(job);
}

static int rknpu_job_cleanup(struct rknpu_job *job)
{
	rknpu_job_free(job);

	return 0;
}

static void rknpu_job_cleanup_work(struct work_struct *work)
{
	struct rknpu_job *job =
		container_of(work, struct rknpu_job, cleanup_work);

	rknpu_job_cleanup(job);
}

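/*
 * Allocate and initialise a job for one submit: use_core_num is the number
 * of bits set in core_mask, and run_count/interrupt_count start at that
 * value so commit and completion wait for every selected core.  Non-blocking
 * submits get their own copy of args (args_owner) plus a cleanup work item;
 * blocking submits borrow the caller's args.
 */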
static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev,
						struct rknpu_submit *args)
{
	struct rknpu_job *job = NULL;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj = NULL;
#endif
	if (rknpu_dev->config->num_irqs == 1)
		args->core_mask = RKNPU_CORE0_MASK;

	job = kzalloc(sizeof(*job), GFP_KERNEL);
	if (!job)
		return NULL;

	job->timestamp = ktime_get();
	job->rknpu_dev = rknpu_dev;
	job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) +
			    ((args->core_mask & RKNPU_CORE1_MASK) >> 1) +
			    ((args->core_mask & RKNPU_CORE2_MASK) >> 2);
	atomic_set(&job->run_count, job->use_core_num);
	atomic_set(&job->interrupt_count, job->use_core_num);
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
	if (task_obj)
		rknpu_gem_object_get(&task_obj->base);
#endif

	if (!(args->flags & RKNPU_JOB_NONBLOCK)) {
		job->args = args;
		job->args_owner = false;
		return job;
	}

	job->args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!job->args) {
		kfree(job);
		return NULL;
	}
	*job->args = *args;
	job->args_owner = true;

	INIT_WORK(&job->cleanup_work, rknpu_job_cleanup_work);

	return job;
}

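/*
 * Block until the job reports RKNPU_JOB_DONE, a soft reset starts, or the
 * caller's timeout expires (retried up to three times while the job is still
 * queued).  If the job never left the todo list it is unlinked here and the
 * per-core task accounting is rolled back before returning an error.
 */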
static inline int rknpu_job_wait(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
	struct rknpu_task *last_task = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	void __iomem *rknpu_core_base = NULL;
	int core_index = rknpu_core_index(job->args->core_mask);
	unsigned long flags;
	int wait_count = 0;
	int ret = -EINVAL;
	int i = 0;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	do {
		ret = wait_event_timeout(subcore_data->job_done_wq,
					 job->flags & RKNPU_JOB_DONE ||
						 rknpu_dev->soft_reseting,
					 msecs_to_jiffies(args->timeout));
		if (++wait_count >= 3)
			break;
	} while (ret == 0 && job->in_queue[core_index]);

	if (job->in_queue[core_index]) {
		spin_lock_irqsave(&rknpu_dev->lock, flags);
		subcore_data->task_num -= rknn_get_task_number(job, core_index);
		if (job->use_core_num == 1) {
			list_del_init(&job->head[core_index]);
			job->in_queue[core_index] = false;
		} else if (job->use_core_num > 1) {
			for (i = 0; i < job->use_core_num; i++) {
				if (job->in_queue[i]) {
					list_del_init(&job->head[i]);
					job->in_queue[i] = false;
				}
			}
		}
		spin_unlock_irqrestore(&rknpu_dev->lock, flags);
		return ret < 0 ? ret : -EINVAL;
	}

	last_task = job->last_task;
	if (!last_task)
		return ret < 0 ? ret : -EINVAL;

	last_task->int_status = job->int_status[core_index];

	if (ret <= 0) {
		args->task_counter = 0;
		rknpu_core_base = rknpu_dev->base[core_index];
		if (args->flags & RKNPU_JOB_PC) {
			uint32_t task_status = REG_READ(
				rknpu_dev->config->pc_task_status_offset);
			args->task_counter =
				(task_status &
				 rknpu_dev->config->pc_task_number_mask);
		}

		LOG_ERROR(
			"failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n",
			args->task_counter, args->flags, ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));

		return ret < 0 ? ret : -ETIMEDOUT;
	}

	if (!(job->flags & RKNPU_JOB_DONE))
		return -EINVAL;

	args->task_counter = args->task_number;

	return 0;
}

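/*
 * Program one core for PC (command-list) mode: pick this core's slice of the
 * task list, point the PC data address at the first task's register command
 * buffer, set the interrupt mask of the last task, then pulse PC_OP_EN to
 * start execution.  The magic values written to 0x1004/0x3004 and the task
 * control encoding follow the vendor-defined register layout.
 */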
static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
	struct rknpu_gem_object *task_obj =
		(struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr;
#endif
#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
	struct rknpu_mem_object *task_obj =
		(struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr;
#endif
	struct rknpu_task *task_base = NULL;
	struct rknpu_task *first_task = NULL;
	struct rknpu_task *last_task = NULL;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	int task_start = args->task_start;
	int task_end = args->task_start + args->task_number - 1;
	int task_number = args->task_number;
	int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 1 : 0;
	int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale;
	int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits;
	int i = 0;

	if (!task_obj)
		return -EINVAL;

	if (rknpu_dev->config->num_irqs > 1) {
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (i == core_index) {
				REG_WRITE((0xe + 0x10000000 * i), 0x1004);
				REG_WRITE((0xe + 0x10000000 * i), 0x3004);
			}
		}

		if (job->use_core_num == 1) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 2) {
			task_start = args->subcore_task[core_index].task_start;
			task_end = args->subcore_task[core_index].task_start +
				   args->subcore_task[core_index].task_number -
				   1;
			task_number =
				args->subcore_task[core_index].task_number;
		} else if (job->use_core_num == 3) {
			task_start =
				args->subcore_task[core_index + 2].task_start;
			task_end =
				args->subcore_task[core_index + 2].task_start +
				args->subcore_task[core_index + 2].task_number -
				1;
			task_number =
				args->subcore_task[core_index + 2].task_number;
		}
	}

	task_base = task_obj->kv_addr;

	first_task = &task_base[task_start];
	last_task = &task_base[task_end];

	REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR);

	REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT +
		   pc_data_amount_scale - 1) /
				  pc_data_amount_scale -
			  1,
		  RKNPU_OFFSET_PC_DATA_AMOUNT);

	REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK);

	REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR);

	REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number,
		  RKNPU_OFFSET_PC_TASK_CONTROL);

	REG_WRITE(args->task_base_addr, RKNPU_OFFSET_PC_DMA_BASE_ADDR);

	job->first_task = first_task;
	job->last_task = last_task;
	job->int_mask[core_index] = last_task->int_mask;

	REG_WRITE(0x1, RKNPU_OFFSET_PC_OP_EN);
	REG_WRITE(0x0, RKNPU_OFFSET_PC_OP_EN);

	return 0;
}

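/*
 * Reset the core to slave mode before committing; only PC-mode submits
 * (RKNPU_JOB_PC) are accepted here.
 */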
static int rknpu_job_commit(struct rknpu_job *job, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_submit *args = job->args;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];

	// switch to slave mode
	REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);

	if (!(args->flags & RKNPU_JOB_PC))
		return -EINVAL;

	return rknpu_job_commit_pc(job, core_index);
}

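/*
 * Dequeue the next job for a core and make it the core's current job.  The
 * actual hardware commit only happens once run_count drops to zero, i.e.
 * after every core selected by the job's core_mask has picked it up.
 */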
static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index)
{
	struct rknpu_job *job = NULL;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;

	if (rknpu_dev->soft_reseting)
		return;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);

	if (subcore_data->job || list_empty(&subcore_data->todo_list)) {
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		return;
	}

	job = list_first_entry(&subcore_data->todo_list, struct rknpu_job,
			       head[core_index]);

	list_del_init(&job->head[core_index]);
	job->in_queue[core_index] = false;
	subcore_data->job = job;
	job->hw_recoder_time = ktime_get();
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	if (atomic_dec_and_test(&job->run_count)) {
		if (job->args->core_mask & RKNPU_CORE0_MASK)
			job->ret = rknpu_job_commit(job, 0);
		if (job->args->core_mask & RKNPU_CORE1_MASK)
			job->ret = rknpu_job_commit(job, 1);
		if (job->args->core_mask & RKNPU_CORE2_MASK)
			job->ret = rknpu_job_commit(job, 2);
	}
}

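/*
 * Per-core completion: update the subcore accounting and busy time, and once
 * interrupt_count reaches zero (all cores finished) mark the job done, signal
 * its fence, schedule async cleanup if needed and wake the waiter.  Finally
 * try to start the next queued job on this core.
 */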
static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	subcore_data->job = NULL;
	subcore_data->task_num -= rknn_get_task_number(job, core_index);
	subcore_data->timer.busy_time +=
		ktime_us_delta(now, job->hw_recoder_time);
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	if (atomic_dec_and_test(&job->interrupt_count)) {
		int use_core_num = job->use_core_num;

		job->flags |= RKNPU_JOB_DONE;
		job->ret = ret;

		if (job->fence)
			dma_fence_signal(job->fence);

		if (job->flags & RKNPU_JOB_ASYNC)
			schedule_work(&job->cleanup_work);

		if (use_core_num > 1)
			wake_up(&rknpu_dev->subcore_datas[0].job_done_wq);
		else
			wake_up(&subcore_data->job_done_wq);
	}

	rknpu_job_next(rknpu_dev, core_index);
}

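/*
 * Queue a job onto the todo list of every core in its core_mask.  For
 * RKNPU_CORE_AUTO_MASK the three subcores are ordered by pending task count
 * and the job is pinned to the idlest (preferably currently idle) core
 * before queueing.
 */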
static void rknpu_job_schedule(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0, core_index = 0;
	unsigned long flags;
	int task_num_list[3] = { 0, 1, 2 };
	int tmp = 0;

	if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) {
		if (rknpu_dev->subcore_datas[0].task_num >
		    rknpu_dev->subcore_datas[1].task_num) {
			tmp = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		}
		if (rknpu_dev->subcore_datas[task_num_list[0]].task_num >
		    rknpu_dev->subcore_datas[2].task_num) {
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = task_num_list[0];
			task_num_list[0] = tmp;
		} else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num >
			   rknpu_dev->subcore_datas[2].task_num) {
			tmp = task_num_list[2];
			task_num_list[2] = task_num_list[1];
			task_num_list[1] = tmp;
		}
		if (!rknpu_dev->subcore_datas[task_num_list[0]].job)
			core_index = task_num_list[0];
		else if (!rknpu_dev->subcore_datas[task_num_list[1]].job)
			core_index = task_num_list[1];
		else if (!rknpu_dev->subcore_datas[task_num_list[2]].job)
			core_index = task_num_list[2];
		else
			core_index = task_num_list[0];

		job->args->core_mask = rknpu_core_mask(core_index);
		job->use_core_num = 1;
		atomic_set(&job->run_count, job->use_core_num);
		atomic_set(&job->interrupt_count, job->use_core_num);
	}

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			list_add_tail(&job->head[i], &subcore_data->todo_list);
			subcore_data->task_num += rknn_get_task_number(job, i);
			job->in_queue[i] = true;
		}
	}
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i))
			rknpu_job_next(rknpu_dev, i);
	}
}

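/*
 * Tear down a failed or timed-out job: detach it from any core that never
 * took an interrupt for it, dump the per-core interrupt state and trigger a
 * soft reset on timeout, then free the job.
 */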
static void rknpu_job_abort(struct rknpu_job *job)
{
	struct rknpu_device *rknpu_dev = job->rknpu_dev;
	struct rknpu_subcore_data *subcore_data = NULL;
	unsigned long flags;
	int i = 0;

	msleep(100);

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (job->args->core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
			if (job == subcore_data->job && !job->irq_entry[i]) {
				subcore_data->job = NULL;
				subcore_data->task_num -=
					rknn_get_task_number(job, i);
			}
			spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		}
	}

	if (job->ret == -ETIMEDOUT) {
		LOG_ERROR("job timeout, flags: %#x:\n", job->flags);
		for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
			if (job->args->core_mask & rknpu_core_mask(i)) {
				void __iomem *rknpu_core_base =
					rknpu_dev->base[i];
				LOG_ERROR(
					"\tcore %d irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n",
					i, REG_READ(RKNPU_OFFSET_INT_STATUS),
					REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
					job->int_mask[i],
					(REG_READ(
						 rknpu_dev->config
							 ->pc_task_status_offset) &
					 rknpu_dev->config->pc_task_number_mask),
					ktime_to_us(ktime_sub(ktime_get(),
							      job->timestamp)));
			}
		}
		rknpu_soft_reset(rknpu_dev);
	} else {
		LOG_ERROR(
			"job abort, flags: %#x, ret: %d, elapsed time: %lldus\n",
			job->flags, job->ret,
			ktime_to_us(ktime_sub(ktime_get(), job->timestamp)));
	}

	rknpu_job_cleanup(job);
}

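/*
 * Widen an interrupt status word so that either bit of each two-bit group
 * marks the whole group as set; this lets the raw status be compared against
 * the expected int_mask without caring which half of a pair fired.
 */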
static inline uint32_t rknpu_fuzz_status(uint32_t status)
{
	uint32_t fuzz_status = 0;

	if ((status & 0x3) != 0)
		fuzz_status |= 0x3;

	if ((status & 0xc) != 0)
		fuzz_status |= 0xc;

	if ((status & 0x30) != 0)
		fuzz_status |= 0x30;

	if ((status & 0xc0) != 0)
		fuzz_status |= 0xc0;

	if ((status & 0x300) != 0)
		fuzz_status |= 0x300;

	if ((status & 0xc00) != 0)
		fuzz_status |= 0xc00;

	return fuzz_status;
}

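/*
 * Common interrupt path for all cores: clear the interrupt, validate the
 * status against the expected mask of the core's current job, and complete
 * the job via rknpu_job_done().  A spurious interrupt with no current job
 * still kicks the scheduler for this core.
 */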
static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index)
{
	struct rknpu_device *rknpu_dev = data;
	void __iomem *rknpu_core_base = rknpu_dev->base[core_index];
	struct rknpu_subcore_data *subcore_data = NULL;
	struct rknpu_job *job = NULL;
	uint32_t status = 0;
	unsigned long flags;

	subcore_data = &rknpu_dev->subcore_datas[core_index];

	spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
	job = subcore_data->job;
	if (!job) {
		spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		rknpu_job_next(rknpu_dev, core_index);
		return IRQ_HANDLED;
	}
	job->irq_entry[core_index] = true;
	spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags);

	status = REG_READ(RKNPU_OFFSET_INT_STATUS);

	job->int_status[core_index] = status;

	if (rknpu_fuzz_status(status) != job->int_mask[core_index]) {
		LOG_ERROR(
			"invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n",
			status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS),
			job->int_mask[core_index],
			(REG_READ(rknpu_dev->config->pc_task_status_offset) &
			 rknpu_dev->config->pc_task_number_mask));
		REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);
		return IRQ_HANDLED;
	}

	REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR);

	rknpu_job_done(job, 0, core_index);

	return IRQ_HANDLED;
}

irqreturn_t rknpu_core0_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 0);
}

irqreturn_t rknpu_core1_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 1);
}

irqreturn_t rknpu_core2_irq_handler(int irq, void *data)
{
	return rknpu_irq_handler(irq, data, 2);
}

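/*
 * Before a new non-blocking submit, reap any job on the selected cores that
 * has already exceeded its timeout: soft-reset the NPU, detach the stuck job
 * and drain the rest of that core's todo list, handing each job to its
 * cleanup work.
 */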
static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev,
				    int core_mask)
{
	struct rknpu_job *job = NULL;
	unsigned long flags;
	ktime_t now = ktime_get();
	struct rknpu_subcore_data *subcore_data = NULL;
	int i = 0;

	for (i = 0; i < rknpu_dev->config->num_irqs; i++) {
		if (core_mask & rknpu_core_mask(i)) {
			subcore_data = &rknpu_dev->subcore_datas[i];
			job = subcore_data->job;
			if (job &&
			    ktime_to_ms(ktime_sub(now, job->timestamp)) >=
				    job->args->timeout) {
				rknpu_soft_reset(rknpu_dev);

				spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
				subcore_data->job = NULL;
				spin_unlock_irqrestore(&rknpu_dev->irq_lock,
						       flags);

				do {
					schedule_work(&job->cleanup_work);

					spin_lock_irqsave(&rknpu_dev->irq_lock,
							  flags);

					if (!list_empty(
						    &subcore_data->todo_list)) {
						job = list_first_entry(
							&subcore_data->todo_list,
							struct rknpu_job,
							head[i]);
						list_del_init(&job->head[i]);
						job->in_queue[i] = false;
					} else {
						job = NULL;
					}

					spin_unlock_irqrestore(
						&rknpu_dev->irq_lock, flags);
				} while (job);
			}
		}
	}
}

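/*
 * Core submit path shared by the DRM and DMA-heap ioctls: allocate a job,
 * honour optional in/out fences, then either queue it asynchronously
 * (RKNPU_JOB_NONBLOCK) or queue it and wait for completion in place.
 */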
static int rknpu_submit(struct rknpu_device *rknpu_dev,
			struct rknpu_submit *args)
{
	struct rknpu_job *job = NULL;
	int ret = -EINVAL;

	if (args->task_number == 0) {
		LOG_ERROR("invalid rknpu task number!\n");
		return -EINVAL;
	}

	job = rknpu_job_alloc(rknpu_dev, args);
	if (!job) {
		LOG_ERROR("failed to allocate rknpu job!\n");
		return -ENOMEM;
	}

	if (args->flags & RKNPU_JOB_FENCE_IN) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
		struct dma_fence *in_fence;

		in_fence = sync_file_get_fence(args->fence_fd);

		if (!in_fence) {
			LOG_ERROR("invalid fence in fd, fd: %d\n",
				  args->fence_fd);
			/* drop the job so it is not leaked on this error path */
			rknpu_job_free(job);
			return -EINVAL;
		}
		args->fence_fd = -1;

		/*
		 * Wait if the fence is from a foreign context, or if the fence
		 * array contains any fence from a foreign context.
		 */
		ret = 0;
		if (!dma_fence_match_context(in_fence,
					     rknpu_dev->fence_ctx->context))
			ret = dma_fence_wait_timeout(in_fence, true,
						     args->timeout);
		dma_fence_put(in_fence);
		if (ret < 0) {
			if (ret != -ERESTARTSYS)
				LOG_ERROR("Error (%d) waiting for fence!\n",
					  ret);

			/* drop the job so it is not leaked on this error path */
			rknpu_job_free(job);
			return ret;
		}
#else
		LOG_ERROR(
			"failed to use rknpu fence, please enable rknpu fence config!\n");
		rknpu_job_free(job);
		return -EINVAL;
#endif
	}

	if (args->flags & RKNPU_JOB_FENCE_OUT) {
#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
		ret = rknpu_fence_alloc(job);
		if (ret) {
			rknpu_job_free(job);
			return ret;
		}
		job->args->fence_fd = rknpu_fence_get_fd(job);
		args->fence_fd = job->args->fence_fd;
#else
		LOG_ERROR(
			"failed to use rknpu fence, please enable rknpu fence config!\n");
		rknpu_job_free(job);
		return -EINVAL;
#endif
	}

	if (args->flags & RKNPU_JOB_NONBLOCK) {
		job->flags |= RKNPU_JOB_ASYNC;
		rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask);
		rknpu_job_schedule(job);
		ret = job->ret;
		if (ret) {
			rknpu_job_abort(job);
			return ret;
		}
	} else {
		rknpu_job_schedule(job);
		if (args->flags & RKNPU_JOB_PC)
			job->ret = rknpu_job_wait(job);

		args->task_counter = job->args->task_counter;
		ret = job->ret;
		if (!ret)
			rknpu_job_cleanup(job);
		else
			rknpu_job_abort(job);
	}

	return ret;
}

#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
int rknpu_submit_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);
	struct rknpu_submit *args = data;

	return rknpu_submit(rknpu_dev, args);
}
#endif

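/*
 * DMA-heap build: the ioctl argument arrives as a raw user pointer, so the
 * rknpu_submit struct is copied in, processed, and copied back so the caller
 * sees updated fields such as task_counter and fence_fd.
 */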
#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
{
	struct rknpu_submit args;
	int ret = -EINVAL;

	if (unlikely(copy_from_user(&args, (struct rknpu_submit *)data,
				    sizeof(struct rknpu_submit)))) {
		LOG_ERROR("%s: copy_from_user failed\n", __func__);
		ret = -EFAULT;
		return ret;
	}

	ret = rknpu_submit(rknpu_dev, &args);

	if (unlikely(copy_to_user((struct rknpu_submit *)data, &args,
				  sizeof(struct rknpu_submit)))) {
		LOG_ERROR("%s: copy_to_user failed\n", __func__);
		ret = -EFAULT;
		return ret;
	}

	return ret;
}
#endif

int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];

	if (version == NULL)
		return -EINVAL;

	*version = REG_READ(RKNPU_OFFSET_VERSION) +
		   (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff);

	return 0;
}

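/*
 * Bandwidth QoS accessors: read or program the priority/expect/tw registers
 * in the dedicated bw_priority block, guarded by bw_enable since not every
 * SoC variant exposes this block.
 */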
int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority,
			  uint32_t *expect, uint32_t *tw)
{
	void __iomem *base = rknpu_dev->bw_priority_base;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Get bw_priority is not supported on this device!\n");
		return 0;
	}

	if (!base)
		return -EINVAL;

	spin_lock(&rknpu_dev->lock);

	if (priority != NULL)
		*priority = _REG_READ(base, 0x0);

	if (expect != NULL)
		*expect = _REG_READ(base, 0x8);

	if (tw != NULL)
		*tw = _REG_READ(base, 0xc);

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority,
			  uint32_t expect, uint32_t tw)
{
	void __iomem *base = rknpu_dev->bw_priority_base;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Set bw_priority is not supported on this device!\n");
		return 0;
	}

	if (!base)
		return -EINVAL;

	spin_lock(&rknpu_dev->lock);

	if (priority != 0)
		_REG_WRITE(base, priority, 0x0);

	if (expect != 0)
		_REG_WRITE(base, expect, 0x8);

	if (tw != 0)
		_REG_WRITE(base, tw, 0xc);

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

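/*
 * Read/write amount counters on core 0 track DMA traffic (data written, data
 * read, weights read), scaled by pc_data_amount_scale.  Clearing uses a
 * toggle sequence on CLR_ALL_RW_AMOUNT; on pc_dma_ctrl parts the PC data
 * address is saved and restored around the clear.
 */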
int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Clear rw_amount is not supported on this device!\n");
		return 0;
	}

	spin_lock(&rknpu_dev->lock);

	if (rknpu_dev->config->pc_dma_ctrl) {
		uint32_t pc_data_addr = REG_READ(RKNPU_OFFSET_PC_DATA_ADDR);

		REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR);
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(pc_data_addr, RKNPU_OFFSET_PC_DATA_ADDR);
	} else {
		REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
		REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT);
	}

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr,
			uint32_t *dt_rd, uint32_t *wd_rd)
{
	void __iomem *rknpu_core_base = rknpu_dev->base[0];
	int amount_scale = rknpu_dev->config->pc_data_amount_scale;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN("Get rw_amount is not supported on this device!\n");
		return 0;
	}

	spin_lock(&rknpu_dev->lock);

	if (dt_wr != NULL)
		*dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale;

	if (dt_rd != NULL)
		*dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale;

	if (wd_rd != NULL)
		*wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale;

	spin_unlock(&rknpu_dev->lock);

	return 0;
}

int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount)
{
	uint32_t dt_wr = 0;
	uint32_t dt_rd = 0;
	uint32_t wd_rd = 0;
	int ret = -EINVAL;

	if (!rknpu_dev->config->bw_enable) {
		LOG_WARN(
			"Get total_rw_amount is not supported on this device!\n");
		return 0;
	}

	ret = rknpu_get_rw_amount(rknpu_dev, &dt_wr, &dt_rd, &wd_rd);

	if (amount != NULL)
		*amount = dt_wr + dt_rd + wd_rd;

	return ret;
}