1 /*
2 * Copyright 2015 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_jpege_vepu2"
18
19 #include <string.h>
20
21 #include "mpp_env.h"
22 #include "mpp_common.h"
23 #include "mpp_mem.h"
24 #include "mpp_platform.h"
25 #include "mpp_dmabuf.h"
26
27 #include "mpp_enc_hal.h"
28 #include "vcodec_service.h"
29
30 #include "hal_jpege_debug.h"
31 #include "hal_jpege_api_v2.h"
32 #include "hal_jpege_base.h"
33
34 #define VEPU_JPEGE_VEPU2_NUM_REGS 184
35 #define VEPU2_REG_INPUT_Y 48
36 #define VEPU2_REG_INPUT_U 49
37 #define VEPU2_REG_INPUT_V 50
38
39 typedef struct jpege_vepu2_reg_set_t {
40 RK_U32 val[VEPU_JPEGE_VEPU2_NUM_REGS];
41 } jpege_vepu2_reg_set;
42
43 #define MAX_CORE_NUM 4
44
45 typedef struct JpegeMultiCoreCtx_t {
46 RK_U32 multi_core_enabled;
47 RK_U32 partion_num;
48 MppDevRegOffCfgs *reg_cfg;
49
50 MppBufferGroup partions_group;
51 MppBuffer partions_buf[MAX_CORE_NUM - 1];
52 RK_U32 buf_size;
53
54 RK_U32 part_rows[MAX_CORE_NUM];
55 RK_U32 ecs_cnt[MAX_CORE_NUM];
56
57 void *regs_base;
58 void *regs[MAX_CORE_NUM];
59 void *regs_out[MAX_CORE_NUM];
60 } JpegeMultiCoreCtx;
61
hal_jpege_vepu2_init(void * hal,MppEncHalCfg * cfg)62 MPP_RET hal_jpege_vepu2_init(void *hal, MppEncHalCfg *cfg)
63 {
64 MPP_RET ret = MPP_OK;
65 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
66 MppClientType type;
67 RK_U32 vcodec_type = mpp_get_vcodec_type();
68
69 mpp_env_get_u32("hal_jpege_debug", &hal_jpege_debug, 0);
70 hal_jpege_dbg_func("enter hal %p cfg %p\n", hal, cfg);
71
72 /* update output to MppEnc */
73 type = (vcodec_type & HAVE_VEPU2_JPEG) ?
74 VPU_CLIENT_VEPU2_JPEG : VPU_CLIENT_VEPU2;
75
76 cfg->type = type;
77 ret = mpp_dev_init(&cfg->dev, type);
78 if (ret) {
79 mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
80 return ret;
81 }
82 ctx->dev = cfg->dev;
83 ctx->type = cfg->type;
84 ctx->task_cnt = cfg->task_cnt;
85
86 jpege_bits_init(&ctx->bits);
87 mpp_assert(ctx->bits);
88
89 ctx->cfg = cfg->cfg;
90 ctx->reg_size = sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS;
91 ctx->regs = mpp_calloc_size(void, (ctx->reg_size + EXTRA_INFO_SIZE) * ctx->task_cnt);
92 if (NULL == ctx->regs) {
93 mpp_err_f("failed to malloc vepu2 regs\n");
94 return MPP_NOK;
95 }
96
97 ctx->regs_out = mpp_calloc_size(void, (ctx->reg_size + EXTRA_INFO_SIZE) * ctx->task_cnt);
98 if (NULL == ctx->regs_out) {
99 mpp_err_f("failed to malloc vepu2 regs\n");
100 return MPP_NOK;
101 }
102
103 hal_jpege_rc_init(&ctx->hal_rc);
104
105 hal_jpege_dbg_func("leave hal %p\n", hal);
106 return MPP_OK;
107 }
108
hal_jpege_vepu2_deinit(void * hal)109 MPP_RET hal_jpege_vepu2_deinit(void *hal)
110 {
111 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
112
113 hal_jpege_dbg_func("enter hal %p\n", hal);
114
115 if (ctx->bits) {
116 jpege_bits_deinit(ctx->bits);
117 ctx->bits = NULL;
118 }
119
120 if (ctx->dev) {
121 mpp_dev_deinit(ctx->dev);
122 ctx->dev = NULL;
123 }
124
125 if (ctx->ctx_ext) {
126 JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
127 RK_U32 i;
128
129 if (ctx_ext->reg_cfg) {
130 mpp_dev_multi_offset_deinit(ctx_ext->reg_cfg);
131 ctx_ext->reg_cfg = NULL;
132 }
133
134 for (i = 0; i < MAX_CORE_NUM - 1; i++)
135 if (ctx_ext->partions_buf[i])
136 mpp_buffer_put(ctx_ext->partions_buf[i]);
137
138 if (ctx_ext->partions_group) {
139 mpp_buffer_group_put(ctx_ext->partions_group);
140 ctx_ext->partions_group = NULL;
141 }
142
143 MPP_FREE(ctx_ext->regs_base);
144 MPP_FREE(ctx->ctx_ext);
145 }
146
147 MPP_FREE(ctx->regs);
148 MPP_FREE(ctx->regs_out);
149
150 hal_jpege_dbg_func("leave hal %p\n", hal);
151 return MPP_OK;
152 }
153
hal_jpege_vepu2_get_task(void * hal,HalEncTask * task)154 MPP_RET hal_jpege_vepu2_get_task(void *hal, HalEncTask *task)
155 {
156 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
157 JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
158 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
159 RK_U32 i = 0;
160
161 hal_jpege_dbg_func("enter hal %p\n", hal);
162
163 memcpy(&ctx->syntax, syntax, sizeof(ctx->syntax));
164
165 ctx->hal_start_pos = mpp_packet_get_length(task->packet);
166
167 /* prepare for part encoding */
168 ctx->mcu_y = 0;
169 ctx->mcu_h = syntax->mcu_ver_cnt;
170 ctx->sw_bit = 0;
171 ctx->part_bytepos = 0;
172 ctx->part_x_fill = 0;
173 ctx->part_y_fill = 0;
174 ctx->rst_marker_idx = 0;
175 task->part_first = 1;
176 task->part_last = 0;
177 task->flags.reg_idx = 0;
178
179 /* rk3588 4 core frame parallel */
180 if (ctx->task_cnt > 1) {
181 task->flags.reg_idx = ctx->task_idx++;
182 if (ctx->task_idx >= ctx->task_cnt)
183 ctx->task_idx = 0;
184 goto MULTI_CORE_SPLIT_DONE;
185 }
186
187 /* Split single task to multi cores on rk3588 */
188 if (ctx_ext)
189 ctx_ext->multi_core_enabled = 0;
190
191 if (ctx->type == VPU_CLIENT_VEPU2_JPEG) {
192 RK_U32 width = ctx->cfg->prep.width;
193 RK_U32 height = ctx->cfg->prep.height;
194 RK_U32 buf_size = width * height / 2;
195
196 /* small image do not need to split into four segments */
197 if (width * height <= 1280 * 720 && (height <= 720 || width <= 720))
198 goto MULTI_CORE_SPLIT_DONE;
199
200 if (!ctx_ext) {
201 ctx_ext = mpp_calloc(JpegeMultiCoreCtx, 1);
202 ctx->ctx_ext = ctx_ext;
203 }
204
205 mpp_assert(ctx_ext);
206
207 if (!ctx_ext->partions_group) {
208 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_DMA_HEAP | MPP_BUFFER_FLAGS_CACHABLE);
209 if (!ctx_ext->partions_group)
210 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_ION);
211 }
212
213 mpp_assert(ctx_ext->partions_group);
214
215 if (ctx_ext->buf_size != buf_size) {
216 MppBuffer buf = NULL;
217
218 for (i = 0; i < MAX_CORE_NUM - 1; i++) {
219 buf = ctx_ext->partions_buf[i];
220 if (buf)
221 mpp_buffer_put(buf);
222 }
223
224 mpp_buffer_group_clear(ctx_ext->partions_group);
225
226 for (i = 0; i < MAX_CORE_NUM - 1; i++) {
227 mpp_buffer_get(ctx_ext->partions_group, &buf, buf_size);
228 mpp_assert(buf);
229 ctx_ext->partions_buf[i] = buf;
230 }
231
232 ctx_ext->buf_size = buf_size;
233 }
234
235 if (!ctx_ext->regs_base) {
236 void *regs_base = mpp_calloc_size(void, ctx->reg_size * MAX_CORE_NUM * 2);
237 size_t reg_size = ctx->reg_size;
238
239 ctx_ext->regs_base = regs_base;
240 for (i = 0; i < MAX_CORE_NUM; i++) {
241 ctx_ext->regs[i] = regs_base;
242 regs_base += reg_size;
243
244 ctx_ext->regs_out[i] = regs_base;
245 regs_base += reg_size;
246 }
247 }
248
249 {
250 RK_U32 mb_w = MPP_ALIGN(width, 16) / 16;
251 RK_U32 mb_h = MPP_ALIGN(height, 16) / 16;
252 RK_U32 part_rows = MPP_ALIGN(mb_h, 4) / 4;
253
254 ctx_ext->partion_num = 0;
255
256 if (ctx->cfg->split.split_mode == MPP_ENC_SPLIT_BY_CTU) {
257 RK_U32 ecs_num = (mb_h + syntax->part_rows - 1) / syntax->part_rows;
258 RK_U32 *core_ecs = ctx_ext->ecs_cnt;
259
260 if (ecs_num > 24 || ecs_num <= 8) {
261 RK_U32 divider = ecs_num > 24 ? 8 : 1;
262 RK_U32 quotient = ecs_num / divider;
263 RK_U32 remainder = ecs_num % divider;
264 RK_U32 runs = quotient / MAX_CORE_NUM;
265 RK_U32 runs_left = quotient % MAX_CORE_NUM;
266
267 if (runs > 0) {
268 for (i = 0; i < MAX_CORE_NUM; i++)
269 core_ecs[i] = runs * divider;
270 }
271
272 for (i = 0; i < runs_left; i++)
273 core_ecs[i] += divider;
274
275 core_ecs[MAX_CORE_NUM - 1] += remainder;
276 } else if (ecs_num > 20) {
277 core_ecs[0] = core_ecs[1] = 8;
278 core_ecs[2] = (ecs_num - 8 * 2) / 2;
279 core_ecs[3] = ecs_num - 8 * 2 - core_ecs[2];
280 } else if (ecs_num > 16) {
281 core_ecs[0] = 8;
282 core_ecs[1] = core_ecs[2] = 4;
283 core_ecs[3] = ecs_num - 8 - 4 * 2;
284 } else if (ecs_num > 8) {
285 core_ecs[0] = core_ecs[1] = 4;
286 core_ecs[2] = (ecs_num - 4 * 2) / 2;
287 core_ecs[3] = ecs_num - 4 * 2 - core_ecs[2];
288 }
289
290 for (i = 0; i < MAX_CORE_NUM; i++) {
291 ctx_ext->part_rows[i] = core_ecs[i] * syntax->part_rows;
292 hal_jpege_dbg_detail("part %d, ecs %d, rows %d", i, core_ecs[i],
293 ctx_ext->part_rows[i]);
294 if (core_ecs[i])
295 ctx_ext->partion_num++;
296 }
297 } else {
298 for (i = 0; i < MAX_CORE_NUM; i++) {
299 part_rows = (mb_h >= part_rows) ? part_rows : mb_h;
300
301 ctx_ext->part_rows[i] = part_rows;
302 ctx_ext->ecs_cnt[i] = 1;
303
304 hal_jpege_dbg_detail("part %d row %d restart %d\n",
305 i, part_rows, mb_w * part_rows);
306
307 if (part_rows)
308 ctx_ext->partion_num++;
309
310 if (i == 0 && !ctx->syntax.restart_ri)
311 ctx->syntax.restart_ri = mb_w * part_rows;
312
313 mb_h -= part_rows;
314 }
315 }
316 }
317
318 if (!ctx_ext->reg_cfg)
319 mpp_dev_multi_offset_init(&ctx_ext->reg_cfg, 24);
320
321 syntax->low_delay = 1;
322 ctx_ext->multi_core_enabled = 1;
323 }
324
325 if (ctx->cfg->jpeg.update) {
326 hal_jpege_rc_update(&ctx->hal_rc, syntax);
327 ctx->cfg->jpeg.update = 0;
328 }
329
330 task->rc_task->frm.is_intra = 1;
331
332 MULTI_CORE_SPLIT_DONE:
333
334 hal_jpege_dbg_func("leave hal %p\n", hal);
335
336 return MPP_OK;
337 }
338
hal_jpege_vepu2_set_extra_info(MppDev dev,JpegeSyntax * syntax,RK_U32 start_mbrow)339 static MPP_RET hal_jpege_vepu2_set_extra_info(MppDev dev, JpegeSyntax *syntax,
340 RK_U32 start_mbrow)
341 {
342 VepuOffsetCfg cfg;
343
344 cfg.fmt = syntax->format;
345 cfg.width = syntax->width;
346 cfg.height = syntax->height;
347 cfg.hor_stride = syntax->hor_stride;
348 cfg.ver_stride = syntax->ver_stride;
349 cfg.offset_x = syntax->offset_x;
350 cfg.offset_y = syntax->offset_y + start_mbrow * 16;
351
352 get_vepu_offset_cfg(&cfg);
353
354 if (cfg.offset_byte[0])
355 mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
356
357 if (cfg.offset_byte[1])
358 mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
359
360 if (cfg.offset_byte[2])
361 mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
362
363 return MPP_OK;
364 }
365
hal_jpege_vepu2_gen_regs(void * hal,HalEncTask * task)366 MPP_RET hal_jpege_vepu2_gen_regs(void *hal, HalEncTask *task)
367 {
368 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
369 MppBuffer input = task->input;
370 MppBuffer output = task->output;
371 JpegeSyntax *syntax = &ctx->syntax;
372 RK_U32 width = syntax->width;
373 RK_U32 width_align = MPP_ALIGN(width, 16);
374 RK_U32 height = syntax->height;
375 MppFrameFormat fmt = syntax->format;
376 RK_U32 hor_stride = 0;
377 RK_U32 ver_stride = MPP_ALIGN(height, 16);
378 JpegeBits bits = ctx->bits;
379 RK_S32 reg_idx = task->flags.reg_idx;
380 RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
381 size_t length = mpp_packet_get_length(task->packet);
382 RK_U8 *buf = mpp_buffer_get_ptr(output);
383 size_t size = mpp_buffer_get_size(output);
384 RK_S32 bitpos;
385 RK_S32 bytepos;
386 RK_U32 x_fill = 0;
387 RK_U32 y_fill = 0;
388 VepuFormatCfg fmt_cfg;
389 RK_U32 rotation = 0;
390
391 hal_jpege_dbg_func("enter hal %p\n", hal);
392
393 // do not support mirroring
394 if (syntax->mirroring)
395 mpp_err_f("Warning: do not support mirroring\n");
396
397 if (syntax->rotation == MPP_ENC_ROT_90)
398 rotation = 1;
399 else if (syntax->rotation == MPP_ENC_ROT_270)
400 rotation = 2;
401 else if (syntax->rotation != MPP_ENC_ROT_0)
402 mpp_err_f("Warning: only support 90 or 270 degree rotate, request rotate %d", syntax->rotation);
403 if (rotation) {
404 MPP_SWAP(RK_U32, width, height);
405 MPP_SWAP(RK_U32, width_align, ver_stride);
406 }
407 hor_stride = get_vepu_pixel_stride(&ctx->stride_cfg, width,
408 syntax->hor_stride, fmt);
409
410 //hor_stride must be align with 8, and ver_stride mus align with 2
411 if ((hor_stride & 0x7) || (ver_stride & 0x1) || (hor_stride >= (1 << 15))) {
412 mpp_err_f("illegal resolution, hor_stride %d, ver_stride %d, width %d, height %d\n",
413 syntax->hor_stride, syntax->ver_stride,
414 syntax->width, syntax->height);
415 }
416
417 x_fill = (width_align - width) / 4;
418 y_fill = (ver_stride - height);
419 mpp_assert(x_fill <= 3);
420 mpp_assert(y_fill <= 15);
421 ctx->part_x_fill = x_fill;
422 ctx->part_y_fill = y_fill;
423
424 mpp_buffer_sync_begin(output);
425
426 if (syntax->q_mode == JPEG_QFACTOR) {
427 syntax->q_factor = 100 - task->rc_task->info.quality_target;
428 hal_jpege_rc_update(&ctx->hal_rc, syntax);
429 }
430
431 /* write header to output buffer */
432 jpege_bits_setup(bits, buf, (RK_U32)size);
433 /* seek length bytes data */
434 jpege_seek_bits(bits, length << 3);
435 /* NOTE: write header will update qtable */
436 write_jpeg_header(bits, syntax, &ctx->hal_rc);
437
438 memset(regs, 0, sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS);
439 // input address setup
440 regs[VEPU2_REG_INPUT_Y] = mpp_buffer_get_fd(input);
441 regs[VEPU2_REG_INPUT_U] = regs[VEPU2_REG_INPUT_Y];
442 regs[VEPU2_REG_INPUT_V] = regs[VEPU2_REG_INPUT_Y];
443
444 // output address setup
445 bitpos = jpege_bits_get_bitpos(bits);
446 bytepos = (bitpos + 7) >> 3;
447 ctx->base = buf;
448 ctx->size = size;
449 ctx->sw_bit = bitpos;
450 ctx->part_bytepos = bytepos;
451
452 get_msb_lsb_at_pos(®s[51], ®s[52], buf, bytepos);
453
454 mpp_buffer_sync_end(output);
455
456 regs[53] = size - bytepos;
457
458 // bus config
459 regs[54] = 16 << 8;
460
461 regs[60] = (((bytepos & 7) * 8) << 16) |
462 (x_fill << 4) |
463 (y_fill);
464 regs[61] = hor_stride;
465
466 regs[77] = mpp_buffer_get_fd(output);
467 if (bytepos)
468 mpp_dev_set_reg_offset(ctx->dev, 77, bytepos);
469 /* 95 - 97 color conversion parameter */
470 {
471 RK_U32 coeffA;
472 RK_U32 coeffB;
473 RK_U32 coeffC;
474 RK_U32 coeffE;
475 RK_U32 coeffF;
476
477 switch (syntax->color_conversion_type) {
478 case 0 : { /* BT.601 */
479 /*
480 * Y = 0.2989 R + 0.5866 G + 0.1145 B
481 * Cb = 0.5647 (B - Y) + 128
482 * Cr = 0.7132 (R - Y) + 128
483 */
484 coeffA = 19589;
485 coeffB = 38443;
486 coeffC = 7504;
487 coeffE = 37008;
488 coeffF = 46740;
489 } break;
490 case 1 : { /* BT.709 */
491 /*
492 * Y = 0.2126 R + 0.7152 G + 0.0722 B
493 * Cb = 0.5389 (B - Y) + 128
494 * Cr = 0.6350 (R - Y) + 128
495 */
496 coeffA = 13933;
497 coeffB = 46871;
498 coeffC = 4732;
499 coeffE = 35317;
500 coeffF = 41615;
501 } break;
502 case 2 : {
503 coeffA = syntax->coeffA;
504 coeffB = syntax->coeffB;
505 coeffC = syntax->coeffC;
506 coeffE = syntax->coeffE;
507 coeffF = syntax->coeffF;
508 } break;
509 default : {
510 mpp_err("invalid color conversion type %d\n",
511 syntax->color_conversion_type);
512 coeffA = 19589;
513 coeffB = 38443;
514 coeffC = 7504;
515 coeffE = 37008;
516 coeffF = 46740;
517 } break;
518 }
519
520 regs[95] = coeffA | (coeffB << 16);
521 regs[96] = coeffC | (coeffE << 16);
522 regs[97] = coeffF;
523 }
524
525 regs[103] = (width_align >> 4) << 8 |
526 (ver_stride >> 4) << 20 |
527 (1 << 6) | /* intra coding */
528 (2 << 4) | /* format jpeg */
529 1; /* encoder start */
530
531 if (!get_vepu_fmt(&fmt_cfg, fmt)) {
532 regs[74] = (fmt_cfg.format << 4) |
533 (rotation << 2);
534 regs[98] = (fmt_cfg.b_mask & 0x1f) << 16 |
535 (fmt_cfg.g_mask & 0x1f) << 8 |
536 (fmt_cfg.r_mask & 0x1f);
537 regs[105] = 7 << 26 | (fmt_cfg.swap_32_in & 1) << 29 |
538 (fmt_cfg.swap_16_in & 1) << 30 |
539 (fmt_cfg.swap_8_in & 1) << 31;
540 }
541
542 regs[107] = ((syntax->part_rows & 0xff) << 16) |
543 jpege_restart_marker[ctx->rst_marker_idx & 7];
544
545 /* encoder interrupt */
546 regs[109] = 1 << 12 | /* clock gating */
547 1 << 10; /* enable timeout interrupt */
548
549 if (syntax->low_delay) {
550 /* slice encode end by RST */
551 regs[107] |= (1 << 24);
552 /* slice interrupt enable */
553 regs[109] |= (1 << 16);
554 }
555
556 /* 0 ~ 31 quantization tables */
557 {
558 RK_S32 i;
559
560 for (i = 0; i < 16; i++) {
561 /* qtable need to reorder in particular order */
562 regs[i] = ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 0]] << 24 |
563 ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 1]] << 16 |
564 ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 2]] << 8 |
565 ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 3]];
566 }
567 for (i = 0; i < 16; i++) {
568 /* qtable need to reorder in particular order */
569 regs[i + 16] = ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 0]] << 24 |
570 ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 1]] << 16 |
571 ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 2]] << 8 |
572 ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 3]];
573 }
574 }
575
576 hal_jpege_dbg_func("leave hal %p\n", hal);
577 return MPP_OK;
578 }
579
multi_core_start(HalJpegeCtx * ctx,HalEncTask * task)580 static MPP_RET multi_core_start(HalJpegeCtx *ctx, HalEncTask *task)
581 {
582 JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
583 JpegeSyntax *syntax = &ctx->syntax;
584 MppDevRegOffCfgs *reg_cfg = ctx_ext->reg_cfg;
585 MppDev dev = ctx->dev;
586 RK_S32 reg_idx = task->flags.reg_idx;
587 RK_U32 *src = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
588 RK_U32 reg_size = ctx->reg_size;
589 MPP_RET ret = MPP_OK;
590 RK_U32 partion_num = ctx_ext->partion_num;
591 RK_U32 mcu_y = 0;
592 RK_U32 i;
593
594 hal_jpege_dbg_detail("start %d partions\n", partion_num);
595
596 for (i = 0; i < partion_num; i++) {
597 RK_U32 part_not_end = i < partion_num - 1;
598 RK_U32 part_not_start = i > 0;
599 RK_U32 *regs = (RK_U32 *)ctx_ext->regs[i];
600 RK_U32 part_enc_mcu_h = ctx_ext->part_rows[i];
601 RK_U32 part_x_fill = ctx->part_x_fill;
602 RK_U32 part_y_fill = ctx->part_y_fill;
603 RK_U32 part_bytepos = ctx->part_bytepos;
604
605 // it only needs to fill the partition on the right and below.
606 if (syntax->rotation == MPP_ENC_ROT_90) {
607 if (part_not_end)
608 part_x_fill = 0;
609 } else if (syntax->rotation == MPP_ENC_ROT_0 || syntax->rotation == MPP_ENC_ROT_180) {
610 if (part_not_end)
611 part_y_fill = 0;
612 } else if (syntax->rotation == MPP_ENC_ROT_270) {
613 if (part_not_start)
614 part_x_fill = 0;
615 } else
616 mpp_err_f("input rotation %d not supported", syntax->rotation);
617
618 memcpy(regs, src, reg_size);
619
620 mpp_dev_multi_offset_reset(reg_cfg);
621
622 if (i == 0) {
623 get_msb_lsb_at_pos(®s[51], ®s[52], ctx->base, part_bytepos);
624 regs[77] = mpp_buffer_get_fd(task->output);
625 regs[53] = mpp_buffer_get_size(task->output) - part_bytepos;
626 regs[60] = (((part_bytepos & 7) * 8) << 16) |
627 (part_x_fill << 4) |
628 (part_y_fill);
629 /* the stream offset had been setup */
630 } else {
631 MppBuffer buf = ctx_ext->partions_buf[i - 1];
632
633 regs[77] = mpp_buffer_get_fd(buf);
634 regs[53] = mpp_buffer_get_size(buf);
635 regs[60] = (((0 & 7) * 8) << 16) |
636 (part_x_fill << 4) |
637 (part_y_fill);
638 }
639
640 regs[103] = syntax->mcu_hor_cnt << 8 |
641 (part_enc_mcu_h) << 20 |
642 (1 << 6) | /* intra coding */
643 (2 << 4) | /* format jpeg */
644 1; /* encoder start */
645
646 hal_jpege_dbg_detail("part %d, part_not_end 0x%x, rst_marker_idx %d",
647 i, part_not_end, ctx->rst_marker_idx);
648 regs[107] = part_not_end << 24 | ((syntax->part_rows & 0xff) << 16) |
649 jpege_restart_marker[ctx->rst_marker_idx & 7];
650 ctx->rst_marker_idx += ctx_ext->ecs_cnt[i];
651
652 VepuOffsetCfg cfg;
653
654 memset(&cfg, 0, sizeof(cfg));
655
656 cfg.fmt = syntax->format;
657 cfg.width = syntax->width;
658 cfg.height = syntax->height;
659 cfg.hor_stride = syntax->hor_stride;
660 cfg.ver_stride = syntax->ver_stride;
661 cfg.offset_x = syntax->offset_x;
662 cfg.offset_y = syntax->offset_y + mcu_y * 16;
663
664 if (syntax->rotation == MPP_ENC_ROT_90 || syntax->rotation == MPP_ENC_ROT_270) {
665 regs[103] = part_enc_mcu_h << 8 |
666 (syntax->mcu_hor_cnt) << 20 |
667 (1 << 6) | /* intra coding */
668 (2 << 4) | /* format jpeg */
669 1; /* encoder start */
670
671 /*
672 * It is opposite that position of partitions
673 * of rotation 90 degree and rotation 270 degree.
674 */
675 if (syntax->rotation == MPP_ENC_ROT_270)
676 cfg.offset_x = syntax->offset_x +
677 (syntax->mcu_ver_cnt - ctx_ext->part_rows[0] - mcu_y) * 16;
678 else
679 cfg.offset_x = syntax->offset_x + mcu_y * 16;
680
681 cfg.offset_y = syntax->offset_y;
682 }
683
684 get_vepu_offset_cfg(&cfg);
685 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
686 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
687 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
688
689 mcu_y += part_enc_mcu_h;
690
691 do {
692 MppDevRegWrCfg wr_cfg;
693 MppDevRegRdCfg rd_cfg;
694
695 wr_cfg.reg = regs;
696 wr_cfg.size = reg_size;
697 wr_cfg.offset = 0;
698
699 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
700 if (ret) {
701 mpp_err_f("set register write failed %d\n", ret);
702 break;
703 }
704
705 rd_cfg.reg = ctx_ext->regs_out[i];
706 rd_cfg.size = reg_size;
707 rd_cfg.offset = 0;
708
709 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
710 if (ret) {
711 mpp_err_f("set register read failed %d\n", ret);
712 break;
713 }
714
715 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_OFFS, reg_cfg);
716 if (ret) {
717 mpp_err_f("set register offsets failed %d\n", ret);
718 break;
719 }
720
721 if (i < partion_num - 1) {
722 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL);
723 if (ret) {
724 mpp_err_f("send delimit failed %d\n", ret);
725 break;
726 }
727 }
728 } while (0);
729 }
730
731 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
732 if (ret)
733 mpp_err_f("send cmd failed %d\n", ret);
734
735 return ret;
736 }
737
multi_core_wait(HalJpegeCtx * ctx,HalEncTask * task)738 static MPP_RET multi_core_wait(HalJpegeCtx *ctx, HalEncTask *task)
739 {
740 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
741 JpegeFeedback *feedback = &ctx->feedback;
742 RK_U32 sw_bit = 0;
743 RK_U32 hw_bit = 0;
744 MPP_RET ret = MPP_OK;
745 RK_U32 val;
746 RK_U32 i;
747
748 hal_jpege_dbg_detail("poll partion_num %d\n", ctx_ext->partion_num);
749
750 for (i = 0; i < ctx_ext->partion_num; i++) {
751 RK_U32 *regs = ctx_ext->regs_out[i];
752
753 hal_jpege_dbg_detail("poll reg %d %p", i, regs);
754
755 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
756 if (ret)
757 mpp_err_f("poll cmd failed %d\n", ret);
758
759 if (i == 0) {
760 RK_S32 fd = mpp_buffer_get_fd(task->output);
761
762 val = regs[109];
763 hal_jpege_dbg_output("hw_status %08x\n", val);
764 feedback->hw_status = val & 0x70;
765 val = regs[53];
766 sw_bit = jpege_bits_get_bitpos(ctx->bits);
767 hw_bit = val;
768 feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
769 hal_jpege_dbg_detail("partion len = %d", hw_bit / 8);
770 task->length = feedback->stream_length;
771 task->hw_length = task->length - ctx->hal_start_pos;
772
773 mpp_dmabuf_sync_partial_begin(fd, 1, 0, task->length, __FUNCTION__);
774 } else {
775 void *stream_ptr = mpp_buffer_get_ptr(task->output);
776 void *partion_ptr = mpp_buffer_get_ptr(ctx_ext->partions_buf[i - 1]);
777 RK_S32 partion_fd = mpp_buffer_get_fd(ctx_ext->partions_buf[i - 1]);
778 RK_U32 partion_len = 0;
779
780 val = regs[109];
781 hal_jpege_dbg_output("hw_status %08x\n", val);
782 feedback->hw_status = val & 0x70;
783 partion_len = regs[53] / 8;
784 hal_jpege_dbg_detail("partion_len = %d", partion_len);
785
786 mpp_dmabuf_sync_partial_begin(partion_fd, 1, 0, partion_len, __FUNCTION__);
787
788 memcpy(stream_ptr + feedback->stream_length, partion_ptr, partion_len);
789 feedback->stream_length += partion_len;
790 task->length = feedback->stream_length;
791 task->hw_length += partion_len;
792 }
793 }
794
795 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
796 sw_bit, hw_bit, feedback->stream_length, task->hw_length);
797
798 return ret;
799 }
800
hal_jpege_vepu2_start(void * hal,HalEncTask * task)801 MPP_RET hal_jpege_vepu2_start(void *hal, HalEncTask *task)
802 {
803 MPP_RET ret = MPP_OK;
804 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
805 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
806
807 hal_jpege_dbg_func("enter hal %p\n", hal);
808
809 if (ctx_ext && ctx_ext->multi_core_enabled) {
810 multi_core_start(ctx, task);
811 } else {
812 hal_jpege_vepu2_set_extra_info(ctx->dev, &ctx->syntax, 0);
813 do {
814 MppDevRegWrCfg wr_cfg;
815 MppDevRegRdCfg rd_cfg;
816 RK_U32 reg_size = ctx->reg_size;
817 RK_S32 reg_idx = task->flags.reg_idx;
818 RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + reg_size * reg_idx);
819
820 wr_cfg.reg = regs;
821 wr_cfg.size = reg_size;
822 wr_cfg.offset = 0;
823
824 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
825 if (ret) {
826 mpp_err_f("set register write failed %d\n", ret);
827 break;
828 }
829
830 rd_cfg.reg = regs;
831 rd_cfg.size = reg_size;
832 rd_cfg.offset = 0;
833
834 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
835 if (ret) {
836 mpp_err_f("set register read failed %d\n", ret);
837 break;
838 }
839
840 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
841 if (ret) {
842 mpp_err_f("send cmd failed %d\n", ret);
843 break;
844 }
845 } while (0);
846 }
847 hal_jpege_dbg_func("leave hal %p\n", hal);
848 (void)task;
849 return ret;
850 }
851
hal_jpege_vepu2_wait(void * hal,HalEncTask * task)852 MPP_RET hal_jpege_vepu2_wait(void *hal, HalEncTask *task)
853 {
854 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
855 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
856 MPP_RET ret = MPP_OK;
857
858 hal_jpege_dbg_func("enter hal %p\n", hal);
859
860 if (ctx_ext && ctx_ext->multi_core_enabled) {
861 multi_core_wait(ctx, task);
862 } else {
863 JpegeFeedback *feedback = &ctx->feedback;
864 JpegeBits bits = ctx->bits;
865 RK_S32 reg_idx = task->flags.reg_idx;
866 RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
867 RK_U32 sw_bit = 0;
868 RK_U32 hw_bit = 0;
869 RK_U32 val;
870
871 if (ctx->dev) {
872 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
873 if (ret)
874 mpp_err_f("poll cmd failed %d\n", ret);
875 }
876
877 val = regs[109];
878 hal_jpege_dbg_output("hw_status %08x\n", val);
879 feedback->hw_status = val & 0x70;
880 val = regs[53];
881
882 sw_bit = jpege_bits_get_bitpos(bits);
883 hw_bit = val;
884
885 // NOTE: hardware will return 64 bit access byte count
886 feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
887 task->length = feedback->stream_length;
888 task->hw_length = task->length - ctx->hal_start_pos;
889
890 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
891 sw_bit, hw_bit, feedback->stream_length, task->hw_length);
892 }
893
894 hal_jpege_dbg_func("leave hal %p\n", hal);
895 return ret;
896 }
897
hal_jpege_vepu2_part_start(void * hal,HalEncTask * task)898 MPP_RET hal_jpege_vepu2_part_start(void *hal, HalEncTask *task)
899 {
900 MPP_RET ret = MPP_OK;
901 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
902 JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
903 RK_U32 mcu_w = syntax->mcu_hor_cnt;
904 RK_U32 mcu_h = syntax->mcu_ver_cnt;
905 RK_U32 mcu_y = ctx->mcu_y;
906 RK_U32 part_mcu_h = syntax->part_rows;
907 RK_S32 reg_idx = task->flags.reg_idx;
908 RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
909 RK_U32 part_enc_h;
910 RK_U32 part_enc_mcu_h;
911 RK_U32 part_y_fill;
912 RK_U32 part_not_end;
913
914 hal_jpege_dbg_func("enter part start %p\n", hal);
915
916 /* Fix register for each part encoding */
917 task->part_first = !mcu_y;
918 if (mcu_y + part_mcu_h < mcu_h) {
919 part_enc_h = part_mcu_h * 16;
920 part_enc_mcu_h = part_mcu_h;
921 part_y_fill = 0;
922 part_not_end = 1;
923 task->part_last = 0;
924 } else {
925 part_enc_h = syntax->height - mcu_y * 16;
926 part_enc_mcu_h = MPP_ALIGN(part_enc_h, 16) / 16;;
927 part_y_fill = ctx->part_y_fill;
928 part_not_end = 0;
929 task->part_last = 1;
930 }
931
932 hal_jpege_dbg_detail("part first %d last %d\n", task->part_first, task->part_last);
933
934 get_msb_lsb_at_pos(®s[51], ®s[52], ctx->base, ctx->part_bytepos);
935
936 regs[53] = ctx->size - ctx->part_bytepos;
937
938 regs[60] = (((ctx->part_bytepos & 7) * 8) << 16) |
939 (ctx->part_x_fill << 4) |
940 (part_y_fill);
941
942 regs[77] = mpp_buffer_get_fd(task->output);
943 if (ctx->part_bytepos)
944 mpp_dev_set_reg_offset(ctx->dev, 77, ctx->part_bytepos);
945
946 regs[103] = mcu_w << 8 |
947 (part_enc_mcu_h) << 20 |
948 (1 << 6) | /* intra coding */
949 (2 << 4) | /* format jpeg */
950 1; /* encoder start */
951
952 hal_jpege_dbg_detail("part_not_end 0x%x, rst_marker_idx %d",
953 part_not_end, ctx->rst_marker_idx);
954 regs[107] = part_not_end << 24 | jpege_restart_marker[ctx->rst_marker_idx & 7];
955 ctx->rst_marker_idx++;
956
957 hal_jpege_vepu2_set_extra_info(ctx->dev, syntax, mcu_y);
958 ctx->mcu_y += part_enc_mcu_h;
959
960 do {
961 MppDevRegWrCfg wr_cfg;
962 MppDevRegRdCfg rd_cfg;
963 RK_U32 reg_size = ctx->reg_size;
964
965 wr_cfg.reg = ctx->regs;
966 wr_cfg.size = reg_size;
967 wr_cfg.offset = 0;
968
969 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
970 if (ret) {
971 mpp_err_f("set register write failed %d\n", ret);
972 break;
973 }
974
975 rd_cfg.reg = ctx->regs_out;
976 rd_cfg.size = reg_size;
977 rd_cfg.offset = 0;
978
979 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
980 if (ret) {
981 mpp_err_f("set register read failed %d\n", ret);
982 break;
983 }
984
985 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
986 if (ret) {
987 mpp_err_f("send cmd failed %d\n", ret);
988 break;
989 }
990 } while (0);
991
992 hal_jpege_dbg_func("leave part start %p\n", hal);
993 (void)task;
994 return ret;
995 }
996
hal_jpege_vepu2_part_wait(void * hal,HalEncTask * task)997 MPP_RET hal_jpege_vepu2_part_wait(void *hal, HalEncTask *task)
998 {
999 MPP_RET ret = MPP_OK;
1000 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1001 RK_S32 reg_idx = task->flags.reg_idx;
1002 RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs_out + ctx->reg_size * reg_idx);
1003 JpegeFeedback *feedback = &ctx->feedback;
1004 RK_U32 hw_bit = 0;
1005
1006 hal_jpege_dbg_func("enter part wait %p\n", hal);
1007
1008 if (ctx->dev) {
1009 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
1010 if (ret)
1011 mpp_err_f("poll cmd failed %d\n", ret);
1012 }
1013
1014 hal_jpege_dbg_detail("hw_status %08x\n", regs[109]);
1015
1016 hw_bit = regs[53];
1017
1018 hal_jpege_dbg_detail("byte pos %d -> %d\n", ctx->part_bytepos,
1019 (ctx->part_bytepos & (~7)) + (hw_bit / 8));
1020 ctx->part_bytepos = (ctx->part_bytepos & (~7)) + (hw_bit / 8);
1021
1022 feedback->stream_length = ctx->part_bytepos;
1023 task->length = ctx->part_bytepos;
1024 task->hw_length = task->length - ctx->hal_start_pos;
1025
1026 hal_jpege_dbg_detail("stream_length %d, hw_byte %d",
1027 feedback->stream_length, hw_bit / 8);
1028
1029 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
1030 ctx->sw_bit, hw_bit, feedback->stream_length, task->hw_length);
1031
1032 hal_jpege_dbg_func("leave part wait %p\n", hal);
1033 return ret;
1034 }
1035
hal_jpege_vepu2_ret_task(void * hal,HalEncTask * task)1036 MPP_RET hal_jpege_vepu2_ret_task(void *hal, HalEncTask *task)
1037 {
1038 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1039 EncRcTaskInfo *rc_info = &task->rc_task->info;
1040
1041 task->rc_task->info.bit_real = ctx->feedback.stream_length * 8;
1042 task->hal_ret.data = &ctx->feedback;
1043 task->hal_ret.number = 1;
1044
1045 rc_info->quality_real = rc_info->quality_target;
1046
1047 return MPP_OK;
1048 }
1049
1050 const MppEncHalApi hal_jpege_vepu2 = {
1051 .name = "hal_jpege_vepu2",
1052 .coding = MPP_VIDEO_CodingMJPEG,
1053 .ctx_size = sizeof(HalJpegeCtx),
1054 .flag = 0,
1055 .init = hal_jpege_vepu2_init,
1056 .deinit = hal_jpege_vepu2_deinit,
1057 .prepare = NULL,
1058 .get_task = hal_jpege_vepu2_get_task,
1059 .gen_regs = hal_jpege_vepu2_gen_regs,
1060 .start = hal_jpege_vepu2_start,
1061 .wait = hal_jpege_vepu2_wait,
1062 .part_start = hal_jpege_vepu2_part_start,
1063 .part_wait = hal_jpege_vepu2_part_wait,
1064 .ret_task = hal_jpege_vepu2_ret_task,
1065 };
1066