1 /*
2 * Copyright 2015 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_jpege_vepu2"
18
19 #include <string.h>
20
21 #include "mpp_env.h"
22 #include "mpp_common.h"
23 #include "mpp_mem.h"
24 #include "mpp_platform.h"
25
26 #include "mpp_enc_hal.h"
27 #include "vcodec_service.h"
28
29 #include "hal_jpege_debug.h"
30 #include "hal_jpege_api_v2.h"
31 #include "hal_jpege_base.h"
32
/* Number of 32-bit registers in one VEPU2 JPEG register set. */
#define VEPU_JPEGE_VEPU2_NUM_REGS   184
/*
 * Register indices of the three input plane addresses. They are loaded
 * with the input buffer fd and later receive per-plane byte offsets.
 */
#define VEPU2_REG_INPUT_Y           48
#define VEPU2_REG_INPUT_U           49
#define VEPU2_REG_INPUT_V           50

/* One complete register set, addressed by register index. */
typedef struct jpege_vepu2_reg_set_t {
    RK_U32  val[VEPU_JPEGE_VEPU2_NUM_REGS];
} jpege_vepu2_reg_set;
41
/* Maximum number of partitions one frame is split into. */
#define MAX_CORE_NUM                4

/*
 * Extra per-encoder state used when a frame is split into several
 * partitions that are submitted together (see multi_core_start /
 * multi_core_wait). Allocated lazily in hal_jpege_vepu2_get_task.
 */
typedef struct JpegeMultiCoreCtx_t {
    /* set to 1 by get_task when the current frame is split, 0 otherwise */
    RK_U32              multi_core_enabled;
    /* number of partitions with a non-zero row count for this frame */
    RK_U32              partion_num;
    /* register offset list, rebuilt for every partition before submit */
    MppDevRegOffCfgs    *reg_cfg;

    /* internal buffer group the partition stream buffers come from */
    MppBufferGroup      partions_group;
    /*
     * Stream buffers for partitions 1..MAX_CORE_NUM-1; partition 0 writes
     * directly into the task output buffer.
     */
    MppBuffer           partions_buf[MAX_CORE_NUM - 1];
    /* size the partition buffers were allocated with */
    RK_U32              buf_size;

    /* macroblock rows encoded by each partition */
    RK_U32              part_rows[MAX_CORE_NUM];
    /*
     * Per-partition count added to the restart marker index after the
     * partition is configured (presumably entropy-coded segments).
     */
    RK_U32              ecs_cnt[MAX_CORE_NUM];

    /* single allocation backing both regs[] and regs_out[] */
    void                *regs_base;
    /* per-partition register set written to the device */
    void                *regs[MAX_CORE_NUM];
    /* per-partition register set read back from the device */
    void                *regs_out[MAX_CORE_NUM];
} JpegeMultiCoreCtx;
60
hal_jpege_vepu2_init(void * hal,MppEncHalCfg * cfg)61 MPP_RET hal_jpege_vepu2_init(void *hal, MppEncHalCfg *cfg)
62 {
63 MPP_RET ret = MPP_OK;
64 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
65 MppClientType type;
66 RK_U32 vcodec_type = mpp_get_vcodec_type();
67
68 mpp_env_get_u32("hal_jpege_debug", &hal_jpege_debug, 0);
69 hal_jpege_dbg_func("enter hal %p cfg %p\n", hal, cfg);
70
71 /* update output to MppEnc */
72 type = (vcodec_type & HAVE_VEPU2_JPEG) ?
73 VPU_CLIENT_VEPU2_JPEG : VPU_CLIENT_VEPU2;
74
75 cfg->type = type;
76 ret = mpp_dev_init(&cfg->dev, type);
77 if (ret) {
78 mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
79 return ret;
80 }
81 ctx->dev = cfg->dev;
82 ctx->type = cfg->type;
83
84 jpege_bits_init(&ctx->bits);
85 mpp_assert(ctx->bits);
86 ret = hal_jpege_vepu_init_rc(&ctx->hal_rc);
87 if (ret)
88 return ret;
89
90 ctx->cfg = cfg->cfg;
91 ctx->reg_size = sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS;
92 ctx->regs = mpp_calloc_size(void, ctx->reg_size + EXTRA_INFO_SIZE);
93 if (NULL == ctx->regs) {
94 mpp_err_f("failed to malloc vepu2 regs\n");
95 return MPP_NOK;
96 }
97
98 ctx->regs_out = mpp_calloc_size(void, ctx->reg_size + EXTRA_INFO_SIZE);
99 if (NULL == ctx->regs_out) {
100 mpp_err_f("failed to malloc vepu2 regs\n");
101 return MPP_NOK;
102 }
103
104 hal_jpege_dbg_func("leave hal %p\n", hal);
105 return MPP_OK;
106 }
107
hal_jpege_vepu2_deinit(void * hal)108 MPP_RET hal_jpege_vepu2_deinit(void *hal)
109 {
110 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
111
112 hal_jpege_dbg_func("enter hal %p\n", hal);
113
114 if (ctx->bits) {
115 jpege_bits_deinit(ctx->bits);
116 ctx->bits = NULL;
117 }
118
119 if (ctx->dev) {
120 mpp_dev_deinit(ctx->dev);
121 ctx->dev = NULL;
122 }
123
124 hal_jpege_vepu_deinit_rc(&ctx->hal_rc);
125
126 if (ctx->ctx_ext) {
127 JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
128 RK_U32 i;
129
130 if (ctx_ext->reg_cfg) {
131 mpp_dev_multi_offset_deinit(ctx_ext->reg_cfg);
132 ctx_ext->reg_cfg = NULL;
133 }
134
135 for (i = 0; i < MAX_CORE_NUM - 1; i++)
136 if (ctx_ext->partions_buf[i])
137 mpp_buffer_put(ctx_ext->partions_buf[i]);
138
139 if (ctx_ext->partions_group) {
140 mpp_buffer_group_put(ctx_ext->partions_group);
141 ctx_ext->partions_group = NULL;
142 }
143
144 MPP_FREE(ctx_ext->regs_base);
145 MPP_FREE(ctx->ctx_ext);
146 }
147
148 MPP_FREE(ctx->regs);
149 MPP_FREE(ctx->regs_out);
150
151 hal_jpege_dbg_func("leave hal %p\n", hal);
152 return MPP_OK;
153 }
154
hal_jpege_vepu2_get_task(void * hal,HalEncTask * task)155 MPP_RET hal_jpege_vepu2_get_task(void *hal, HalEncTask *task)
156 {
157 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
158 JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
159 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
160 RK_U32 i = 0;
161
162 hal_jpege_dbg_func("enter hal %p\n", hal);
163
164 memcpy(&ctx->syntax, syntax, sizeof(ctx->syntax));
165 /* Set rc paramters */
166 hal_jpege_dbg_input("rc_mode %d\n", ctx->cfg->rc.rc_mode);
167 if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
168 if (!ctx->hal_rc.q_factor) {
169 task->rc_task->info.quality_target = syntax->q_factor ? (100 - syntax->q_factor) : 80;
170 task->rc_task->info.quality_min = 100 - syntax->qf_max;
171 task->rc_task->info.quality_max = 100 - syntax->qf_min;
172 task->rc_task->frm.is_intra = 1;
173 } else {
174 task->rc_task->info.quality_target = ctx->hal_rc.last_quality;
175 task->rc_task->info.quality_min = 100 - syntax->qf_max;
176 task->rc_task->info.quality_max = 100 - syntax->qf_min;
177 }
178 }
179 ctx->hal_start_pos = mpp_packet_get_length(task->packet);
180
181 /* prepare for part encoding */
182 ctx->mcu_y = 0;
183 ctx->mcu_h = syntax->mcu_h;
184 ctx->sw_bit = 0;
185 ctx->part_bytepos = 0;
186 ctx->part_x_fill = 0;
187 ctx->part_y_fill = 0;
188 ctx->rst_marker_idx = 0;
189 task->part_first = 1;
190 task->part_last = 0;
191
192 /* Split single task to multi cores on rk3588 */
193 if (ctx_ext)
194 ctx_ext->multi_core_enabled = 0;
195
196 if (ctx->type == VPU_CLIENT_VEPU2_JPEG) {
197 RK_U32 width = ctx->cfg->prep.width;
198 RK_U32 height = ctx->cfg->prep.height;
199 RK_U32 buf_size = width * height / 2;
200
201 /* small image do not need to split into four segments */
202 if (width * height <= 1280 * 720 && (height <= 720 || width <= 720))
203 goto MULTI_CORE_SPLIT_DONE;
204
205 if (!ctx_ext) {
206 ctx_ext = mpp_calloc(JpegeMultiCoreCtx, 1);
207 ctx->ctx_ext = ctx_ext;
208 }
209
210 mpp_assert(ctx_ext);
211
212 if (!ctx_ext->partions_group) {
213 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_DMA_HEAP | MPP_BUFFER_FLAGS_CACHABLE);
214 if (!ctx_ext->partions_group)
215 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_ION);
216 }
217
218 mpp_assert(ctx_ext->partions_group);
219
220 if (ctx_ext->buf_size != buf_size) {
221 MppBuffer buf = NULL;
222
223 for (i = 0; i < MAX_CORE_NUM - 1; i++) {
224 buf = ctx_ext->partions_buf[i];
225 if (buf)
226 mpp_buffer_put(buf);
227 }
228
229 mpp_buffer_group_clear(ctx_ext->partions_group);
230
231 for (i = 0; i < MAX_CORE_NUM - 1; i++) {
232 mpp_buffer_get(ctx_ext->partions_group, &buf, buf_size);
233 mpp_assert(buf);
234 ctx_ext->partions_buf[i] = buf;
235 }
236
237 ctx_ext->buf_size = buf_size;
238 }
239
240 if (!ctx_ext->regs_base) {
241 void *regs_base = mpp_calloc_size(void, ctx->reg_size * MAX_CORE_NUM * 2);
242 size_t reg_size = ctx->reg_size;
243
244 ctx_ext->regs_base = regs_base;
245 for (i = 0; i < MAX_CORE_NUM; i++) {
246 ctx_ext->regs[i] = regs_base;
247 regs_base += reg_size;
248
249 ctx_ext->regs_out[i] = regs_base;
250 regs_base += reg_size;
251 }
252 }
253
254 {
255 RK_U32 mb_w = MPP_ALIGN(width, 16) / 16;
256 RK_U32 mb_h = MPP_ALIGN(height, 16) / 16;
257 RK_U32 part_rows = MPP_ALIGN(mb_h, 4) / 4;
258
259 ctx_ext->partion_num = 0;
260
261 if (ctx->cfg->split.split_mode == MPP_ENC_SPLIT_BY_CTU) {
262 RK_U32 ecs_num = (mb_h + syntax->part_rows - 1) / syntax->part_rows;
263 RK_U32 *core_ecs = ctx_ext->ecs_cnt;
264
265 if (ecs_num > 24 || ecs_num <= 8) {
266 RK_U32 divider = ecs_num > 24 ? 8 : 1;
267 RK_U32 quotient = ecs_num / divider;
268 RK_U32 remainder = ecs_num % divider;
269 RK_U32 runs = quotient / MAX_CORE_NUM;
270 RK_U32 runs_left = quotient % MAX_CORE_NUM;
271
272 if (runs > 0) {
273 for (i = 0; i < MAX_CORE_NUM; i++)
274 core_ecs[i] = runs * divider;
275 }
276
277 for (i = 0; i < runs_left; i++)
278 core_ecs[i] += divider;
279
280 core_ecs[MAX_CORE_NUM - 1] += remainder;
281 } else if (ecs_num > 20) {
282 core_ecs[0] = core_ecs[1] = 8;
283 core_ecs[2] = (ecs_num - 8 * 2) / 2;
284 core_ecs[3] = ecs_num - 8 * 2 - core_ecs[2];
285 } else if (ecs_num > 16) {
286 core_ecs[0] = 8;
287 core_ecs[1] = core_ecs[2] = 4;
288 core_ecs[3] = ecs_num - 8 - 4 * 2;
289 } else if (ecs_num > 8) {
290 core_ecs[0] = core_ecs[1] = 4;
291 core_ecs[2] = (ecs_num - 4 * 2) / 2;
292 core_ecs[3] = ecs_num - 4 * 2 - core_ecs[2];
293 }
294
295 for (i = 0; i < MAX_CORE_NUM; i++) {
296 ctx_ext->part_rows[i] = core_ecs[i] * syntax->part_rows;
297 hal_jpege_dbg_detail("part %d, ecs %d, rows %d", i, core_ecs[i],
298 ctx_ext->part_rows[i]);
299 if (core_ecs[i])
300 ctx_ext->partion_num++;
301 }
302 } else {
303 for (i = 0; i < MAX_CORE_NUM; i++) {
304 part_rows = (mb_h >= part_rows) ? part_rows : mb_h;
305
306 ctx_ext->part_rows[i] = part_rows;
307 ctx_ext->ecs_cnt[i] = 1;
308
309 hal_jpege_dbg_detail("part %d row %d restart %d\n",
310 i, part_rows, mb_w * part_rows);
311
312 if (part_rows)
313 ctx_ext->partion_num++;
314
315 if (i == 0 && !ctx->syntax.restart_ri)
316 ctx->syntax.restart_ri = mb_w * part_rows;
317
318 mb_h -= part_rows;
319 }
320 }
321 }
322
323 if (!ctx_ext->reg_cfg)
324 mpp_dev_multi_offset_init(&ctx_ext->reg_cfg, 24);
325
326 syntax->low_delay = 1;
327 ctx_ext->multi_core_enabled = 1;
328 }
329 MULTI_CORE_SPLIT_DONE:
330
331 hal_jpege_dbg_func("leave hal %p\n", hal);
332
333 return MPP_OK;
334 }
335
hal_jpege_vepu2_set_extra_info(MppDev dev,JpegeSyntax * syntax,RK_U32 start_mbrow)336 static MPP_RET hal_jpege_vepu2_set_extra_info(MppDev dev, JpegeSyntax *syntax,
337 RK_U32 start_mbrow)
338 {
339 VepuOffsetCfg cfg;
340 MppDevRegOffsetCfg trans_cfg;
341
342 cfg.fmt = syntax->format;
343 cfg.width = syntax->width;
344 cfg.height = syntax->height;
345 cfg.hor_stride = syntax->hor_stride;
346 cfg.ver_stride = syntax->ver_stride;
347 cfg.offset_x = syntax->offset_x;
348 cfg.offset_y = syntax->offset_y + start_mbrow * 16;
349
350 get_vepu_offset_cfg(&cfg);
351
352 if (cfg.offset_byte[0]) {
353 trans_cfg.reg_idx = VEPU2_REG_INPUT_Y;
354 trans_cfg.offset = cfg.offset_byte[0];
355
356 mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
357 }
358
359 if (cfg.offset_byte[1]) {
360 trans_cfg.reg_idx = VEPU2_REG_INPUT_U;
361 trans_cfg.offset = cfg.offset_byte[1];
362
363 mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
364 }
365
366 if (cfg.offset_byte[2]) {
367 trans_cfg.reg_idx = VEPU2_REG_INPUT_V;
368 trans_cfg.offset = cfg.offset_byte[2];
369
370 mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
371 }
372
373 return MPP_OK;
374 }
375
hal_jpege_vepu2_gen_regs(void * hal,HalEncTask * task)376 MPP_RET hal_jpege_vepu2_gen_regs(void *hal, HalEncTask *task)
377 {
378 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
379 MppBuffer input = task->input;
380 MppBuffer output = task->output;
381 JpegeSyntax *syntax = &ctx->syntax;
382 RK_U32 width = syntax->width;
383 RK_U32 width_align = MPP_ALIGN(width, 16);
384 RK_U32 height = syntax->height;
385 MppFrameFormat fmt = syntax->format;
386 RK_U32 hor_stride = 0;
387 RK_U32 ver_stride = MPP_ALIGN(height, 16);
388 JpegeBits bits = ctx->bits;
389 RK_U32 *regs = (RK_U32 *)ctx->regs;
390 size_t length = mpp_packet_get_length(task->packet);
391 RK_U8 *buf = mpp_buffer_get_ptr(output);
392 size_t size = mpp_buffer_get_size(output);
393 const RK_U8 *qtable[2] = {NULL};
394 RK_S32 bitpos;
395 RK_S32 bytepos;
396 RK_U32 x_fill = 0;
397 RK_U32 y_fill = 0;
398 VepuFormatCfg fmt_cfg;
399 RK_U32 rotation = 0;
400
401 hal_jpege_dbg_func("enter hal %p\n", hal);
402
403 // do not support mirroring
404 if (syntax->mirroring)
405 mpp_err_f("Warning: do not support mirroring\n");
406
407 if (syntax->rotation == MPP_ENC_ROT_90)
408 rotation = 1;
409 else if (syntax->rotation == MPP_ENC_ROT_270)
410 rotation = 2;
411 else if (syntax->rotation != MPP_ENC_ROT_0)
412 mpp_err_f("Warning: only support 90 or 270 degree rotate, request rotate %d", syntax->rotation);
413 if (rotation) {
414 MPP_SWAP(RK_U32, width, height);
415 MPP_SWAP(RK_U32, width_align, ver_stride);
416 }
417 hor_stride = get_vepu_pixel_stride(&ctx->stride_cfg, width,
418 syntax->hor_stride, fmt);
419
420 //hor_stride must be align with 8, and ver_stride mus align with 2
421 if ((hor_stride & 0x7) || (ver_stride & 0x1) || (hor_stride >= (1 << 15))) {
422 mpp_err_f("illegal resolution, hor_stride %d, ver_stride %d, width %d, height %d\n",
423 syntax->hor_stride, syntax->ver_stride,
424 syntax->width, syntax->height);
425 }
426
427 x_fill = (width_align - width) / 4;
428 y_fill = (ver_stride - height);
429 mpp_assert(x_fill <= 3);
430 mpp_assert(y_fill <= 15);
431 ctx->part_x_fill = x_fill;
432 ctx->part_y_fill = y_fill;
433
434 /* write header to output buffer */
435 jpege_bits_setup(bits, buf, (RK_U32)size);
436 /* seek length bytes data */
437 jpege_seek_bits(bits, length << 3);
438 /* NOTE: write header will update qtable */
439 if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
440 hal_jpege_vepu_rc(ctx, task);
441 qtable[0] = ctx->hal_rc.qtable_y;
442 qtable[1] = ctx->hal_rc.qtable_c;
443 } else {
444 qtable[0] = NULL;
445 qtable[1] = NULL;
446 }
447 write_jpeg_header(bits, syntax, qtable);
448
449 memset(regs, 0, sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS);
450 // input address setup
451 regs[VEPU2_REG_INPUT_Y] = mpp_buffer_get_fd(input);
452 regs[VEPU2_REG_INPUT_U] = regs[VEPU2_REG_INPUT_Y];
453 regs[VEPU2_REG_INPUT_V] = regs[VEPU2_REG_INPUT_Y];
454
455 // output address setup
456 bitpos = jpege_bits_get_bitpos(bits);
457 bytepos = (bitpos + 7) >> 3;
458 ctx->base = buf;
459 ctx->size = size;
460 ctx->sw_bit = bitpos;
461 ctx->part_bytepos = bytepos;
462
463 get_msb_lsb_at_pos(®s[51], ®s[52], buf, bytepos);
464
465 regs[53] = size - bytepos;
466
467 // bus config
468 regs[54] = 16 << 8;
469
470 regs[60] = (((bytepos & 7) * 8) << 16) |
471 (x_fill << 4) |
472 (y_fill);
473 regs[61] = hor_stride;
474
475 regs[77] = mpp_buffer_get_fd(output);
476 if (bytepos)
477 mpp_dev_set_reg_offset(ctx->dev, 77, bytepos);
478 /* 95 - 97 color conversion parameter */
479 {
480 RK_U32 coeffA;
481 RK_U32 coeffB;
482 RK_U32 coeffC;
483 RK_U32 coeffE;
484 RK_U32 coeffF;
485
486 switch (syntax->color_conversion_type) {
487 case 0 : { /* BT.601 */
488 /*
489 * Y = 0.2989 R + 0.5866 G + 0.1145 B
490 * Cb = 0.5647 (B - Y) + 128
491 * Cr = 0.7132 (R - Y) + 128
492 */
493 coeffA = 19589;
494 coeffB = 38443;
495 coeffC = 7504;
496 coeffE = 37008;
497 coeffF = 46740;
498 } break;
499 case 1 : { /* BT.709 */
500 /*
501 * Y = 0.2126 R + 0.7152 G + 0.0722 B
502 * Cb = 0.5389 (B - Y) + 128
503 * Cr = 0.6350 (R - Y) + 128
504 */
505 coeffA = 13933;
506 coeffB = 46871;
507 coeffC = 4732;
508 coeffE = 35317;
509 coeffF = 41615;
510 } break;
511 case 2 : {
512 coeffA = syntax->coeffA;
513 coeffB = syntax->coeffB;
514 coeffC = syntax->coeffC;
515 coeffE = syntax->coeffE;
516 coeffF = syntax->coeffF;
517 } break;
518 default : {
519 mpp_err("invalid color conversion type %d\n",
520 syntax->color_conversion_type);
521 coeffA = 19589;
522 coeffB = 38443;
523 coeffC = 7504;
524 coeffE = 37008;
525 coeffF = 46740;
526 } break;
527 }
528
529 regs[95] = coeffA | (coeffB << 16);
530 regs[96] = coeffC | (coeffE << 16);
531 regs[97] = coeffF;
532 }
533
534 regs[103] = (width_align >> 4) << 8 |
535 (ver_stride >> 4) << 20 |
536 (1 << 6) | /* intra coding */
537 (2 << 4) | /* format jpeg */
538 1; /* encoder start */
539
540 if (!get_vepu_fmt(&fmt_cfg, fmt)) {
541 regs[74] = (fmt_cfg.format << 4) |
542 (rotation << 2);
543 regs[98] = (fmt_cfg.b_mask & 0x1f) << 16 |
544 (fmt_cfg.g_mask & 0x1f) << 8 |
545 (fmt_cfg.r_mask & 0x1f);
546 regs[105] = 7 << 26 | (fmt_cfg.swap_32_in & 1) << 29 |
547 (fmt_cfg.swap_16_in & 1) << 30 |
548 (fmt_cfg.swap_8_in & 1) << 31;
549 }
550
551 regs[107] = ((syntax->part_rows & 0xff) << 16) |
552 jpege_restart_marker[ctx->rst_marker_idx & 7];
553
554 /* encoder interrupt */
555 regs[109] = 1 << 12 | /* clock gating */
556 1 << 10; /* enable timeout interrupt */
557
558 if (syntax->low_delay) {
559 /* slice encode end by RST */
560 regs[107] |= (1 << 24);
561 /* slice interrupt enable */
562 regs[109] |= (1 << 16);
563 }
564
565 /* 0 ~ 31 quantization tables */
566 {
567 RK_S32 i;
568
569 for (i = 0; i < 16; i++) {
570 /* qtable need to reorder in particular order */
571 regs[i] = qtable[0][qp_reorder_table[i * 4 + 0]] << 24 |
572 qtable[0][qp_reorder_table[i * 4 + 1]] << 16 |
573 qtable[0][qp_reorder_table[i * 4 + 2]] << 8 |
574 qtable[0][qp_reorder_table[i * 4 + 3]];
575 }
576 for (i = 0; i < 16; i++) {
577 /* qtable need to reorder in particular order */
578 regs[i + 16] = qtable[1][qp_reorder_table[i * 4 + 0]] << 24 |
579 qtable[1][qp_reorder_table[i * 4 + 1]] << 16 |
580 qtable[1][qp_reorder_table[i * 4 + 2]] << 8 |
581 qtable[1][qp_reorder_table[i * 4 + 3]];
582 }
583 }
584
585 hal_jpege_dbg_func("leave hal %p\n", hal);
586 return MPP_OK;
587 }
588
multi_core_start(HalJpegeCtx * ctx,HalEncTask * task)589 static MPP_RET multi_core_start(HalJpegeCtx *ctx, HalEncTask *task)
590 {
591 JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
592 JpegeSyntax *syntax = &ctx->syntax;
593 MppDevRegOffCfgs *reg_cfg = ctx_ext->reg_cfg;
594 MppDev dev = ctx->dev;
595 RK_U32 *src = (RK_U32 *)ctx->regs;
596 RK_U32 reg_size = ctx->reg_size;
597 MPP_RET ret = MPP_OK;
598 RK_U32 partion_num = ctx_ext->partion_num;
599 RK_U32 mcu_y = 0;
600 RK_U32 i;
601
602 hal_jpege_dbg_detail("start %d partions\n", partion_num);
603
604 for (i = 0; i < partion_num; i++) {
605 RK_U32 part_not_end = i < partion_num - 1;
606 RK_U32 part_not_start = i > 0;
607 RK_U32 *regs = (RK_U32 *)ctx_ext->regs[i];
608 RK_U32 part_enc_mcu_h = ctx_ext->part_rows[i];
609 RK_U32 part_x_fill = ctx->part_x_fill;
610 RK_U32 part_y_fill = ctx->part_y_fill;
611 RK_U32 part_bytepos = ctx->part_bytepos;
612
613 // it only needs to fill the partition on the right and below.
614 if (syntax->rotation == MPP_ENC_ROT_90) {
615 if (part_not_end)
616 part_x_fill = 0;
617 } else if (syntax->rotation == MPP_ENC_ROT_0 || syntax->rotation == MPP_ENC_ROT_180) {
618 if (part_not_end)
619 part_y_fill = 0;
620 } else if (syntax->rotation == MPP_ENC_ROT_270) {
621 if (part_not_start)
622 part_x_fill = 0;
623 } else
624 mpp_err_f("input rotation %d not supported", syntax->rotation);
625
626 memcpy(regs, src, reg_size);
627
628 mpp_dev_multi_offset_reset(reg_cfg);
629
630 if (i == 0) {
631 get_msb_lsb_at_pos(®s[51], ®s[52], ctx->base, part_bytepos);
632 regs[77] = mpp_buffer_get_fd(task->output);
633 regs[53] = mpp_buffer_get_size(task->output) - part_bytepos;
634 regs[60] = (((part_bytepos & 7) * 8) << 16) |
635 (part_x_fill << 4) |
636 (part_y_fill);
637 /* the stream offset had been setup */
638 } else {
639 MppBuffer buf = ctx_ext->partions_buf[i - 1];
640
641 regs[77] = mpp_buffer_get_fd(buf);
642 regs[53] = mpp_buffer_get_size(buf);
643 regs[60] = (((0 & 7) * 8) << 16) |
644 (part_x_fill << 4) |
645 (part_y_fill);
646 }
647
648 regs[103] = syntax->mcu_w << 8 |
649 (part_enc_mcu_h) << 20 |
650 (1 << 6) | /* intra coding */
651 (2 << 4) | /* format jpeg */
652 1; /* encoder start */
653
654 hal_jpege_dbg_detail("part %d, part_not_end 0x%x, rst_marker_idx %d",
655 i, part_not_end, ctx->rst_marker_idx);
656 regs[107] = part_not_end << 24 | ((syntax->part_rows & 0xff) << 16) |
657 jpege_restart_marker[ctx->rst_marker_idx & 7];
658 ctx->rst_marker_idx += ctx_ext->ecs_cnt[i];
659
660 VepuOffsetCfg cfg;
661
662 memset(&cfg, 0, sizeof(cfg));
663
664 cfg.fmt = syntax->format;
665 cfg.width = syntax->width;
666 cfg.height = syntax->height;
667 cfg.hor_stride = syntax->hor_stride;
668 cfg.ver_stride = syntax->ver_stride;
669 cfg.offset_x = syntax->offset_x;
670 cfg.offset_y = syntax->offset_y + mcu_y * 16;
671
672 if (syntax->rotation == MPP_ENC_ROT_90 || syntax->rotation == MPP_ENC_ROT_270) {
673 regs[103] = part_enc_mcu_h << 8 |
674 (syntax->mcu_w) << 20 |
675 (1 << 6) | /* intra coding */
676 (2 << 4) | /* format jpeg */
677 1; /* encoder start */
678
679 /*
680 * It is opposite that position of partitions
681 * of rotation 90 degree and rotation 270 degree.
682 */
683 if (syntax->rotation == MPP_ENC_ROT_270)
684 cfg.offset_x = syntax->offset_x +
685 (syntax->mcu_h - ctx_ext->part_rows[0] - mcu_y) * 16;
686 else
687 cfg.offset_x = syntax->offset_x + mcu_y * 16;
688
689 cfg.offset_y = syntax->offset_y;
690 }
691
692 get_vepu_offset_cfg(&cfg);
693 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
694 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
695 mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
696
697 mcu_y += part_enc_mcu_h;
698
699 do {
700 MppDevRegWrCfg wr_cfg;
701 MppDevRegRdCfg rd_cfg;
702
703 wr_cfg.reg = regs;
704 wr_cfg.size = reg_size;
705 wr_cfg.offset = 0;
706
707 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
708 if (ret) {
709 mpp_err_f("set register write failed %d\n", ret);
710 break;
711 }
712
713 rd_cfg.reg = ctx_ext->regs_out[i];
714 rd_cfg.size = reg_size;
715 rd_cfg.offset = 0;
716
717 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
718 if (ret) {
719 mpp_err_f("set register read failed %d\n", ret);
720 break;
721 }
722
723 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_OFFS, reg_cfg);
724 if (ret) {
725 mpp_err_f("set register offsets failed %d\n", ret);
726 break;
727 }
728
729 if (i < partion_num - 1) {
730 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL);
731 if (ret) {
732 mpp_err_f("send delimit failed %d\n", ret);
733 break;
734 }
735 }
736 } while (0);
737 }
738
739 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
740 if (ret)
741 mpp_err_f("send cmd failed %d\n", ret);
742
743 return ret;
744 }
745
multi_core_wait(HalJpegeCtx * ctx,HalEncTask * task)746 static MPP_RET multi_core_wait(HalJpegeCtx *ctx, HalEncTask *task)
747 {
748 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
749 JpegeFeedback *feedback = &ctx->feedback;
750 RK_U32 sw_bit = 0;
751 RK_U32 hw_bit = 0;
752 MPP_RET ret = MPP_OK;
753 RK_U32 val;
754 RK_U32 i;
755
756 hal_jpege_dbg_detail("poll partion_num %d\n", ctx_ext->partion_num);
757
758 for (i = 0; i < ctx_ext->partion_num; i++) {
759 RK_U32 *regs = ctx_ext->regs_out[i];
760
761 hal_jpege_dbg_detail("poll reg %d %p", i, regs);
762
763 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
764 if (ret)
765 mpp_err_f("poll cmd failed %d\n", ret);
766
767 if (i == 0) {
768 val = regs[109];
769 hal_jpege_dbg_output("hw_status %08x\n", val);
770 feedback->hw_status = val & 0x70;
771 val = regs[53];
772 sw_bit = jpege_bits_get_bitpos(ctx->bits);
773 hw_bit = val;
774 feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
775 hal_jpege_dbg_detail("partion len = %d", hw_bit / 8);
776 task->length = feedback->stream_length;
777 task->hw_length = task->length - ctx->hal_start_pos;
778 } else {
779 void *stream_ptr = mpp_buffer_get_ptr(task->output);
780 void *partion_ptr = mpp_buffer_get_ptr(ctx_ext->partions_buf[i - 1]);
781 RK_U32 partion_len = 0;
782
783 val = regs[109];
784 hal_jpege_dbg_output("hw_status %08x\n", val);
785 feedback->hw_status = val & 0x70;
786 partion_len = regs[53] / 8;
787
788 hal_jpege_dbg_detail("partion_len = %d", partion_len);
789 memcpy(stream_ptr + feedback->stream_length, partion_ptr, partion_len);
790 feedback->stream_length += partion_len;
791 task->length = feedback->stream_length;
792 task->hw_length += partion_len;
793 }
794 }
795
796 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
797 sw_bit, hw_bit, feedback->stream_length, task->hw_length);
798
799 return ret;
800 }
801
hal_jpege_vepu2_start(void * hal,HalEncTask * task)802 MPP_RET hal_jpege_vepu2_start(void *hal, HalEncTask *task)
803 {
804 MPP_RET ret = MPP_OK;
805 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
806 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
807
808 hal_jpege_dbg_func("enter hal %p\n", hal);
809
810 if (ctx_ext && ctx_ext->multi_core_enabled) {
811 multi_core_start(ctx, task);
812 } else {
813 hal_jpege_vepu2_set_extra_info(ctx->dev, &ctx->syntax, 0);
814 do {
815 MppDevRegWrCfg wr_cfg;
816 MppDevRegRdCfg rd_cfg;
817 RK_U32 reg_size = ctx->reg_size;
818
819 wr_cfg.reg = ctx->regs;
820 wr_cfg.size = reg_size;
821 wr_cfg.offset = 0;
822
823 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
824 if (ret) {
825 mpp_err_f("set register write failed %d\n", ret);
826 break;
827 }
828
829 rd_cfg.reg = ctx->regs;
830 rd_cfg.size = reg_size;
831 rd_cfg.offset = 0;
832
833 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
834 if (ret) {
835 mpp_err_f("set register read failed %d\n", ret);
836 break;
837 }
838
839 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
840 if (ret) {
841 mpp_err_f("send cmd failed %d\n", ret);
842 break;
843 }
844 } while (0);
845 }
846 hal_jpege_dbg_func("leave hal %p\n", hal);
847 (void)task;
848 return ret;
849 }
850
hal_jpege_vepu2_wait(void * hal,HalEncTask * task)851 MPP_RET hal_jpege_vepu2_wait(void *hal, HalEncTask *task)
852 {
853 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
854 JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
855 MPP_RET ret = MPP_OK;
856
857 hal_jpege_dbg_func("enter hal %p\n", hal);
858
859 if (ctx_ext && ctx_ext->multi_core_enabled) {
860 multi_core_wait(ctx, task);
861 } else {
862 JpegeFeedback *feedback = &ctx->feedback;
863 JpegeBits bits = ctx->bits;
864 RK_U32 *regs = ctx->regs;
865 RK_U32 sw_bit = 0;
866 RK_U32 hw_bit = 0;
867 RK_U32 val;
868
869 if (ctx->dev) {
870 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
871 if (ret)
872 mpp_err_f("poll cmd failed %d\n", ret);
873 }
874
875 val = regs[109];
876 hal_jpege_dbg_output("hw_status %08x\n", val);
877 feedback->hw_status = val & 0x70;
878 val = regs[53];
879
880 sw_bit = jpege_bits_get_bitpos(bits);
881 hw_bit = val;
882
883 // NOTE: hardware will return 64 bit access byte count
884 feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
885 task->length = feedback->stream_length;
886 task->hw_length = task->length - ctx->hal_start_pos;
887
888 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
889 sw_bit, hw_bit, feedback->stream_length, task->hw_length);
890 }
891
892 hal_jpege_dbg_func("leave hal %p\n", hal);
893 return ret;
894 }
895
hal_jpege_vepu2_part_start(void * hal,HalEncTask * task)896 MPP_RET hal_jpege_vepu2_part_start(void *hal, HalEncTask *task)
897 {
898 MPP_RET ret = MPP_OK;
899 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
900 JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
901 RK_U32 mcu_w = syntax->mcu_w;
902 RK_U32 mcu_h = syntax->mcu_h;
903 RK_U32 mcu_y = ctx->mcu_y;
904 RK_U32 part_mcu_h = syntax->part_rows;
905 RK_U32 *regs = (RK_U32 *)ctx->regs;
906 RK_U32 part_enc_h;
907 RK_U32 part_enc_mcu_h;
908 RK_U32 part_y_fill;
909 RK_U32 part_not_end;
910
911 hal_jpege_dbg_func("enter part start %p\n", hal);
912
913 /* Fix register for each part encoding */
914 task->part_first = !mcu_y;
915 if (mcu_y + part_mcu_h < mcu_h) {
916 part_enc_h = part_mcu_h * 16;
917 part_enc_mcu_h = part_mcu_h;
918 part_y_fill = 0;
919 part_not_end = 1;
920 task->part_last = 0;
921 } else {
922 part_enc_h = syntax->height - mcu_y * 16;
923 part_enc_mcu_h = MPP_ALIGN(part_enc_h, 16) / 16;;
924 part_y_fill = ctx->part_y_fill;
925 part_not_end = 0;
926 task->part_last = 1;
927 }
928
929 hal_jpege_dbg_detail("part first %d last %d\n", task->part_first, task->part_last);
930
931 get_msb_lsb_at_pos(®s[51], ®s[52], ctx->base, ctx->part_bytepos);
932
933 regs[53] = ctx->size - ctx->part_bytepos;
934
935 regs[60] = (((ctx->part_bytepos & 7) * 8) << 16) |
936 (ctx->part_x_fill << 4) |
937 (part_y_fill);
938
939 regs[77] = mpp_buffer_get_fd(task->output);
940 if (ctx->part_bytepos)
941 mpp_dev_set_reg_offset(ctx->dev, 77, ctx->part_bytepos);
942
943 regs[103] = mcu_w << 8 |
944 (part_enc_mcu_h) << 20 |
945 (1 << 6) | /* intra coding */
946 (2 << 4) | /* format jpeg */
947 1; /* encoder start */
948
949 hal_jpege_dbg_detail("part_not_end 0x%x, rst_marker_idx %d",
950 part_not_end, ctx->rst_marker_idx);
951 regs[107] = part_not_end << 24 | jpege_restart_marker[ctx->rst_marker_idx & 7];
952 ctx->rst_marker_idx++;
953
954 hal_jpege_vepu2_set_extra_info(ctx->dev, syntax, mcu_y);
955 ctx->mcu_y += part_enc_mcu_h;
956
957 do {
958 MppDevRegWrCfg wr_cfg;
959 MppDevRegRdCfg rd_cfg;
960 RK_U32 reg_size = ctx->reg_size;
961
962 wr_cfg.reg = ctx->regs;
963 wr_cfg.size = reg_size;
964 wr_cfg.offset = 0;
965
966 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
967 if (ret) {
968 mpp_err_f("set register write failed %d\n", ret);
969 break;
970 }
971
972 rd_cfg.reg = ctx->regs_out;
973 rd_cfg.size = reg_size;
974 rd_cfg.offset = 0;
975
976 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
977 if (ret) {
978 mpp_err_f("set register read failed %d\n", ret);
979 break;
980 }
981
982 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
983 if (ret) {
984 mpp_err_f("send cmd failed %d\n", ret);
985 break;
986 }
987 } while (0);
988
989 hal_jpege_dbg_func("leave part start %p\n", hal);
990 (void)task;
991 return ret;
992 }
993
hal_jpege_vepu2_part_wait(void * hal,HalEncTask * task)994 MPP_RET hal_jpege_vepu2_part_wait(void *hal, HalEncTask *task)
995 {
996 MPP_RET ret = MPP_OK;
997 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
998 RK_U32 *regs = ctx->regs_out;
999 JpegeFeedback *feedback = &ctx->feedback;
1000 RK_U32 hw_bit = 0;
1001
1002 hal_jpege_dbg_func("enter part wait %p\n", hal);
1003
1004 if (ctx->dev) {
1005 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
1006 if (ret)
1007 mpp_err_f("poll cmd failed %d\n", ret);
1008 }
1009
1010 hal_jpege_dbg_detail("hw_status %08x\n", regs[109]);
1011
1012 hw_bit = regs[53];
1013
1014 hal_jpege_dbg_detail("byte pos %d -> %d\n", ctx->part_bytepos,
1015 (ctx->part_bytepos & (~7)) + (hw_bit / 8));
1016 ctx->part_bytepos = (ctx->part_bytepos & (~7)) + (hw_bit / 8);
1017
1018 feedback->stream_length = ctx->part_bytepos;
1019 task->length = ctx->part_bytepos;
1020 task->hw_length = task->length - ctx->hal_start_pos;
1021
1022 hal_jpege_dbg_detail("stream_length %d, hw_byte %d",
1023 feedback->stream_length, hw_bit / 8);
1024
1025 hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
1026 ctx->sw_bit, hw_bit, feedback->stream_length, task->hw_length);
1027
1028 hal_jpege_dbg_func("leave part wait %p\n", hal);
1029 return ret;
1030 }
1031
hal_jpege_vepu2_ret_task(void * hal,HalEncTask * task)1032 MPP_RET hal_jpege_vepu2_ret_task(void *hal, HalEncTask *task)
1033 {
1034 HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1035 EncRcTaskInfo *rc_info = &task->rc_task->info;
1036
1037 ctx->hal_rc.last_quality = task->rc_task->info.quality_target;
1038 task->rc_task->info.bit_real = ctx->feedback.stream_length * 8;
1039 task->hal_ret.data = &ctx->feedback;
1040 task->hal_ret.number = 1;
1041
1042 if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
1043 if (!ctx->hal_rc.q_factor)
1044 rc_info->quality_real = rc_info->quality_target;
1045 else
1046 rc_info->quality_real = ctx->hal_rc.q_factor;
1047 } else {
1048 rc_info->quality_real = ctx->cfg->codec.jpeg.q_factor;
1049 }
1050
1051 return MPP_OK;
1052 }
1053
/*
 * Encoder hal entry table for the VEPU2 JPEG encoder. Wires the functions
 * of this file into the MppEncHalApi slots; there is no prepare step.
 */
const MppEncHalApi hal_jpege_vepu2 = {
    .name       = "hal_jpege_vepu2",
    .coding     = MPP_VIDEO_CodingMJPEG,
    .ctx_size   = sizeof(HalJpegeCtx),
    .flag       = 0,
    .init       = hal_jpege_vepu2_init,
    .deinit     = hal_jpege_vepu2_deinit,
    .prepare    = NULL,
    .get_task   = hal_jpege_vepu2_get_task,
    .gen_regs   = hal_jpege_vepu2_gen_regs,
    .start      = hal_jpege_vepu2_start,
    .wait       = hal_jpege_vepu2_wait,
    /* part_* pair encodes a frame piecewise, one group of rows at a time */
    .part_start = hal_jpege_vepu2_part_start,
    .part_wait  = hal_jpege_vepu2_part_wait,
    .ret_task   = hal_jpege_vepu2_ret_task,
};
1070