xref: /rockchip-linux_mpp/mpp/hal/vpu/jpege/hal_jpege_vepu2_v2.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2015 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_jpege_vepu2"
18 
19 #include <string.h>
20 
21 #include "mpp_env.h"
22 #include "mpp_common.h"
23 #include "mpp_mem.h"
24 #include "mpp_platform.h"
25 #include "mpp_dmabuf.h"
26 
27 #include "mpp_enc_hal.h"
28 #include "vcodec_service.h"
29 
30 #include "hal_jpege_debug.h"
31 #include "hal_jpege_api_v2.h"
32 #include "hal_jpege_base.h"
33 
34 #define VEPU_JPEGE_VEPU2_NUM_REGS   184
35 #define VEPU2_REG_INPUT_Y           48
36 #define VEPU2_REG_INPUT_U           49
37 #define VEPU2_REG_INPUT_V           50
38 
39 typedef struct jpege_vepu2_reg_set_t {
40     RK_U32  val[VEPU_JPEGE_VEPU2_NUM_REGS];
41 } jpege_vepu2_reg_set;
42 
43 #define MAX_CORE_NUM                4
44 
45 typedef struct JpegeMultiCoreCtx_t {
46     RK_U32              multi_core_enabled;
47     RK_U32              partion_num;
48     MppDevRegOffCfgs    *reg_cfg;
49 
50     MppBufferGroup      partions_group;
51     MppBuffer           partions_buf[MAX_CORE_NUM - 1];
52     RK_U32              buf_size;
53 
54     RK_U32              part_rows[MAX_CORE_NUM];
55     RK_U32              ecs_cnt[MAX_CORE_NUM];
56 
57     void                *regs_base;
58     void                *regs[MAX_CORE_NUM];
59     void                *regs_out[MAX_CORE_NUM];
60 } JpegeMultiCoreCtx;
61 
hal_jpege_vepu2_init(void * hal,MppEncHalCfg * cfg)62 MPP_RET hal_jpege_vepu2_init(void *hal, MppEncHalCfg *cfg)
63 {
64     MPP_RET ret = MPP_OK;
65     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
66     MppClientType type;
67     RK_U32 vcodec_type = mpp_get_vcodec_type();
68 
69     mpp_env_get_u32("hal_jpege_debug", &hal_jpege_debug, 0);
70     hal_jpege_dbg_func("enter hal %p cfg %p\n", hal, cfg);
71 
72     /* update output to MppEnc */
73     type = (vcodec_type & HAVE_VEPU2_JPEG) ?
74            VPU_CLIENT_VEPU2_JPEG : VPU_CLIENT_VEPU2;
75 
76     cfg->type = type;
77     ret = mpp_dev_init(&cfg->dev, type);
78     if (ret) {
79         mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
80         return ret;
81     }
82     ctx->dev = cfg->dev;
83     ctx->type = cfg->type;
84     ctx->task_cnt = cfg->task_cnt;
85 
86     jpege_bits_init(&ctx->bits);
87     mpp_assert(ctx->bits);
88 
89     ctx->cfg = cfg->cfg;
90     ctx->reg_size = sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS;
91     ctx->regs = mpp_calloc_size(void, (ctx->reg_size + EXTRA_INFO_SIZE) * ctx->task_cnt);
92     if (NULL == ctx->regs) {
93         mpp_err_f("failed to malloc vepu2 regs\n");
94         return MPP_NOK;
95     }
96 
97     ctx->regs_out = mpp_calloc_size(void, (ctx->reg_size + EXTRA_INFO_SIZE) *  ctx->task_cnt);
98     if (NULL == ctx->regs_out) {
99         mpp_err_f("failed to malloc vepu2 regs\n");
100         return MPP_NOK;
101     }
102 
103     hal_jpege_rc_init(&ctx->hal_rc);
104 
105     hal_jpege_dbg_func("leave hal %p\n", hal);
106     return MPP_OK;
107 }
108 
hal_jpege_vepu2_deinit(void * hal)109 MPP_RET hal_jpege_vepu2_deinit(void *hal)
110 {
111     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
112 
113     hal_jpege_dbg_func("enter hal %p\n", hal);
114 
115     if (ctx->bits) {
116         jpege_bits_deinit(ctx->bits);
117         ctx->bits = NULL;
118     }
119 
120     if (ctx->dev) {
121         mpp_dev_deinit(ctx->dev);
122         ctx->dev = NULL;
123     }
124 
125     if (ctx->ctx_ext) {
126         JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
127         RK_U32 i;
128 
129         if (ctx_ext->reg_cfg) {
130             mpp_dev_multi_offset_deinit(ctx_ext->reg_cfg);
131             ctx_ext->reg_cfg = NULL;
132         }
133 
134         for (i = 0; i < MAX_CORE_NUM - 1; i++)
135             if (ctx_ext->partions_buf[i])
136                 mpp_buffer_put(ctx_ext->partions_buf[i]);
137 
138         if (ctx_ext->partions_group) {
139             mpp_buffer_group_put(ctx_ext->partions_group);
140             ctx_ext->partions_group = NULL;
141         }
142 
143         MPP_FREE(ctx_ext->regs_base);
144         MPP_FREE(ctx->ctx_ext);
145     }
146 
147     MPP_FREE(ctx->regs);
148     MPP_FREE(ctx->regs_out);
149 
150     hal_jpege_dbg_func("leave hal %p\n", hal);
151     return MPP_OK;
152 }
153 
hal_jpege_vepu2_get_task(void * hal,HalEncTask * task)154 MPP_RET hal_jpege_vepu2_get_task(void *hal, HalEncTask *task)
155 {
156     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
157     JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
158     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
159     RK_U32 i = 0;
160 
161     hal_jpege_dbg_func("enter hal %p\n", hal);
162 
163     memcpy(&ctx->syntax, syntax, sizeof(ctx->syntax));
164 
165     ctx->hal_start_pos = mpp_packet_get_length(task->packet);
166 
167     /* prepare for part encoding */
168     ctx->mcu_y = 0;
169     ctx->mcu_h = syntax->mcu_ver_cnt;
170     ctx->sw_bit = 0;
171     ctx->part_bytepos = 0;
172     ctx->part_x_fill = 0;
173     ctx->part_y_fill = 0;
174     ctx->rst_marker_idx = 0;
175     task->part_first = 1;
176     task->part_last = 0;
177     task->flags.reg_idx = 0;
178 
179     /* rk3588 4 core frame parallel */
180     if (ctx->task_cnt > 1) {
181         task->flags.reg_idx = ctx->task_idx++;
182         if (ctx->task_idx >= ctx->task_cnt)
183             ctx->task_idx = 0;
184         goto MULTI_CORE_SPLIT_DONE;
185     }
186 
187     /* Split single task to multi cores on rk3588 */
188     if (ctx_ext)
189         ctx_ext->multi_core_enabled = 0;
190 
191     if (ctx->type == VPU_CLIENT_VEPU2_JPEG) {
192         RK_U32 width = ctx->cfg->prep.width;
193         RK_U32 height = ctx->cfg->prep.height;
194         RK_U32 buf_size = width * height / 2;
195 
196         /* small image do not need to split into four segments */
197         if (width * height <= 1280 * 720 && (height <= 720 || width <= 720))
198             goto MULTI_CORE_SPLIT_DONE;
199 
200         if (!ctx_ext) {
201             ctx_ext = mpp_calloc(JpegeMultiCoreCtx, 1);
202             ctx->ctx_ext = ctx_ext;
203         }
204 
205         mpp_assert(ctx_ext);
206 
207         if (!ctx_ext->partions_group) {
208             mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_DMA_HEAP | MPP_BUFFER_FLAGS_CACHABLE);
209             if (!ctx_ext->partions_group)
210                 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_ION);
211         }
212 
213         mpp_assert(ctx_ext->partions_group);
214 
215         if (ctx_ext->buf_size != buf_size) {
216             MppBuffer buf = NULL;
217 
218             for (i = 0; i < MAX_CORE_NUM - 1; i++) {
219                 buf = ctx_ext->partions_buf[i];
220                 if (buf)
221                     mpp_buffer_put(buf);
222             }
223 
224             mpp_buffer_group_clear(ctx_ext->partions_group);
225 
226             for (i = 0; i < MAX_CORE_NUM - 1; i++) {
227                 mpp_buffer_get(ctx_ext->partions_group, &buf, buf_size);
228                 mpp_assert(buf);
229                 ctx_ext->partions_buf[i] = buf;
230             }
231 
232             ctx_ext->buf_size = buf_size;
233         }
234 
235         if (!ctx_ext->regs_base) {
236             void *regs_base = mpp_calloc_size(void, ctx->reg_size * MAX_CORE_NUM * 2);
237             size_t reg_size = ctx->reg_size;
238 
239             ctx_ext->regs_base = regs_base;
240             for (i = 0; i < MAX_CORE_NUM; i++) {
241                 ctx_ext->regs[i] = regs_base;
242                 regs_base += reg_size;
243 
244                 ctx_ext->regs_out[i] = regs_base;
245                 regs_base += reg_size;
246             }
247         }
248 
249         {
250             RK_U32 mb_w = MPP_ALIGN(width, 16) / 16;
251             RK_U32 mb_h = MPP_ALIGN(height, 16) / 16;
252             RK_U32 part_rows = MPP_ALIGN(mb_h, 4) / 4;
253 
254             ctx_ext->partion_num = 0;
255 
256             if (ctx->cfg->split.split_mode == MPP_ENC_SPLIT_BY_CTU) {
257                 RK_U32 ecs_num = (mb_h + syntax->part_rows - 1) / syntax->part_rows;
258                 RK_U32 *core_ecs = ctx_ext->ecs_cnt;
259 
260                 if (ecs_num > 24 || ecs_num <= 8) {
261                     RK_U32 divider = ecs_num > 24 ? 8 : 1;
262                     RK_U32 quotient = ecs_num / divider;
263                     RK_U32 remainder = ecs_num % divider;
264                     RK_U32 runs = quotient  / MAX_CORE_NUM;
265                     RK_U32 runs_left = quotient % MAX_CORE_NUM;
266 
267                     if (runs > 0) {
268                         for (i = 0; i < MAX_CORE_NUM; i++)
269                             core_ecs[i] = runs * divider;
270                     }
271 
272                     for (i = 0; i < runs_left; i++)
273                         core_ecs[i] += divider;
274 
275                     core_ecs[MAX_CORE_NUM - 1] += remainder;
276                 } else if (ecs_num > 20) {
277                     core_ecs[0] = core_ecs[1] = 8;
278                     core_ecs[2] = (ecs_num - 8 * 2) / 2;
279                     core_ecs[3] = ecs_num - 8 * 2 - core_ecs[2];
280                 } else if (ecs_num > 16) {
281                     core_ecs[0] = 8;
282                     core_ecs[1] = core_ecs[2] = 4;
283                     core_ecs[3] = ecs_num - 8 - 4 * 2;
284                 } else if (ecs_num > 8) {
285                     core_ecs[0] = core_ecs[1] = 4;
286                     core_ecs[2] = (ecs_num - 4 * 2) / 2;
287                     core_ecs[3] = ecs_num - 4 * 2 - core_ecs[2];
288                 }
289 
290                 for (i = 0; i < MAX_CORE_NUM; i++) {
291                     ctx_ext->part_rows[i] = core_ecs[i] * syntax->part_rows;
292                     hal_jpege_dbg_detail("part %d, ecs %d, rows %d", i, core_ecs[i],
293                                          ctx_ext->part_rows[i]);
294                     if (core_ecs[i])
295                         ctx_ext->partion_num++;
296                 }
297             } else {
298                 for (i = 0; i < MAX_CORE_NUM; i++) {
299                     part_rows = (mb_h >= part_rows) ? part_rows : mb_h;
300 
301                     ctx_ext->part_rows[i] = part_rows;
302                     ctx_ext->ecs_cnt[i] = 1;
303 
304                     hal_jpege_dbg_detail("part %d row %d restart %d\n",
305                                          i, part_rows, mb_w * part_rows);
306 
307                     if (part_rows)
308                         ctx_ext->partion_num++;
309 
310                     if (i == 0 && !ctx->syntax.restart_ri)
311                         ctx->syntax.restart_ri = mb_w * part_rows;
312 
313                     mb_h -= part_rows;
314                 }
315             }
316         }
317 
318         if (!ctx_ext->reg_cfg)
319             mpp_dev_multi_offset_init(&ctx_ext->reg_cfg, 24);
320 
321         syntax->low_delay = 1;
322         ctx_ext->multi_core_enabled = 1;
323     }
324 
325     if (ctx->cfg->jpeg.update) {
326         hal_jpege_rc_update(&ctx->hal_rc, syntax);
327         ctx->cfg->jpeg.update = 0;
328     }
329 
330     task->rc_task->frm.is_intra = 1;
331 
332 MULTI_CORE_SPLIT_DONE:
333 
334     hal_jpege_dbg_func("leave hal %p\n", hal);
335 
336     return MPP_OK;
337 }
338 
hal_jpege_vepu2_set_extra_info(MppDev dev,JpegeSyntax * syntax,RK_U32 start_mbrow)339 static MPP_RET hal_jpege_vepu2_set_extra_info(MppDev dev, JpegeSyntax *syntax,
340                                               RK_U32 start_mbrow)
341 {
342     VepuOffsetCfg cfg;
343 
344     cfg.fmt = syntax->format;
345     cfg.width = syntax->width;
346     cfg.height = syntax->height;
347     cfg.hor_stride = syntax->hor_stride;
348     cfg.ver_stride = syntax->ver_stride;
349     cfg.offset_x = syntax->offset_x;
350     cfg.offset_y = syntax->offset_y + start_mbrow * 16;
351 
352     get_vepu_offset_cfg(&cfg);
353 
354     if (cfg.offset_byte[0])
355         mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
356 
357     if (cfg.offset_byte[1])
358         mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
359 
360     if (cfg.offset_byte[2])
361         mpp_dev_set_reg_offset(dev, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
362 
363     return MPP_OK;
364 }
365 
hal_jpege_vepu2_gen_regs(void * hal,HalEncTask * task)366 MPP_RET hal_jpege_vepu2_gen_regs(void *hal, HalEncTask *task)
367 {
368     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
369     MppBuffer input  = task->input;
370     MppBuffer output = task->output;
371     JpegeSyntax *syntax = &ctx->syntax;
372     RK_U32 width        = syntax->width;
373     RK_U32 width_align  = MPP_ALIGN(width, 16);
374     RK_U32 height       = syntax->height;
375     MppFrameFormat fmt  = syntax->format;
376     RK_U32 hor_stride   = 0;
377     RK_U32 ver_stride   = MPP_ALIGN(height, 16);
378     JpegeBits bits      = ctx->bits;
379     RK_S32 reg_idx      = task->flags.reg_idx;
380     RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
381     size_t length = mpp_packet_get_length(task->packet);
382     RK_U8  *buf = mpp_buffer_get_ptr(output);
383     size_t size = mpp_buffer_get_size(output);
384     RK_S32 bitpos;
385     RK_S32 bytepos;
386     RK_U32 x_fill = 0;
387     RK_U32 y_fill = 0;
388     VepuFormatCfg fmt_cfg;
389     RK_U32 rotation = 0;
390 
391     hal_jpege_dbg_func("enter hal %p\n", hal);
392 
393     // do not support mirroring
394     if (syntax->mirroring)
395         mpp_err_f("Warning: do not support mirroring\n");
396 
397     if (syntax->rotation == MPP_ENC_ROT_90)
398         rotation = 1;
399     else if (syntax->rotation == MPP_ENC_ROT_270)
400         rotation = 2;
401     else if (syntax->rotation != MPP_ENC_ROT_0)
402         mpp_err_f("Warning: only support 90 or 270 degree rotate, request rotate %d", syntax->rotation);
403     if (rotation) {
404         MPP_SWAP(RK_U32, width, height);
405         MPP_SWAP(RK_U32, width_align, ver_stride);
406     }
407     hor_stride = get_vepu_pixel_stride(&ctx->stride_cfg, width,
408                                        syntax->hor_stride, fmt);
409 
410     //hor_stride must be align with 8, and ver_stride mus align with 2
411     if ((hor_stride & 0x7) || (ver_stride & 0x1) || (hor_stride >= (1 << 15))) {
412         mpp_err_f("illegal resolution, hor_stride %d, ver_stride %d, width %d, height %d\n",
413                   syntax->hor_stride, syntax->ver_stride,
414                   syntax->width, syntax->height);
415     }
416 
417     x_fill = (width_align - width) / 4;
418     y_fill = (ver_stride - height);
419     mpp_assert(x_fill <= 3);
420     mpp_assert(y_fill <= 15);
421     ctx->part_x_fill = x_fill;
422     ctx->part_y_fill = y_fill;
423 
424     mpp_buffer_sync_begin(output);
425 
426     if (syntax->q_mode == JPEG_QFACTOR) {
427         syntax->q_factor = 100 - task->rc_task->info.quality_target;
428         hal_jpege_rc_update(&ctx->hal_rc, syntax);
429     }
430 
431     /* write header to output buffer */
432     jpege_bits_setup(bits, buf, (RK_U32)size);
433     /* seek length bytes data */
434     jpege_seek_bits(bits, length << 3);
435     /* NOTE: write header will update qtable */
436     write_jpeg_header(bits, syntax, &ctx->hal_rc);
437 
438     memset(regs, 0, sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS);
439     // input address setup
440     regs[VEPU2_REG_INPUT_Y] = mpp_buffer_get_fd(input);
441     regs[VEPU2_REG_INPUT_U] = regs[VEPU2_REG_INPUT_Y];
442     regs[VEPU2_REG_INPUT_V] = regs[VEPU2_REG_INPUT_Y];
443 
444     // output address setup
445     bitpos = jpege_bits_get_bitpos(bits);
446     bytepos = (bitpos + 7) >> 3;
447     ctx->base = buf;
448     ctx->size = size;
449     ctx->sw_bit = bitpos;
450     ctx->part_bytepos = bytepos;
451 
452     get_msb_lsb_at_pos(&regs[51], &regs[52], buf, bytepos);
453 
454     mpp_buffer_sync_end(output);
455 
456     regs[53] = size - bytepos;
457 
458     // bus config
459     regs[54] = 16 << 8;
460 
461     regs[60] = (((bytepos & 7) * 8) << 16) |
462                (x_fill << 4) |
463                (y_fill);
464     regs[61] = hor_stride;
465 
466     regs[77] = mpp_buffer_get_fd(output);
467     if (bytepos)
468         mpp_dev_set_reg_offset(ctx->dev, 77, bytepos);
469     /* 95 - 97 color conversion parameter */
470     {
471         RK_U32 coeffA;
472         RK_U32 coeffB;
473         RK_U32 coeffC;
474         RK_U32 coeffE;
475         RK_U32 coeffF;
476 
477         switch (syntax->color_conversion_type) {
478         case 0 : {  /* BT.601 */
479             /*
480              * Y  = 0.2989 R + 0.5866 G + 0.1145 B
481              * Cb = 0.5647 (B - Y) + 128
482              * Cr = 0.7132 (R - Y) + 128
483              */
484             coeffA = 19589;
485             coeffB = 38443;
486             coeffC = 7504;
487             coeffE = 37008;
488             coeffF = 46740;
489         } break;
490         case 1 : {  /* BT.709 */
491             /*
492              * Y  = 0.2126 R + 0.7152 G + 0.0722 B
493              * Cb = 0.5389 (B - Y) + 128
494              * Cr = 0.6350 (R - Y) + 128
495              */
496             coeffA = 13933;
497             coeffB = 46871;
498             coeffC = 4732;
499             coeffE = 35317;
500             coeffF = 41615;
501         } break;
502         case 2 : {
503             coeffA = syntax->coeffA;
504             coeffB = syntax->coeffB;
505             coeffC = syntax->coeffC;
506             coeffE = syntax->coeffE;
507             coeffF = syntax->coeffF;
508         } break;
509         default : {
510             mpp_err("invalid color conversion type %d\n",
511                     syntax->color_conversion_type);
512             coeffA = 19589;
513             coeffB = 38443;
514             coeffC = 7504;
515             coeffE = 37008;
516             coeffF = 46740;
517         } break;
518         }
519 
520         regs[95] = coeffA | (coeffB << 16);
521         regs[96] = coeffC | (coeffE << 16);
522         regs[97] = coeffF;
523     }
524 
525     regs[103] = (width_align >> 4) << 8  |
526                 (ver_stride >> 4) << 20 |
527                 (1 << 6) |  /* intra coding  */
528                 (2 << 4) |  /* format jpeg   */
529                 1;          /* encoder start */
530 
531     if (!get_vepu_fmt(&fmt_cfg, fmt)) {
532         regs[74] = (fmt_cfg.format << 4) |
533                    (rotation << 2);
534         regs[98] = (fmt_cfg.b_mask & 0x1f) << 16 |
535                    (fmt_cfg.g_mask & 0x1f) << 8  |
536                    (fmt_cfg.r_mask & 0x1f);
537         regs[105] = 7 << 26 | (fmt_cfg.swap_32_in & 1) << 29 |
538                     (fmt_cfg.swap_16_in & 1) << 30 |
539                     (fmt_cfg.swap_8_in & 1) << 31;
540     }
541 
542     regs[107] = ((syntax->part_rows & 0xff) << 16) |
543                 jpege_restart_marker[ctx->rst_marker_idx & 7];
544 
545     /* encoder interrupt */
546     regs[109] = 1 << 12 |   /* clock gating */
547                 1 << 10;    /* enable timeout interrupt */
548 
549     if (syntax->low_delay) {
550         /* slice encode end by RST */
551         regs[107] |= (1 << 24);
552         /* slice interrupt enable */
553         regs[109] |= (1 << 16);
554     }
555 
556     /* 0 ~ 31 quantization tables */
557     {
558         RK_S32 i;
559 
560         for (i = 0; i < 16; i++) {
561             /* qtable need to reorder in particular order */
562             regs[i] = ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 0]] << 24 |
563                       ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 1]] << 16 |
564                       ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 2]] << 8 |
565                       ctx->hal_rc.qtables[0][qp_reorder_table[i * 4 + 3]];
566         }
567         for (i = 0; i < 16; i++) {
568             /* qtable need to reorder in particular order */
569             regs[i + 16] = ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 0]] << 24 |
570                            ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 1]] << 16 |
571                            ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 2]] << 8 |
572                            ctx->hal_rc.qtables[1][qp_reorder_table[i * 4 + 3]];
573         }
574     }
575 
576     hal_jpege_dbg_func("leave hal %p\n", hal);
577     return MPP_OK;
578 }
579 
multi_core_start(HalJpegeCtx * ctx,HalEncTask * task)580 static MPP_RET multi_core_start(HalJpegeCtx *ctx, HalEncTask *task)
581 {
582     JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
583     JpegeSyntax *syntax = &ctx->syntax;
584     MppDevRegOffCfgs *reg_cfg = ctx_ext->reg_cfg;
585     MppDev dev = ctx->dev;
586     RK_S32 reg_idx = task->flags.reg_idx;
587     RK_U32 *src = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
588     RK_U32 reg_size = ctx->reg_size;
589     MPP_RET ret = MPP_OK;
590     RK_U32 partion_num = ctx_ext->partion_num;
591     RK_U32 mcu_y = 0;
592     RK_U32 i;
593 
594     hal_jpege_dbg_detail("start %d partions\n", partion_num);
595 
596     for (i = 0; i < partion_num; i++) {
597         RK_U32 part_not_end = i < partion_num - 1;
598         RK_U32 part_not_start = i > 0;
599         RK_U32 *regs = (RK_U32 *)ctx_ext->regs[i];
600         RK_U32 part_enc_mcu_h = ctx_ext->part_rows[i];
601         RK_U32 part_x_fill = ctx->part_x_fill;
602         RK_U32 part_y_fill = ctx->part_y_fill;
603         RK_U32 part_bytepos = ctx->part_bytepos;
604 
605         // it only needs to fill the partition on the right and below.
606         if (syntax->rotation == MPP_ENC_ROT_90) {
607             if (part_not_end)
608                 part_x_fill = 0;
609         } else if (syntax->rotation == MPP_ENC_ROT_0 || syntax->rotation == MPP_ENC_ROT_180) {
610             if (part_not_end)
611                 part_y_fill = 0;
612         } else if (syntax->rotation == MPP_ENC_ROT_270) {
613             if (part_not_start)
614                 part_x_fill = 0;
615         } else
616             mpp_err_f("input rotation %d not supported", syntax->rotation);
617 
618         memcpy(regs, src, reg_size);
619 
620         mpp_dev_multi_offset_reset(reg_cfg);
621 
622         if (i == 0) {
623             get_msb_lsb_at_pos(&regs[51], &regs[52], ctx->base, part_bytepos);
624             regs[77] = mpp_buffer_get_fd(task->output);
625             regs[53] = mpp_buffer_get_size(task->output) - part_bytepos;
626             regs[60] = (((part_bytepos & 7) * 8) << 16) |
627                        (part_x_fill << 4) |
628                        (part_y_fill);
629             /* the stream offset had been setup */
630         } else {
631             MppBuffer buf = ctx_ext->partions_buf[i - 1];
632 
633             regs[77] = mpp_buffer_get_fd(buf);
634             regs[53] = mpp_buffer_get_size(buf);
635             regs[60] = (((0 & 7) * 8) << 16) |
636                        (part_x_fill << 4) |
637                        (part_y_fill);
638         }
639 
640         regs[103] = syntax->mcu_hor_cnt << 8  |
641                     (part_enc_mcu_h) << 20 |
642                     (1 << 6) |  /* intra coding  */
643                     (2 << 4) |  /* format jpeg   */
644                     1;          /* encoder start */
645 
646         hal_jpege_dbg_detail("part %d, part_not_end 0x%x, rst_marker_idx %d",
647                              i, part_not_end, ctx->rst_marker_idx);
648         regs[107] = part_not_end << 24 | ((syntax->part_rows & 0xff) << 16) |
649                     jpege_restart_marker[ctx->rst_marker_idx & 7];
650         ctx->rst_marker_idx += ctx_ext->ecs_cnt[i];
651 
652         VepuOffsetCfg cfg;
653 
654         memset(&cfg, 0, sizeof(cfg));
655 
656         cfg.fmt = syntax->format;
657         cfg.width = syntax->width;
658         cfg.height = syntax->height;
659         cfg.hor_stride = syntax->hor_stride;
660         cfg.ver_stride = syntax->ver_stride;
661         cfg.offset_x = syntax->offset_x;
662         cfg.offset_y = syntax->offset_y + mcu_y * 16;
663 
664         if (syntax->rotation == MPP_ENC_ROT_90 || syntax->rotation == MPP_ENC_ROT_270) {
665             regs[103] = part_enc_mcu_h << 8  |
666                         (syntax->mcu_hor_cnt) << 20 |
667                         (1 << 6) |  /* intra coding  */
668                         (2 << 4) |  /* format jpeg   */
669                         1;          /* encoder start */
670 
671             /*
672              * It is opposite that position of partitions
673              * of rotation 90 degree and rotation 270 degree.
674              */
675             if (syntax->rotation == MPP_ENC_ROT_270)
676                 cfg.offset_x = syntax->offset_x +
677                                (syntax->mcu_ver_cnt - ctx_ext->part_rows[0] - mcu_y) * 16;
678             else
679                 cfg.offset_x = syntax->offset_x + mcu_y * 16;
680 
681             cfg.offset_y = syntax->offset_y;
682         }
683 
684         get_vepu_offset_cfg(&cfg);
685         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
686         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
687         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
688 
689         mcu_y += part_enc_mcu_h;
690 
691         do {
692             MppDevRegWrCfg wr_cfg;
693             MppDevRegRdCfg rd_cfg;
694 
695             wr_cfg.reg = regs;
696             wr_cfg.size = reg_size;
697             wr_cfg.offset = 0;
698 
699             ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
700             if (ret) {
701                 mpp_err_f("set register write failed %d\n", ret);
702                 break;
703             }
704 
705             rd_cfg.reg = ctx_ext->regs_out[i];
706             rd_cfg.size = reg_size;
707             rd_cfg.offset = 0;
708 
709             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
710             if (ret) {
711                 mpp_err_f("set register read failed %d\n", ret);
712                 break;
713             }
714 
715             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_OFFS, reg_cfg);
716             if (ret) {
717                 mpp_err_f("set register offsets failed %d\n", ret);
718                 break;
719             }
720 
721             if (i < partion_num - 1) {
722                 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL);
723                 if (ret) {
724                     mpp_err_f("send delimit failed %d\n", ret);
725                     break;
726                 }
727             }
728         } while (0);
729     }
730 
731     ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
732     if (ret)
733         mpp_err_f("send cmd failed %d\n", ret);
734 
735     return ret;
736 }
737 
multi_core_wait(HalJpegeCtx * ctx,HalEncTask * task)738 static MPP_RET multi_core_wait(HalJpegeCtx *ctx, HalEncTask *task)
739 {
740     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
741     JpegeFeedback *feedback = &ctx->feedback;
742     RK_U32 sw_bit = 0;
743     RK_U32 hw_bit = 0;
744     MPP_RET ret = MPP_OK;
745     RK_U32 val;
746     RK_U32 i;
747 
748     hal_jpege_dbg_detail("poll partion_num %d\n", ctx_ext->partion_num);
749 
750     for (i = 0; i < ctx_ext->partion_num; i++) {
751         RK_U32 *regs = ctx_ext->regs_out[i];
752 
753         hal_jpege_dbg_detail("poll reg %d %p", i, regs);
754 
755         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
756         if (ret)
757             mpp_err_f("poll cmd failed %d\n", ret);
758 
759         if (i == 0) {
760             RK_S32 fd = mpp_buffer_get_fd(task->output);
761 
762             val = regs[109];
763             hal_jpege_dbg_output("hw_status %08x\n", val);
764             feedback->hw_status = val & 0x70;
765             val = regs[53];
766             sw_bit = jpege_bits_get_bitpos(ctx->bits);
767             hw_bit = val;
768             feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
769             hal_jpege_dbg_detail("partion len = %d", hw_bit / 8);
770             task->length = feedback->stream_length;
771             task->hw_length = task->length - ctx->hal_start_pos;
772 
773             mpp_dmabuf_sync_partial_begin(fd, 1, 0, task->length, __FUNCTION__);
774         } else {
775             void *stream_ptr = mpp_buffer_get_ptr(task->output);
776             void *partion_ptr = mpp_buffer_get_ptr(ctx_ext->partions_buf[i - 1]);
777             RK_S32 partion_fd = mpp_buffer_get_fd(ctx_ext->partions_buf[i - 1]);
778             RK_U32 partion_len = 0;
779 
780             val = regs[109];
781             hal_jpege_dbg_output("hw_status %08x\n", val);
782             feedback->hw_status = val & 0x70;
783             partion_len = regs[53] / 8;
784             hal_jpege_dbg_detail("partion_len = %d", partion_len);
785 
786             mpp_dmabuf_sync_partial_begin(partion_fd, 1, 0, partion_len, __FUNCTION__);
787 
788             memcpy(stream_ptr + feedback->stream_length, partion_ptr, partion_len);
789             feedback->stream_length += partion_len;
790             task->length = feedback->stream_length;
791             task->hw_length += partion_len;
792         }
793     }
794 
795     hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
796                          sw_bit, hw_bit, feedback->stream_length, task->hw_length);
797 
798     return ret;
799 }
800 
hal_jpege_vepu2_start(void * hal,HalEncTask * task)801 MPP_RET hal_jpege_vepu2_start(void *hal, HalEncTask *task)
802 {
803     MPP_RET ret = MPP_OK;
804     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
805     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
806 
807     hal_jpege_dbg_func("enter hal %p\n", hal);
808 
809     if (ctx_ext && ctx_ext->multi_core_enabled) {
810         multi_core_start(ctx, task);
811     } else {
812         hal_jpege_vepu2_set_extra_info(ctx->dev, &ctx->syntax, 0);
813         do {
814             MppDevRegWrCfg wr_cfg;
815             MppDevRegRdCfg rd_cfg;
816             RK_U32 reg_size = ctx->reg_size;
817             RK_S32 reg_idx = task->flags.reg_idx;
818             RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + reg_size * reg_idx);
819 
820             wr_cfg.reg = regs;
821             wr_cfg.size = reg_size;
822             wr_cfg.offset = 0;
823 
824             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
825             if (ret) {
826                 mpp_err_f("set register write failed %d\n", ret);
827                 break;
828             }
829 
830             rd_cfg.reg = regs;
831             rd_cfg.size = reg_size;
832             rd_cfg.offset = 0;
833 
834             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
835             if (ret) {
836                 mpp_err_f("set register read failed %d\n", ret);
837                 break;
838             }
839 
840             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
841             if (ret) {
842                 mpp_err_f("send cmd failed %d\n", ret);
843                 break;
844             }
845         } while (0);
846     }
847     hal_jpege_dbg_func("leave hal %p\n", hal);
848     (void)task;
849     return ret;
850 }
851 
hal_jpege_vepu2_wait(void * hal,HalEncTask * task)852 MPP_RET hal_jpege_vepu2_wait(void *hal, HalEncTask *task)
853 {
854     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
855     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
856     MPP_RET ret = MPP_OK;
857 
858     hal_jpege_dbg_func("enter hal %p\n", hal);
859 
860     if (ctx_ext && ctx_ext->multi_core_enabled) {
861         multi_core_wait(ctx, task);
862     } else {
863         JpegeFeedback *feedback = &ctx->feedback;
864         JpegeBits bits = ctx->bits;
865         RK_S32 reg_idx = task->flags.reg_idx;
866         RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
867         RK_U32 sw_bit = 0;
868         RK_U32 hw_bit = 0;
869         RK_U32 val;
870 
871         if (ctx->dev) {
872             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
873             if (ret)
874                 mpp_err_f("poll cmd failed %d\n", ret);
875         }
876 
877         val = regs[109];
878         hal_jpege_dbg_output("hw_status %08x\n", val);
879         feedback->hw_status = val & 0x70;
880         val = regs[53];
881 
882         sw_bit = jpege_bits_get_bitpos(bits);
883         hw_bit = val;
884 
885         // NOTE: hardware will return 64 bit access byte count
886         feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
887         task->length = feedback->stream_length;
888         task->hw_length = task->length - ctx->hal_start_pos;
889 
890         hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
891                              sw_bit, hw_bit, feedback->stream_length, task->hw_length);
892     }
893 
894     hal_jpege_dbg_func("leave hal %p\n", hal);
895     return ret;
896 }
897 
hal_jpege_vepu2_part_start(void * hal,HalEncTask * task)898 MPP_RET hal_jpege_vepu2_part_start(void *hal, HalEncTask *task)
899 {
900     MPP_RET ret = MPP_OK;
901     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
902     JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
903     RK_U32 mcu_w = syntax->mcu_hor_cnt;
904     RK_U32 mcu_h = syntax->mcu_ver_cnt;
905     RK_U32 mcu_y = ctx->mcu_y;
906     RK_U32 part_mcu_h = syntax->part_rows;
907     RK_S32 reg_idx = task->flags.reg_idx;
908     RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs + ctx->reg_size * reg_idx);
909     RK_U32 part_enc_h;
910     RK_U32 part_enc_mcu_h;
911     RK_U32 part_y_fill;
912     RK_U32 part_not_end;
913 
914     hal_jpege_dbg_func("enter part start %p\n", hal);
915 
916     /* Fix register for each part encoding */
917     task->part_first = !mcu_y;
918     if (mcu_y + part_mcu_h < mcu_h) {
919         part_enc_h = part_mcu_h * 16;
920         part_enc_mcu_h = part_mcu_h;
921         part_y_fill = 0;
922         part_not_end = 1;
923         task->part_last = 0;
924     } else {
925         part_enc_h = syntax->height - mcu_y * 16;
926         part_enc_mcu_h = MPP_ALIGN(part_enc_h, 16) / 16;;
927         part_y_fill = ctx->part_y_fill;
928         part_not_end = 0;
929         task->part_last = 1;
930     }
931 
932     hal_jpege_dbg_detail("part first %d last %d\n", task->part_first, task->part_last);
933 
934     get_msb_lsb_at_pos(&regs[51], &regs[52], ctx->base, ctx->part_bytepos);
935 
936     regs[53] = ctx->size - ctx->part_bytepos;
937 
938     regs[60] = (((ctx->part_bytepos & 7) * 8) << 16) |
939                (ctx->part_x_fill << 4) |
940                (part_y_fill);
941 
942     regs[77] = mpp_buffer_get_fd(task->output);
943     if (ctx->part_bytepos)
944         mpp_dev_set_reg_offset(ctx->dev, 77, ctx->part_bytepos);
945 
946     regs[103] = mcu_w << 8  |
947                 (part_enc_mcu_h) << 20 |
948                 (1 << 6) |  /* intra coding  */
949                 (2 << 4) |  /* format jpeg   */
950                 1;          /* encoder start */
951 
952     hal_jpege_dbg_detail("part_not_end 0x%x, rst_marker_idx %d",
953                          part_not_end, ctx->rst_marker_idx);
954     regs[107] = part_not_end << 24 | jpege_restart_marker[ctx->rst_marker_idx & 7];
955     ctx->rst_marker_idx++;
956 
957     hal_jpege_vepu2_set_extra_info(ctx->dev, syntax, mcu_y);
958     ctx->mcu_y += part_enc_mcu_h;
959 
960     do {
961         MppDevRegWrCfg wr_cfg;
962         MppDevRegRdCfg rd_cfg;
963         RK_U32 reg_size = ctx->reg_size;
964 
965         wr_cfg.reg = ctx->regs;
966         wr_cfg.size = reg_size;
967         wr_cfg.offset = 0;
968 
969         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
970         if (ret) {
971             mpp_err_f("set register write failed %d\n", ret);
972             break;
973         }
974 
975         rd_cfg.reg = ctx->regs_out;
976         rd_cfg.size = reg_size;
977         rd_cfg.offset = 0;
978 
979         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
980         if (ret) {
981             mpp_err_f("set register read failed %d\n", ret);
982             break;
983         }
984 
985         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
986         if (ret) {
987             mpp_err_f("send cmd failed %d\n", ret);
988             break;
989         }
990     } while (0);
991 
992     hal_jpege_dbg_func("leave part start %p\n", hal);
993     (void)task;
994     return ret;
995 }
996 
hal_jpege_vepu2_part_wait(void * hal,HalEncTask * task)997 MPP_RET hal_jpege_vepu2_part_wait(void *hal, HalEncTask *task)
998 {
999     MPP_RET ret = MPP_OK;
1000     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1001     RK_S32 reg_idx = task->flags.reg_idx;
1002     RK_U32 *regs = (RK_U32 *)((RK_U8 *)ctx->regs_out + ctx->reg_size * reg_idx);
1003     JpegeFeedback *feedback = &ctx->feedback;
1004     RK_U32 hw_bit = 0;
1005 
1006     hal_jpege_dbg_func("enter part wait %p\n", hal);
1007 
1008     if (ctx->dev) {
1009         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
1010         if (ret)
1011             mpp_err_f("poll cmd failed %d\n", ret);
1012     }
1013 
1014     hal_jpege_dbg_detail("hw_status %08x\n", regs[109]);
1015 
1016     hw_bit = regs[53];
1017 
1018     hal_jpege_dbg_detail("byte pos %d -> %d\n", ctx->part_bytepos,
1019                          (ctx->part_bytepos & (~7)) + (hw_bit / 8));
1020     ctx->part_bytepos = (ctx->part_bytepos & (~7)) + (hw_bit / 8);
1021 
1022     feedback->stream_length = ctx->part_bytepos;
1023     task->length = ctx->part_bytepos;
1024     task->hw_length = task->length - ctx->hal_start_pos;
1025 
1026     hal_jpege_dbg_detail("stream_length %d, hw_byte %d",
1027                          feedback->stream_length, hw_bit / 8);
1028 
1029     hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
1030                          ctx->sw_bit, hw_bit, feedback->stream_length, task->hw_length);
1031 
1032     hal_jpege_dbg_func("leave part wait %p\n", hal);
1033     return ret;
1034 }
1035 
hal_jpege_vepu2_ret_task(void * hal,HalEncTask * task)1036 MPP_RET hal_jpege_vepu2_ret_task(void *hal, HalEncTask *task)
1037 {
1038     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1039     EncRcTaskInfo *rc_info = &task->rc_task->info;
1040 
1041     task->rc_task->info.bit_real = ctx->feedback.stream_length * 8;
1042     task->hal_ret.data = &ctx->feedback;
1043     task->hal_ret.number = 1;
1044 
1045     rc_info->quality_real = rc_info->quality_target;
1046 
1047     return MPP_OK;
1048 }
1049 
1050 const MppEncHalApi hal_jpege_vepu2 = {
1051     .name       = "hal_jpege_vepu2",
1052     .coding     = MPP_VIDEO_CodingMJPEG,
1053     .ctx_size   = sizeof(HalJpegeCtx),
1054     .flag       = 0,
1055     .init       = hal_jpege_vepu2_init,
1056     .deinit     = hal_jpege_vepu2_deinit,
1057     .prepare    = NULL,
1058     .get_task   = hal_jpege_vepu2_get_task,
1059     .gen_regs   = hal_jpege_vepu2_gen_regs,
1060     .start      = hal_jpege_vepu2_start,
1061     .wait       = hal_jpege_vepu2_wait,
1062     .part_start = hal_jpege_vepu2_part_start,
1063     .part_wait  = hal_jpege_vepu2_part_wait,
1064     .ret_task   = hal_jpege_vepu2_ret_task,
1065 };
1066