xref: /OK3568_Linux_fs/external/mpp/mpp/hal/vpu/jpege/hal_jpege_vepu2_v2.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright 2015 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_jpege_vepu2"
18 
19 #include <string.h>
20 
21 #include "mpp_env.h"
22 #include "mpp_common.h"
23 #include "mpp_mem.h"
24 #include "mpp_platform.h"
25 
26 #include "mpp_enc_hal.h"
27 #include "vcodec_service.h"
28 
29 #include "hal_jpege_debug.h"
30 #include "hal_jpege_api_v2.h"
31 #include "hal_jpege_base.h"
32 
/* Number of 32-bit registers in one VEPU2 JPEG register set. */
33 #define VEPU_JPEGE_VEPU2_NUM_REGS   184
/*
 * Register indices that receive the input buffer fd for the Y / U / V
 * planes; per-plane byte offsets are attached to these indices.
 */
34 #define VEPU2_REG_INPUT_Y           48
35 #define VEPU2_REG_INPUT_U           49
36 #define VEPU2_REG_INPUT_V           50
37 
/*
 * One complete VEPU2 register set viewed as an array of 32-bit words.
 * NOTE(review): not referenced in the visible part of this file; the
 * functions below index a plain RK_U32 pointer instead.
 */
38 typedef struct jpege_vepu2_reg_set_t {
39     RK_U32  val[VEPU_JPEGE_VEPU2_NUM_REGS];
40 } jpege_vepu2_reg_set;
41 
/* Maximum number of partitions a single frame is split into. */
42 #define MAX_CORE_NUM                4
43 
/*
 * Extra HAL state used when one frame is split into several partitions
 * that are submitted together (see hal_jpege_vepu2_get_task() and
 * multi_core_start() / multi_core_wait()).
 */
44 typedef struct JpegeMultiCoreCtx_t {
    /* cleared at the start of every get_task, set once a split is configured */
45     RK_U32              multi_core_enabled;
    /* number of partitions with a non-zero share; loop bound for start / wait */
46     RK_U32              partion_num;
    /* register offset list created with mpp_dev_multi_offset_init() */
47     MppDevRegOffCfgs    *reg_cfg;
48 
    /* buffer group the partition output buffers are allocated from */
49     MppBufferGroup      partions_group;
    /* output buffers of partitions 1..N; partition 0 writes to the task output */
50     MppBuffer           partions_buf[MAX_CORE_NUM - 1];
    /* size used for the current partions_buf allocation (width * height / 2) */
51     RK_U32              buf_size;
52 
    /* macroblock rows encoded by each partition */
53     RK_U32              part_rows[MAX_CORE_NUM];
    /* per-partition count added to the restart marker index ("ecs" count) */
54     RK_U32              ecs_cnt[MAX_CORE_NUM];
55 
    /* single allocation backing both regs[] and regs_out[] */
56     void                *regs_base;
    /* per-partition register set written to the device */
57     void                *regs[MAX_CORE_NUM];
    /* per-partition register read-back area */
58     void                *regs_out[MAX_CORE_NUM];
59 } JpegeMultiCoreCtx;
60 
hal_jpege_vepu2_init(void * hal,MppEncHalCfg * cfg)61 MPP_RET hal_jpege_vepu2_init(void *hal, MppEncHalCfg *cfg)
62 {
63     MPP_RET ret = MPP_OK;
64     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
65     MppClientType type;
66     RK_U32 vcodec_type = mpp_get_vcodec_type();
67 
68     mpp_env_get_u32("hal_jpege_debug", &hal_jpege_debug, 0);
69     hal_jpege_dbg_func("enter hal %p cfg %p\n", hal, cfg);
70 
71     /* update output to MppEnc */
72     type = (vcodec_type & HAVE_VEPU2_JPEG) ?
73            VPU_CLIENT_VEPU2_JPEG : VPU_CLIENT_VEPU2;
74 
75     cfg->type = type;
76     ret = mpp_dev_init(&cfg->dev, type);
77     if (ret) {
78         mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
79         return ret;
80     }
81     ctx->dev = cfg->dev;
82     ctx->type = cfg->type;
83 
84     jpege_bits_init(&ctx->bits);
85     mpp_assert(ctx->bits);
86     ret = hal_jpege_vepu_init_rc(&ctx->hal_rc);
87     if (ret)
88         return ret;
89 
90     ctx->cfg = cfg->cfg;
91     ctx->reg_size = sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS;
92     ctx->regs = mpp_calloc_size(void, ctx->reg_size + EXTRA_INFO_SIZE);
93     if (NULL == ctx->regs) {
94         mpp_err_f("failed to malloc vepu2 regs\n");
95         return MPP_NOK;
96     }
97 
98     ctx->regs_out = mpp_calloc_size(void, ctx->reg_size + EXTRA_INFO_SIZE);
99     if (NULL == ctx->regs_out) {
100         mpp_err_f("failed to malloc vepu2 regs\n");
101         return MPP_NOK;
102     }
103 
104     hal_jpege_dbg_func("leave hal %p\n", hal);
105     return MPP_OK;
106 }
107 
hal_jpege_vepu2_deinit(void * hal)108 MPP_RET hal_jpege_vepu2_deinit(void *hal)
109 {
110     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
111 
112     hal_jpege_dbg_func("enter hal %p\n", hal);
113 
114     if (ctx->bits) {
115         jpege_bits_deinit(ctx->bits);
116         ctx->bits = NULL;
117     }
118 
119     if (ctx->dev) {
120         mpp_dev_deinit(ctx->dev);
121         ctx->dev = NULL;
122     }
123 
124     hal_jpege_vepu_deinit_rc(&ctx->hal_rc);
125 
126     if (ctx->ctx_ext) {
127         JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
128         RK_U32 i;
129 
130         if (ctx_ext->reg_cfg) {
131             mpp_dev_multi_offset_deinit(ctx_ext->reg_cfg);
132             ctx_ext->reg_cfg = NULL;
133         }
134 
135         for (i = 0; i < MAX_CORE_NUM - 1; i++)
136             if (ctx_ext->partions_buf[i])
137                 mpp_buffer_put(ctx_ext->partions_buf[i]);
138 
139         if (ctx_ext->partions_group) {
140             mpp_buffer_group_put(ctx_ext->partions_group);
141             ctx_ext->partions_group = NULL;
142         }
143 
144         MPP_FREE(ctx_ext->regs_base);
145         MPP_FREE(ctx->ctx_ext);
146     }
147 
148     MPP_FREE(ctx->regs);
149     MPP_FREE(ctx->regs_out);
150 
151     hal_jpege_dbg_func("leave hal %p\n", hal);
152     return MPP_OK;
153 }
154 
hal_jpege_vepu2_get_task(void * hal,HalEncTask * task)155 MPP_RET hal_jpege_vepu2_get_task(void *hal, HalEncTask *task)
156 {
157     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
158     JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
159     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
160     RK_U32 i = 0;
161 
162     hal_jpege_dbg_func("enter hal %p\n", hal);
163 
164     memcpy(&ctx->syntax, syntax, sizeof(ctx->syntax));
165     /* Set rc paramters */
166     hal_jpege_dbg_input("rc_mode %d\n", ctx->cfg->rc.rc_mode);
167     if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
168         if (!ctx->hal_rc.q_factor) {
169             task->rc_task->info.quality_target = syntax->q_factor ? (100 - syntax->q_factor) : 80;
170             task->rc_task->info.quality_min = 100 - syntax->qf_max;
171             task->rc_task->info.quality_max = 100 - syntax->qf_min;
172             task->rc_task->frm.is_intra = 1;
173         } else {
174             task->rc_task->info.quality_target = ctx->hal_rc.last_quality;
175             task->rc_task->info.quality_min = 100 - syntax->qf_max;
176             task->rc_task->info.quality_max = 100 - syntax->qf_min;
177         }
178     }
179     ctx->hal_start_pos = mpp_packet_get_length(task->packet);
180 
181     /* prepare for part encoding */
182     ctx->mcu_y = 0;
183     ctx->mcu_h = syntax->mcu_h;
184     ctx->sw_bit = 0;
185     ctx->part_bytepos = 0;
186     ctx->part_x_fill = 0;
187     ctx->part_y_fill = 0;
188     ctx->rst_marker_idx = 0;
189     task->part_first = 1;
190     task->part_last = 0;
191 
192     /* Split single task to multi cores on rk3588 */
193     if (ctx_ext)
194         ctx_ext->multi_core_enabled = 0;
195 
196     if (ctx->type == VPU_CLIENT_VEPU2_JPEG) {
197         RK_U32 width = ctx->cfg->prep.width;
198         RK_U32 height = ctx->cfg->prep.height;
199         RK_U32 buf_size = width * height / 2;
200 
201         /* small image do not need to split into four segments */
202         if (width * height <= 1280 * 720 && (height <= 720 || width <= 720))
203             goto MULTI_CORE_SPLIT_DONE;
204 
205         if (!ctx_ext) {
206             ctx_ext = mpp_calloc(JpegeMultiCoreCtx, 1);
207             ctx->ctx_ext = ctx_ext;
208         }
209 
210         mpp_assert(ctx_ext);
211 
212         if (!ctx_ext->partions_group) {
213             mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_DMA_HEAP | MPP_BUFFER_FLAGS_CACHABLE);
214             if (!ctx_ext->partions_group)
215                 mpp_buffer_group_get_internal(&ctx_ext->partions_group, MPP_BUFFER_TYPE_ION);
216         }
217 
218         mpp_assert(ctx_ext->partions_group);
219 
220         if (ctx_ext->buf_size != buf_size) {
221             MppBuffer buf = NULL;
222 
223             for (i = 0; i < MAX_CORE_NUM - 1; i++) {
224                 buf = ctx_ext->partions_buf[i];
225                 if (buf)
226                     mpp_buffer_put(buf);
227             }
228 
229             mpp_buffer_group_clear(ctx_ext->partions_group);
230 
231             for (i = 0; i < MAX_CORE_NUM - 1; i++) {
232                 mpp_buffer_get(ctx_ext->partions_group, &buf, buf_size);
233                 mpp_assert(buf);
234                 ctx_ext->partions_buf[i] = buf;
235             }
236 
237             ctx_ext->buf_size = buf_size;
238         }
239 
240         if (!ctx_ext->regs_base) {
241             void *regs_base = mpp_calloc_size(void, ctx->reg_size * MAX_CORE_NUM * 2);
242             size_t reg_size = ctx->reg_size;
243 
244             ctx_ext->regs_base = regs_base;
245             for (i = 0; i < MAX_CORE_NUM; i++) {
246                 ctx_ext->regs[i] = regs_base;
247                 regs_base += reg_size;
248 
249                 ctx_ext->regs_out[i] = regs_base;
250                 regs_base += reg_size;
251             }
252         }
253 
254         {
255             RK_U32 mb_w = MPP_ALIGN(width, 16) / 16;
256             RK_U32 mb_h = MPP_ALIGN(height, 16) / 16;
257             RK_U32 part_rows = MPP_ALIGN(mb_h, 4) / 4;
258 
259             ctx_ext->partion_num = 0;
260 
261             if (ctx->cfg->split.split_mode == MPP_ENC_SPLIT_BY_CTU) {
262                 RK_U32 ecs_num = (mb_h + syntax->part_rows - 1) / syntax->part_rows;
263                 RK_U32 *core_ecs = ctx_ext->ecs_cnt;
264 
265                 if (ecs_num > 24 || ecs_num <= 8) {
266                     RK_U32 divider = ecs_num > 24 ? 8 : 1;
267                     RK_U32 quotient = ecs_num / divider;
268                     RK_U32 remainder = ecs_num % divider;
269                     RK_U32 runs = quotient  / MAX_CORE_NUM;
270                     RK_U32 runs_left = quotient % MAX_CORE_NUM;
271 
272                     if (runs > 0) {
273                         for (i = 0; i < MAX_CORE_NUM; i++)
274                             core_ecs[i] = runs * divider;
275                     }
276 
277                     for (i = 0; i < runs_left; i++)
278                         core_ecs[i] += divider;
279 
280                     core_ecs[MAX_CORE_NUM - 1] += remainder;
281                 } else if (ecs_num > 20) {
282                     core_ecs[0] = core_ecs[1] = 8;
283                     core_ecs[2] = (ecs_num - 8 * 2) / 2;
284                     core_ecs[3] = ecs_num - 8 * 2 - core_ecs[2];
285                 } else if (ecs_num > 16) {
286                     core_ecs[0] = 8;
287                     core_ecs[1] = core_ecs[2] = 4;
288                     core_ecs[3] = ecs_num - 8 - 4 * 2;
289                 } else if (ecs_num > 8) {
290                     core_ecs[0] = core_ecs[1] = 4;
291                     core_ecs[2] = (ecs_num - 4 * 2) / 2;
292                     core_ecs[3] = ecs_num - 4 * 2 - core_ecs[2];
293                 }
294 
295                 for (i = 0; i < MAX_CORE_NUM; i++) {
296                     ctx_ext->part_rows[i] = core_ecs[i] * syntax->part_rows;
297                     hal_jpege_dbg_detail("part %d, ecs %d, rows %d", i, core_ecs[i],
298                                          ctx_ext->part_rows[i]);
299                     if (core_ecs[i])
300                         ctx_ext->partion_num++;
301                 }
302             } else {
303                 for (i = 0; i < MAX_CORE_NUM; i++) {
304                     part_rows = (mb_h >= part_rows) ? part_rows : mb_h;
305 
306                     ctx_ext->part_rows[i] = part_rows;
307                     ctx_ext->ecs_cnt[i] = 1;
308 
309                     hal_jpege_dbg_detail("part %d row %d restart %d\n",
310                                          i, part_rows, mb_w * part_rows);
311 
312                     if (part_rows)
313                         ctx_ext->partion_num++;
314 
315                     if (i == 0 && !ctx->syntax.restart_ri)
316                         ctx->syntax.restart_ri = mb_w * part_rows;
317 
318                     mb_h -= part_rows;
319                 }
320             }
321         }
322 
323         if (!ctx_ext->reg_cfg)
324             mpp_dev_multi_offset_init(&ctx_ext->reg_cfg, 24);
325 
326         syntax->low_delay = 1;
327         ctx_ext->multi_core_enabled = 1;
328     }
329 MULTI_CORE_SPLIT_DONE:
330 
331     hal_jpege_dbg_func("leave hal %p\n", hal);
332 
333     return MPP_OK;
334 }
335 
hal_jpege_vepu2_set_extra_info(MppDev dev,JpegeSyntax * syntax,RK_U32 start_mbrow)336 static MPP_RET hal_jpege_vepu2_set_extra_info(MppDev dev, JpegeSyntax *syntax,
337                                               RK_U32 start_mbrow)
338 {
339     VepuOffsetCfg cfg;
340     MppDevRegOffsetCfg trans_cfg;
341 
342     cfg.fmt = syntax->format;
343     cfg.width = syntax->width;
344     cfg.height = syntax->height;
345     cfg.hor_stride = syntax->hor_stride;
346     cfg.ver_stride = syntax->ver_stride;
347     cfg.offset_x = syntax->offset_x;
348     cfg.offset_y = syntax->offset_y + start_mbrow * 16;
349 
350     get_vepu_offset_cfg(&cfg);
351 
352     if (cfg.offset_byte[0]) {
353         trans_cfg.reg_idx = VEPU2_REG_INPUT_Y;
354         trans_cfg.offset = cfg.offset_byte[0];
355 
356         mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
357     }
358 
359     if (cfg.offset_byte[1]) {
360         trans_cfg.reg_idx = VEPU2_REG_INPUT_U;
361         trans_cfg.offset = cfg.offset_byte[1];
362 
363         mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
364     }
365 
366     if (cfg.offset_byte[2]) {
367         trans_cfg.reg_idx = VEPU2_REG_INPUT_V;
368         trans_cfg.offset = cfg.offset_byte[2];
369 
370         mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg);
371     }
372 
373     return MPP_OK;
374 }
375 
hal_jpege_vepu2_gen_regs(void * hal,HalEncTask * task)376 MPP_RET hal_jpege_vepu2_gen_regs(void *hal, HalEncTask *task)
377 {
378     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
379     MppBuffer input  = task->input;
380     MppBuffer output = task->output;
381     JpegeSyntax *syntax = &ctx->syntax;
382     RK_U32 width        = syntax->width;
383     RK_U32 width_align  = MPP_ALIGN(width, 16);
384     RK_U32 height       = syntax->height;
385     MppFrameFormat fmt  = syntax->format;
386     RK_U32 hor_stride   = 0;
387     RK_U32 ver_stride   = MPP_ALIGN(height, 16);
388     JpegeBits bits      = ctx->bits;
389     RK_U32 *regs = (RK_U32 *)ctx->regs;
390     size_t length = mpp_packet_get_length(task->packet);
391     RK_U8  *buf = mpp_buffer_get_ptr(output);
392     size_t size = mpp_buffer_get_size(output);
393     const RK_U8 *qtable[2] = {NULL};
394     RK_S32 bitpos;
395     RK_S32 bytepos;
396     RK_U32 x_fill = 0;
397     RK_U32 y_fill = 0;
398     VepuFormatCfg fmt_cfg;
399     RK_U32 rotation = 0;
400 
401     hal_jpege_dbg_func("enter hal %p\n", hal);
402 
403     // do not support mirroring
404     if (syntax->mirroring)
405         mpp_err_f("Warning: do not support mirroring\n");
406 
407     if (syntax->rotation == MPP_ENC_ROT_90)
408         rotation = 1;
409     else if (syntax->rotation == MPP_ENC_ROT_270)
410         rotation = 2;
411     else if (syntax->rotation != MPP_ENC_ROT_0)
412         mpp_err_f("Warning: only support 90 or 270 degree rotate, request rotate %d", syntax->rotation);
413     if (rotation) {
414         MPP_SWAP(RK_U32, width, height);
415         MPP_SWAP(RK_U32, width_align, ver_stride);
416     }
417     hor_stride = get_vepu_pixel_stride(&ctx->stride_cfg, width,
418                                        syntax->hor_stride, fmt);
419 
420     //hor_stride must be align with 8, and ver_stride mus align with 2
421     if ((hor_stride & 0x7) || (ver_stride & 0x1) || (hor_stride >= (1 << 15))) {
422         mpp_err_f("illegal resolution, hor_stride %d, ver_stride %d, width %d, height %d\n",
423                   syntax->hor_stride, syntax->ver_stride,
424                   syntax->width, syntax->height);
425     }
426 
427     x_fill = (width_align - width) / 4;
428     y_fill = (ver_stride - height);
429     mpp_assert(x_fill <= 3);
430     mpp_assert(y_fill <= 15);
431     ctx->part_x_fill = x_fill;
432     ctx->part_y_fill = y_fill;
433 
434     /* write header to output buffer */
435     jpege_bits_setup(bits, buf, (RK_U32)size);
436     /* seek length bytes data */
437     jpege_seek_bits(bits, length << 3);
438     /* NOTE: write header will update qtable */
439     if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
440         hal_jpege_vepu_rc(ctx, task);
441         qtable[0] = ctx->hal_rc.qtable_y;
442         qtable[1] = ctx->hal_rc.qtable_c;
443     } else {
444         qtable[0] = NULL;
445         qtable[1] = NULL;
446     }
447     write_jpeg_header(bits, syntax, qtable);
448 
449     memset(regs, 0, sizeof(RK_U32) * VEPU_JPEGE_VEPU2_NUM_REGS);
450     // input address setup
451     regs[VEPU2_REG_INPUT_Y] = mpp_buffer_get_fd(input);
452     regs[VEPU2_REG_INPUT_U] = regs[VEPU2_REG_INPUT_Y];
453     regs[VEPU2_REG_INPUT_V] = regs[VEPU2_REG_INPUT_Y];
454 
455     // output address setup
456     bitpos = jpege_bits_get_bitpos(bits);
457     bytepos = (bitpos + 7) >> 3;
458     ctx->base = buf;
459     ctx->size = size;
460     ctx->sw_bit = bitpos;
461     ctx->part_bytepos = bytepos;
462 
463     get_msb_lsb_at_pos(&regs[51], &regs[52], buf, bytepos);
464 
465     regs[53] = size - bytepos;
466 
467     // bus config
468     regs[54] = 16 << 8;
469 
470     regs[60] = (((bytepos & 7) * 8) << 16) |
471                (x_fill << 4) |
472                (y_fill);
473     regs[61] = hor_stride;
474 
475     regs[77] = mpp_buffer_get_fd(output);
476     if (bytepos)
477         mpp_dev_set_reg_offset(ctx->dev, 77, bytepos);
478     /* 95 - 97 color conversion parameter */
479     {
480         RK_U32 coeffA;
481         RK_U32 coeffB;
482         RK_U32 coeffC;
483         RK_U32 coeffE;
484         RK_U32 coeffF;
485 
486         switch (syntax->color_conversion_type) {
487         case 0 : {  /* BT.601 */
488             /*
489              * Y  = 0.2989 R + 0.5866 G + 0.1145 B
490              * Cb = 0.5647 (B - Y) + 128
491              * Cr = 0.7132 (R - Y) + 128
492              */
493             coeffA = 19589;
494             coeffB = 38443;
495             coeffC = 7504;
496             coeffE = 37008;
497             coeffF = 46740;
498         } break;
499         case 1 : {  /* BT.709 */
500             /*
501              * Y  = 0.2126 R + 0.7152 G + 0.0722 B
502              * Cb = 0.5389 (B - Y) + 128
503              * Cr = 0.6350 (R - Y) + 128
504              */
505             coeffA = 13933;
506             coeffB = 46871;
507             coeffC = 4732;
508             coeffE = 35317;
509             coeffF = 41615;
510         } break;
511         case 2 : {
512             coeffA = syntax->coeffA;
513             coeffB = syntax->coeffB;
514             coeffC = syntax->coeffC;
515             coeffE = syntax->coeffE;
516             coeffF = syntax->coeffF;
517         } break;
518         default : {
519             mpp_err("invalid color conversion type %d\n",
520                     syntax->color_conversion_type);
521             coeffA = 19589;
522             coeffB = 38443;
523             coeffC = 7504;
524             coeffE = 37008;
525             coeffF = 46740;
526         } break;
527         }
528 
529         regs[95] = coeffA | (coeffB << 16);
530         regs[96] = coeffC | (coeffE << 16);
531         regs[97] = coeffF;
532     }
533 
534     regs[103] = (width_align >> 4) << 8  |
535                 (ver_stride >> 4) << 20 |
536                 (1 << 6) |  /* intra coding  */
537                 (2 << 4) |  /* format jpeg   */
538                 1;          /* encoder start */
539 
540     if (!get_vepu_fmt(&fmt_cfg, fmt)) {
541         regs[74] = (fmt_cfg.format << 4) |
542                    (rotation << 2);
543         regs[98] = (fmt_cfg.b_mask & 0x1f) << 16 |
544                    (fmt_cfg.g_mask & 0x1f) << 8  |
545                    (fmt_cfg.r_mask & 0x1f);
546         regs[105] = 7 << 26 | (fmt_cfg.swap_32_in & 1) << 29 |
547                     (fmt_cfg.swap_16_in & 1) << 30 |
548                     (fmt_cfg.swap_8_in & 1) << 31;
549     }
550 
551     regs[107] = ((syntax->part_rows & 0xff) << 16) |
552                 jpege_restart_marker[ctx->rst_marker_idx & 7];
553 
554     /* encoder interrupt */
555     regs[109] = 1 << 12 |   /* clock gating */
556                 1 << 10;    /* enable timeout interrupt */
557 
558     if (syntax->low_delay) {
559         /* slice encode end by RST */
560         regs[107] |= (1 << 24);
561         /* slice interrupt enable */
562         regs[109] |= (1 << 16);
563     }
564 
565     /* 0 ~ 31 quantization tables */
566     {
567         RK_S32 i;
568 
569         for (i = 0; i < 16; i++) {
570             /* qtable need to reorder in particular order */
571             regs[i] = qtable[0][qp_reorder_table[i * 4 + 0]] << 24 |
572                       qtable[0][qp_reorder_table[i * 4 + 1]] << 16 |
573                       qtable[0][qp_reorder_table[i * 4 + 2]] << 8 |
574                       qtable[0][qp_reorder_table[i * 4 + 3]];
575         }
576         for (i = 0; i < 16; i++) {
577             /* qtable need to reorder in particular order */
578             regs[i + 16] = qtable[1][qp_reorder_table[i * 4 + 0]] << 24 |
579                            qtable[1][qp_reorder_table[i * 4 + 1]] << 16 |
580                            qtable[1][qp_reorder_table[i * 4 + 2]] << 8 |
581                            qtable[1][qp_reorder_table[i * 4 + 3]];
582         }
583     }
584 
585     hal_jpege_dbg_func("leave hal %p\n", hal);
586     return MPP_OK;
587 }
588 
multi_core_start(HalJpegeCtx * ctx,HalEncTask * task)589 static MPP_RET multi_core_start(HalJpegeCtx *ctx, HalEncTask *task)
590 {
591     JpegeMultiCoreCtx *ctx_ext = ctx->ctx_ext;
592     JpegeSyntax *syntax = &ctx->syntax;
593     MppDevRegOffCfgs *reg_cfg = ctx_ext->reg_cfg;
594     MppDev dev = ctx->dev;
595     RK_U32 *src = (RK_U32 *)ctx->regs;
596     RK_U32 reg_size = ctx->reg_size;
597     MPP_RET ret = MPP_OK;
598     RK_U32 partion_num = ctx_ext->partion_num;
599     RK_U32 mcu_y = 0;
600     RK_U32 i;
601 
602     hal_jpege_dbg_detail("start %d partions\n", partion_num);
603 
604     for (i = 0; i < partion_num; i++) {
605         RK_U32 part_not_end = i < partion_num - 1;
606         RK_U32 part_not_start = i > 0;
607         RK_U32 *regs = (RK_U32 *)ctx_ext->regs[i];
608         RK_U32 part_enc_mcu_h = ctx_ext->part_rows[i];
609         RK_U32 part_x_fill = ctx->part_x_fill;
610         RK_U32 part_y_fill = ctx->part_y_fill;
611         RK_U32 part_bytepos = ctx->part_bytepos;
612 
613         // it only needs to fill the partition on the right and below.
614         if (syntax->rotation == MPP_ENC_ROT_90) {
615             if (part_not_end)
616                 part_x_fill = 0;
617         } else if (syntax->rotation == MPP_ENC_ROT_0 || syntax->rotation == MPP_ENC_ROT_180) {
618             if (part_not_end)
619                 part_y_fill = 0;
620         } else if (syntax->rotation == MPP_ENC_ROT_270) {
621             if (part_not_start)
622                 part_x_fill = 0;
623         } else
624             mpp_err_f("input rotation %d not supported", syntax->rotation);
625 
626         memcpy(regs, src, reg_size);
627 
628         mpp_dev_multi_offset_reset(reg_cfg);
629 
630         if (i == 0) {
631             get_msb_lsb_at_pos(&regs[51], &regs[52], ctx->base, part_bytepos);
632             regs[77] = mpp_buffer_get_fd(task->output);
633             regs[53] = mpp_buffer_get_size(task->output) - part_bytepos;
634             regs[60] = (((part_bytepos & 7) * 8) << 16) |
635                        (part_x_fill << 4) |
636                        (part_y_fill);
637             /* the stream offset had been setup */
638         } else {
639             MppBuffer buf = ctx_ext->partions_buf[i - 1];
640 
641             regs[77] = mpp_buffer_get_fd(buf);
642             regs[53] = mpp_buffer_get_size(buf);
643             regs[60] = (((0 & 7) * 8) << 16) |
644                        (part_x_fill << 4) |
645                        (part_y_fill);
646         }
647 
648         regs[103] = syntax->mcu_w << 8  |
649                     (part_enc_mcu_h) << 20 |
650                     (1 << 6) |  /* intra coding  */
651                     (2 << 4) |  /* format jpeg   */
652                     1;          /* encoder start */
653 
654         hal_jpege_dbg_detail("part %d, part_not_end 0x%x, rst_marker_idx %d",
655                              i, part_not_end, ctx->rst_marker_idx);
656         regs[107] = part_not_end << 24 | ((syntax->part_rows & 0xff) << 16) |
657                     jpege_restart_marker[ctx->rst_marker_idx & 7];
658         ctx->rst_marker_idx += ctx_ext->ecs_cnt[i];
659 
660         VepuOffsetCfg cfg;
661 
662         memset(&cfg, 0, sizeof(cfg));
663 
664         cfg.fmt = syntax->format;
665         cfg.width = syntax->width;
666         cfg.height = syntax->height;
667         cfg.hor_stride = syntax->hor_stride;
668         cfg.ver_stride = syntax->ver_stride;
669         cfg.offset_x = syntax->offset_x;
670         cfg.offset_y = syntax->offset_y + mcu_y * 16;
671 
672         if (syntax->rotation == MPP_ENC_ROT_90 || syntax->rotation == MPP_ENC_ROT_270) {
673             regs[103] = part_enc_mcu_h << 8  |
674                         (syntax->mcu_w) << 20 |
675                         (1 << 6) |  /* intra coding  */
676                         (2 << 4) |  /* format jpeg   */
677                         1;          /* encoder start */
678 
679             /*
680              * It is opposite that position of partitions
681              * of rotation 90 degree and rotation 270 degree.
682              */
683             if (syntax->rotation == MPP_ENC_ROT_270)
684                 cfg.offset_x = syntax->offset_x +
685                                (syntax->mcu_h - ctx_ext->part_rows[0] - mcu_y) * 16;
686             else
687                 cfg.offset_x = syntax->offset_x + mcu_y * 16;
688 
689             cfg.offset_y = syntax->offset_y;
690         }
691 
692         get_vepu_offset_cfg(&cfg);
693         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_Y, cfg.offset_byte[0]);
694         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_U, cfg.offset_byte[1]);
695         mpp_dev_multi_offset_update(reg_cfg, VEPU2_REG_INPUT_V, cfg.offset_byte[2]);
696 
697         mcu_y += part_enc_mcu_h;
698 
699         do {
700             MppDevRegWrCfg wr_cfg;
701             MppDevRegRdCfg rd_cfg;
702 
703             wr_cfg.reg = regs;
704             wr_cfg.size = reg_size;
705             wr_cfg.offset = 0;
706 
707             ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
708             if (ret) {
709                 mpp_err_f("set register write failed %d\n", ret);
710                 break;
711             }
712 
713             rd_cfg.reg = ctx_ext->regs_out[i];
714             rd_cfg.size = reg_size;
715             rd_cfg.offset = 0;
716 
717             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
718             if (ret) {
719                 mpp_err_f("set register read failed %d\n", ret);
720                 break;
721             }
722 
723             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_OFFS, reg_cfg);
724             if (ret) {
725                 mpp_err_f("set register offsets failed %d\n", ret);
726                 break;
727             }
728 
729             if (i < partion_num - 1) {
730                 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL);
731                 if (ret) {
732                     mpp_err_f("send delimit failed %d\n", ret);
733                     break;
734                 }
735             }
736         } while (0);
737     }
738 
739     ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
740     if (ret)
741         mpp_err_f("send cmd failed %d\n", ret);
742 
743     return ret;
744 }
745 
multi_core_wait(HalJpegeCtx * ctx,HalEncTask * task)746 static MPP_RET multi_core_wait(HalJpegeCtx *ctx, HalEncTask *task)
747 {
748     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
749     JpegeFeedback *feedback = &ctx->feedback;
750     RK_U32 sw_bit = 0;
751     RK_U32 hw_bit = 0;
752     MPP_RET ret = MPP_OK;
753     RK_U32 val;
754     RK_U32 i;
755 
756     hal_jpege_dbg_detail("poll partion_num %d\n", ctx_ext->partion_num);
757 
758     for (i = 0; i < ctx_ext->partion_num; i++) {
759         RK_U32 *regs = ctx_ext->regs_out[i];
760 
761         hal_jpege_dbg_detail("poll reg %d %p", i, regs);
762 
763         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
764         if (ret)
765             mpp_err_f("poll cmd failed %d\n", ret);
766 
767         if (i == 0) {
768             val = regs[109];
769             hal_jpege_dbg_output("hw_status %08x\n", val);
770             feedback->hw_status = val & 0x70;
771             val = regs[53];
772             sw_bit = jpege_bits_get_bitpos(ctx->bits);
773             hw_bit = val;
774             feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
775             hal_jpege_dbg_detail("partion len = %d", hw_bit / 8);
776             task->length = feedback->stream_length;
777             task->hw_length = task->length - ctx->hal_start_pos;
778         } else {
779             void *stream_ptr = mpp_buffer_get_ptr(task->output);
780             void *partion_ptr = mpp_buffer_get_ptr(ctx_ext->partions_buf[i - 1]);
781             RK_U32 partion_len = 0;
782 
783             val = regs[109];
784             hal_jpege_dbg_output("hw_status %08x\n", val);
785             feedback->hw_status = val & 0x70;
786             partion_len = regs[53] / 8;
787 
788             hal_jpege_dbg_detail("partion_len = %d", partion_len);
789             memcpy(stream_ptr + feedback->stream_length, partion_ptr, partion_len);
790             feedback->stream_length += partion_len;
791             task->length = feedback->stream_length;
792             task->hw_length += partion_len;
793         }
794     }
795 
796     hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
797                          sw_bit, hw_bit, feedback->stream_length, task->hw_length);
798 
799     return ret;
800 }
801 
hal_jpege_vepu2_start(void * hal,HalEncTask * task)802 MPP_RET hal_jpege_vepu2_start(void *hal, HalEncTask *task)
803 {
804     MPP_RET ret = MPP_OK;
805     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
806     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
807 
808     hal_jpege_dbg_func("enter hal %p\n", hal);
809 
810     if (ctx_ext && ctx_ext->multi_core_enabled) {
811         multi_core_start(ctx, task);
812     } else {
813         hal_jpege_vepu2_set_extra_info(ctx->dev, &ctx->syntax, 0);
814         do {
815             MppDevRegWrCfg wr_cfg;
816             MppDevRegRdCfg rd_cfg;
817             RK_U32 reg_size = ctx->reg_size;
818 
819             wr_cfg.reg = ctx->regs;
820             wr_cfg.size = reg_size;
821             wr_cfg.offset = 0;
822 
823             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
824             if (ret) {
825                 mpp_err_f("set register write failed %d\n", ret);
826                 break;
827             }
828 
829             rd_cfg.reg = ctx->regs;
830             rd_cfg.size = reg_size;
831             rd_cfg.offset = 0;
832 
833             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
834             if (ret) {
835                 mpp_err_f("set register read failed %d\n", ret);
836                 break;
837             }
838 
839             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
840             if (ret) {
841                 mpp_err_f("send cmd failed %d\n", ret);
842                 break;
843             }
844         } while (0);
845     }
846     hal_jpege_dbg_func("leave hal %p\n", hal);
847     (void)task;
848     return ret;
849 }
850 
hal_jpege_vepu2_wait(void * hal,HalEncTask * task)851 MPP_RET hal_jpege_vepu2_wait(void *hal, HalEncTask *task)
852 {
853     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
854     JpegeMultiCoreCtx *ctx_ext = (JpegeMultiCoreCtx *)ctx->ctx_ext;
855     MPP_RET ret = MPP_OK;
856 
857     hal_jpege_dbg_func("enter hal %p\n", hal);
858 
859     if (ctx_ext && ctx_ext->multi_core_enabled) {
860         multi_core_wait(ctx, task);
861     } else {
862         JpegeFeedback *feedback = &ctx->feedback;
863         JpegeBits bits = ctx->bits;
864         RK_U32 *regs = ctx->regs;
865         RK_U32 sw_bit = 0;
866         RK_U32 hw_bit = 0;
867         RK_U32 val;
868 
869         if (ctx->dev) {
870             ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
871             if (ret)
872                 mpp_err_f("poll cmd failed %d\n", ret);
873         }
874 
875         val = regs[109];
876         hal_jpege_dbg_output("hw_status %08x\n", val);
877         feedback->hw_status = val & 0x70;
878         val = regs[53];
879 
880         sw_bit = jpege_bits_get_bitpos(bits);
881         hw_bit = val;
882 
883         // NOTE: hardware will return 64 bit access byte count
884         feedback->stream_length = ((sw_bit / 8) & (~0x7)) + hw_bit / 8;
885         task->length = feedback->stream_length;
886         task->hw_length = task->length - ctx->hal_start_pos;
887 
888         hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
889                              sw_bit, hw_bit, feedback->stream_length, task->hw_length);
890     }
891 
892     hal_jpege_dbg_func("leave hal %p\n", hal);
893     return ret;
894 }
895 
hal_jpege_vepu2_part_start(void * hal,HalEncTask * task)896 MPP_RET hal_jpege_vepu2_part_start(void *hal, HalEncTask *task)
897 {
898     MPP_RET ret = MPP_OK;
899     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
900     JpegeSyntax *syntax = (JpegeSyntax *)task->syntax.data;
901     RK_U32 mcu_w = syntax->mcu_w;
902     RK_U32 mcu_h = syntax->mcu_h;
903     RK_U32 mcu_y = ctx->mcu_y;
904     RK_U32 part_mcu_h = syntax->part_rows;
905     RK_U32 *regs = (RK_U32 *)ctx->regs;
906     RK_U32 part_enc_h;
907     RK_U32 part_enc_mcu_h;
908     RK_U32 part_y_fill;
909     RK_U32 part_not_end;
910 
911     hal_jpege_dbg_func("enter part start %p\n", hal);
912 
913     /* Fix register for each part encoding */
914     task->part_first = !mcu_y;
915     if (mcu_y + part_mcu_h < mcu_h) {
916         part_enc_h = part_mcu_h * 16;
917         part_enc_mcu_h = part_mcu_h;
918         part_y_fill = 0;
919         part_not_end = 1;
920         task->part_last = 0;
921     } else {
922         part_enc_h = syntax->height - mcu_y * 16;
923         part_enc_mcu_h = MPP_ALIGN(part_enc_h, 16) / 16;;
924         part_y_fill = ctx->part_y_fill;
925         part_not_end = 0;
926         task->part_last = 1;
927     }
928 
929     hal_jpege_dbg_detail("part first %d last %d\n", task->part_first, task->part_last);
930 
931     get_msb_lsb_at_pos(&regs[51], &regs[52], ctx->base, ctx->part_bytepos);
932 
933     regs[53] = ctx->size - ctx->part_bytepos;
934 
935     regs[60] = (((ctx->part_bytepos & 7) * 8) << 16) |
936                (ctx->part_x_fill << 4) |
937                (part_y_fill);
938 
939     regs[77] = mpp_buffer_get_fd(task->output);
940     if (ctx->part_bytepos)
941         mpp_dev_set_reg_offset(ctx->dev, 77, ctx->part_bytepos);
942 
943     regs[103] = mcu_w << 8  |
944                 (part_enc_mcu_h) << 20 |
945                 (1 << 6) |  /* intra coding  */
946                 (2 << 4) |  /* format jpeg   */
947                 1;          /* encoder start */
948 
949     hal_jpege_dbg_detail("part_not_end 0x%x, rst_marker_idx %d",
950                          part_not_end, ctx->rst_marker_idx);
951     regs[107] = part_not_end << 24 | jpege_restart_marker[ctx->rst_marker_idx & 7];
952     ctx->rst_marker_idx++;
953 
954     hal_jpege_vepu2_set_extra_info(ctx->dev, syntax, mcu_y);
955     ctx->mcu_y += part_enc_mcu_h;
956 
957     do {
958         MppDevRegWrCfg wr_cfg;
959         MppDevRegRdCfg rd_cfg;
960         RK_U32 reg_size = ctx->reg_size;
961 
962         wr_cfg.reg = ctx->regs;
963         wr_cfg.size = reg_size;
964         wr_cfg.offset = 0;
965 
966         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
967         if (ret) {
968             mpp_err_f("set register write failed %d\n", ret);
969             break;
970         }
971 
972         rd_cfg.reg = ctx->regs_out;
973         rd_cfg.size = reg_size;
974         rd_cfg.offset = 0;
975 
976         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
977         if (ret) {
978             mpp_err_f("set register read failed %d\n", ret);
979             break;
980         }
981 
982         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
983         if (ret) {
984             mpp_err_f("send cmd failed %d\n", ret);
985             break;
986         }
987     } while (0);
988 
989     hal_jpege_dbg_func("leave part start %p\n", hal);
990     (void)task;
991     return ret;
992 }
993 
hal_jpege_vepu2_part_wait(void * hal,HalEncTask * task)994 MPP_RET hal_jpege_vepu2_part_wait(void *hal, HalEncTask *task)
995 {
996     MPP_RET ret = MPP_OK;
997     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
998     RK_U32 *regs = ctx->regs_out;
999     JpegeFeedback *feedback = &ctx->feedback;
1000     RK_U32 hw_bit = 0;
1001 
1002     hal_jpege_dbg_func("enter part wait %p\n", hal);
1003 
1004     if (ctx->dev) {
1005         ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
1006         if (ret)
1007             mpp_err_f("poll cmd failed %d\n", ret);
1008     }
1009 
1010     hal_jpege_dbg_detail("hw_status %08x\n", regs[109]);
1011 
1012     hw_bit = regs[53];
1013 
1014     hal_jpege_dbg_detail("byte pos %d -> %d\n", ctx->part_bytepos,
1015                          (ctx->part_bytepos & (~7)) + (hw_bit / 8));
1016     ctx->part_bytepos = (ctx->part_bytepos & (~7)) + (hw_bit / 8);
1017 
1018     feedback->stream_length = ctx->part_bytepos;
1019     task->length = ctx->part_bytepos;
1020     task->hw_length = task->length - ctx->hal_start_pos;
1021 
1022     hal_jpege_dbg_detail("stream_length %d, hw_byte %d",
1023                          feedback->stream_length, hw_bit / 8);
1024 
1025     hal_jpege_dbg_output("stream bit: sw %d hw %d total %d hw_length %d\n",
1026                          ctx->sw_bit, hw_bit, feedback->stream_length, task->hw_length);
1027 
1028     hal_jpege_dbg_func("leave part wait %p\n", hal);
1029     return ret;
1030 }
1031 
hal_jpege_vepu2_ret_task(void * hal,HalEncTask * task)1032 MPP_RET hal_jpege_vepu2_ret_task(void *hal, HalEncTask *task)
1033 {
1034     HalJpegeCtx *ctx = (HalJpegeCtx *)hal;
1035     EncRcTaskInfo *rc_info = &task->rc_task->info;
1036 
1037     ctx->hal_rc.last_quality = task->rc_task->info.quality_target;
1038     task->rc_task->info.bit_real = ctx->feedback.stream_length * 8;
1039     task->hal_ret.data = &ctx->feedback;
1040     task->hal_ret.number = 1;
1041 
1042     if (ctx->cfg->rc.rc_mode != MPP_ENC_RC_MODE_FIXQP) {
1043         if (!ctx->hal_rc.q_factor)
1044             rc_info->quality_real = rc_info->quality_target;
1045         else
1046             rc_info->quality_real = ctx->hal_rc.q_factor;
1047     } else {
1048         rc_info->quality_real = ctx->cfg->codec.jpeg.q_factor;
1049     }
1050 
1051     return MPP_OK;
1052 }
1053 
1054 const MppEncHalApi hal_jpege_vepu2 = {
1055     .name       = "hal_jpege_vepu2",
1056     .coding     = MPP_VIDEO_CodingMJPEG,
1057     .ctx_size   = sizeof(HalJpegeCtx),
1058     .flag       = 0,
1059     .init       = hal_jpege_vepu2_init,
1060     .deinit     = hal_jpege_vepu2_deinit,
1061     .prepare    = NULL,
1062     .get_task   = hal_jpege_vepu2_get_task,
1063     .gen_regs   = hal_jpege_vepu2_gen_regs,
1064     .start      = hal_jpege_vepu2_start,
1065     .wait       = hal_jpege_vepu2_wait,
1066     .part_start = hal_jpege_vepu2_part_start,
1067     .part_wait  = hal_jpege_vepu2_part_wait,
1068     .ret_task   = hal_jpege_vepu2_ret_task,
1069 };
1070