1 /*
2 * Copyright 2021 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_h264e_vepu580"
18
19 #include <string.h>
20
21 #include "mpp_env.h"
22 #include "mpp_mem.h"
23 #include "mpp_common.h"
24 #include "mpp_frame_impl.h"
25 #include "mpp_packet_impl.h"
26 #include "mpp_rc.h"
27
28 #include "h264e_sps.h"
29 #include "h264e_pps.h"
30 #include "h264e_dpb.h"
31 #include "h264e_slice.h"
32
33 #include "hal_h264e_debug.h"
34 #include "hal_bufs.h"
35 #include "mpp_enc_hal.h"
36 #include "rkv_enc_def.h"
37 #include "vepu541_common.h"
38 #include "vepu5xx_common.h"
39 #include "vepu5xx.h"
40 #include "hal_h264e_vepu580_reg.h"
41 #include "mpp_enc_cb_param.h"
42 #include "hal_h264e_stream_amend.h"
43
44 #define DUMP_REG 0
45 #define MAX_TASK_CNT 2
46
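/*
 * Per-macroblock ROI descriptor written into the hardware ROI buffer
 * (8 bytes per 16x16 block). Field meanings are inferred from how the
 * driver fills them below: force_intra forces intra coding, qp_adj_en
 * enables QP adjustment, amv_en enables absolute MV, and
 * force_inter / mode_mask constrain the inter mode decision.
 */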
47 typedef struct Vepu580RoiH264BsCfg_t {
48 RK_U64 force_inter : 42;
49 RK_U64 mode_mask : 9;
50 RK_U64 reserved : 10;
51 RK_U64 force_intra : 1;
52 RK_U64 qp_adj_en : 1;
53 RK_U64 amv_en : 1;
54 } Vepu580RoiH264BsCfg;
55
56 typedef struct HalH264eVepu580Ctx_t {
57 MppEncCfgSet *cfg;
58
59 MppDev dev;
60 RK_S32 frame_cnt;
61 RK_U32 task_cnt;
62
63 /* buffers management */
64 HalBufs hw_recn;
65 RK_S32 pixel_buf_fbc_hdr_size;
66 RK_S32 pixel_buf_fbc_bdy_size;
67 RK_S32 pixel_buf_size;
68 RK_S32 thumb_buf_size;
69 RK_S32 max_buf_cnt;
70 MppDevRegOffCfgs *offsets;
71
72 /* external line buffer over 4K */
73 MppBufferGroup ext_line_buf_grp;
74 MppBuffer ext_line_bufs[MAX_TASK_CNT];
75 RK_S32 ext_line_buf_size;
76
77 /* syntax for input from enc_impl */
78 RK_U32 updated;
79 H264eSps *sps;
80 H264ePps *pps;
81 H264eDpb *dpb;
82 H264eFrmInfo *frms;
83
84 /* async encode TSVC info */
85 H264eReorderInfo *reorder;
86 H264eMarkingInfo *marking;
87
88 /* syntax for output to enc_impl */
89 EncRcTaskInfo hal_rc_cfg;
90
91 /* roi */
92 void *roi_data;
93 MppBufferGroup roi_grp;
94 MppBuffer roi_base_cfg_buf;
95 RK_S32 roi_base_buf_size;
96
97 /* osd */
98 Vepu541OsdCfg osd_cfg;
99
100 /* finetune */
101 void *tune;
102
103 /* two-pass deflicker */
104 MppBuffer buf_pass1;
105
106 /* register */
107 HalVepu580RegSet *regs_sets;
108 HalH264eVepuStreamAmend *amend_sets;
109
110 H264ePrefixNal *prefix_sets;
111 H264eSlice *slice_sets;
112
113 /* frame parallel info */
114 RK_S32 task_idx;
115 RK_S32 curr_idx;
116 RK_S32 prev_idx;
117 HalVepu580RegSet *regs_set;
118 HalH264eVepuStreamAmend *amend;
119 H264ePrefixNal *prefix;
120 H264eSlice *slice;
121
122 MppBuffer ext_line_buf;
123
124 /* slice low delay output callback */
125 MppCbCtx *output_cb;
126 RK_S32 poll_slice_max;
127 RK_S32 poll_cfg_size;
128 MppDevPollCfg *poll_cfgs;
129 } HalH264eVepu580Ctx;
130
131 #define CHROMA_KLUT_TAB_SIZE (24 * sizeof(RK_U32))
132
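/*
 * Chroma K-LUT weight table: setup_vepu580_rdo_pred() copies
 * CHROMA_KLUT_TAB_SIZE (24 words) starting at index 0 for I slices and
 * at index 4 for P slices, together with a different chrm_klut_ofst.
 */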
133 static RK_U32 h264e_klut_weight[30] = {
134 0x0a000010, 0x00064000, 0x14000020, 0x000c8000,
135 0x28000040, 0x00194000, 0x50800080, 0x0032c000,
136 0xa1000100, 0x00658000, 0x42800200, 0x00cb0001,
137 0x85000400, 0x01964002, 0x0a000800, 0x032c8005,
138 0x14001000, 0x0659400a, 0x28802000, 0x0cb2c014,
139 0x51004000, 0x1965c028, 0xa2808000, 0x32cbc050,
140 0x4500ffff, 0x659780a1, 0x8a81fffe, 0xCC000142,
141 0xFF83FFFF, 0x000001FF,
142 };
143
144 static RK_U32 dump_l1_reg = 0;
145 static RK_U32 dump_l2_reg = 0;
146 static RK_U32 disable_rcb_buf = 0;
147
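/* Maps the 0..15 hw->mode_bias level to the ATF weight programmed by setup_vepu580_rdo_bias_cfg(). */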
148 static RK_U32 h264_mode_bias[16] = {
149 0, 2, 4, 6,
150 8, 10, 12, 14,
151 16, 18, 20, 24,
152 28, 32, 64, 128
153 };
154
155 static RK_S32 h264_aq_tthd_default[16] = {
156 0, 0, 0, 0,
157 3, 3, 5, 5,
158 8, 8, 8, 15,
159 15, 20, 25, 35,
160 };
161
162 static RK_S32 h264_P_aq_step_default[16] = {
163 -8, -7, -6, -5,
164 -4, -3, -2, -1,
165 0, 1, 2, 3,
166 4, 5, 7, 8,
167 };
168
169 static RK_S32 h264_I_aq_step_default[16] = {
170 -8, -7, -6, -5,
171 -4, -3, -2, -1,
172 0, 1, 2, 3,
173 4, 5, 6, 8,
174 };
175
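/* The tuning code is included as a source file so its static helpers share this translation unit. */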
176 #include "hal_h264e_vepu580_tune.c"
177
178 static void setup_ext_line_bufs(HalH264eVepu580Ctx *ctx)
179 {
180 RK_U32 i;
181
182 for (i = 0; i < ctx->task_cnt; i++) {
183 if (ctx->ext_line_bufs[i])
184 continue;
185
186 mpp_buffer_get(ctx->ext_line_buf_grp, &ctx->ext_line_bufs[i],
187 ctx->ext_line_buf_size);
188 }
189 }
190
191 static void clear_ext_line_bufs(HalH264eVepu580Ctx *ctx)
192 {
193 RK_U32 i;
194
195 for (i = 0; i < ctx->task_cnt; i++) {
196 if (ctx->ext_line_bufs[i]) {
197 mpp_buffer_put(ctx->ext_line_bufs[i]);
198 ctx->ext_line_bufs[i] = NULL;
199 }
200 }
201 }
202
203 static MPP_RET hal_h264e_vepu580_deinit(void *hal)
204 {
205 HalH264eVepu580Ctx *p = (HalH264eVepu580Ctx *)hal;
206 RK_U32 i;
207
208 hal_h264e_dbg_func("enter %p\n", p);
209
210 if (p->dev) {
211 mpp_dev_deinit(p->dev);
212 p->dev = NULL;
213 }
214
215 clear_ext_line_bufs(p);
216
217 for (i = 0; i < p->task_cnt; i++)
218 h264e_vepu_stream_amend_deinit(&p->amend_sets[i]);
219
220 MPP_FREE(p->regs_sets);
221 MPP_FREE(p->amend_sets);
222 MPP_FREE(p->prefix_sets);
223 MPP_FREE(p->slice_sets);
224 MPP_FREE(p->reorder);
225 MPP_FREE(p->marking);
226 MPP_FREE(p->poll_cfgs);
227
228 if (p->ext_line_buf_grp) {
229 mpp_buffer_group_put(p->ext_line_buf_grp);
230 p->ext_line_buf_grp = NULL;
231 }
232
233 if (p->hw_recn) {
234 hal_bufs_deinit(p->hw_recn);
235 p->hw_recn = NULL;
236 }
237
238 if (p->roi_base_cfg_buf) {
239 mpp_buffer_put(p->roi_base_cfg_buf);
240 p->roi_base_cfg_buf = NULL;
241 p->roi_base_buf_size = 0;
242 }
243
244 if (p->roi_grp) {
245 mpp_buffer_group_put(p->roi_grp);
246 p->roi_grp = NULL;
247 }
248
249 if (p->offsets) {
250 mpp_dev_multi_offset_deinit(p->offsets);
251 p->offsets = NULL;
252 }
253
254 if (p->buf_pass1) {
255 mpp_buffer_put(p->buf_pass1);
256 p->buf_pass1 = NULL;
257 }
258
259 if (p->tune) {
260 vepu580_h264e_tune_deinit(p->tune);
261 p->tune = NULL;
262 }
263
264 hal_h264e_dbg_func("leave %p\n", p);
265
266 return MPP_OK;
267 }
268
269 static MPP_RET hal_h264e_vepu580_init(void *hal, MppEncHalCfg *cfg)
270 {
271 HalH264eVepu580Ctx *p = (HalH264eVepu580Ctx *)hal;
272 MPP_RET ret = MPP_OK;
273 RK_U32 i;
274
275 hal_h264e_dbg_func("enter %p\n", p);
276
277 p->cfg = cfg->cfg;
278
279 mpp_env_get_u32("disable_rcb_buf", &disable_rcb_buf, 0);
280
281 /* update output to MppEnc */
282 cfg->type = VPU_CLIENT_RKVENC;
283 ret = mpp_dev_init(&cfg->dev, cfg->type);
284 if (ret) {
285 mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
286 goto DONE;
287 }
288 p->dev = cfg->dev;
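/* task_cnt from MppEnc is 1 for serial encoding or 2 for frame-parallel (async) encoding. */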
289 p->task_cnt = cfg->task_cnt;
290 mpp_assert(p->task_cnt && p->task_cnt <= MAX_TASK_CNT);
291
292 ret = hal_bufs_init(&p->hw_recn);
293 if (ret) {
294 mpp_err_f("init vepu buffer failed ret: %d\n", ret);
295 goto DONE;
296 }
297
298 p->regs_sets = mpp_malloc(HalVepu580RegSet, p->task_cnt);
299 if (NULL == p->regs_sets) {
300 ret = MPP_ERR_MALLOC;
301 mpp_err_f("init register buffer failed\n");
302 goto DONE;
303 }
304 p->amend_sets = mpp_malloc(HalH264eVepuStreamAmend, p->task_cnt);
305 if (NULL == p->amend_sets) {
306 ret = MPP_ERR_MALLOC;
307 mpp_err_f("init amend data failed\n");
308 goto DONE;
309 }
310
311 if (p->task_cnt > 1) {
312 p->prefix_sets = mpp_malloc(H264ePrefixNal, p->task_cnt);
313 if (NULL == p->prefix_sets) {
314 ret = MPP_ERR_MALLOC;
315 mpp_err_f("init prefix nal data failed\n");
316 goto DONE;
317 }
318
319 p->slice_sets = mpp_malloc(H264eSlice, p->task_cnt);
320 if (NULL == p->slice_sets) {
321 ret = MPP_ERR_MALLOC;
322 mpp_err_f("init slice data failed\n");
323 goto DONE;
324 }
325
326 p->reorder = mpp_malloc(H264eReorderInfo, 1);
327 if (NULL == p->reorder) {
328 ret = MPP_ERR_MALLOC;
329 mpp_err_f("init reorder info failed\n");
330 goto DONE;
331 }
332
333 p->marking = mpp_malloc(H264eMarkingInfo, 1);
334 if (NULL == p->marking) {
335 ret = MPP_ERR_MALLOC;
336 mpp_err_f("init marking info failed\n");
337 goto DONE;
338 }
339 }
340
341 p->poll_slice_max = 8;
342 p->poll_cfg_size = (sizeof(p->poll_cfgs) + sizeof(RK_S32) * p->poll_slice_max);
343 p->poll_cfgs = mpp_malloc_size(MppDevPollCfg, p->poll_cfg_size * p->task_cnt);
344 if (NULL == p->poll_cfgs) {
345 ret = MPP_ERR_MALLOC;
346 mpp_err_f("init poll cfg buffer failed\n");
347 goto DONE;
348 }
349
350 p->osd_cfg.reg_base = &p->regs_sets->reg_osd;
351 p->osd_cfg.dev = p->dev;
352 p->osd_cfg.reg_cfg = NULL;
353 p->osd_cfg.plt_cfg = &p->cfg->plt_cfg;
354 p->osd_cfg.osd_data = NULL;
355 p->osd_cfg.osd_data2 = NULL;
356
357 { /* setup default hardware config */
358 MppEncHwCfg *hw = &cfg->cfg->hw;
359
360 hw->qp_delta_row_i = 2;
361 hw->qp_delta_row = 2;
362 hw->extra_buf = 1;
363
364 memcpy(hw->aq_thrd_i, h264_aq_tthd_default, sizeof(hw->aq_thrd_i));
365 memcpy(hw->aq_thrd_p, h264_aq_tthd_default, sizeof(hw->aq_thrd_p));
366 memcpy(hw->aq_step_i, h264_I_aq_step_default, sizeof(hw->aq_step_i));
367 memcpy(hw->aq_step_p, h264_P_aq_step_default, sizeof(hw->aq_step_p));
368
369 for (i = 0; i < MPP_ARRAY_ELEMS(hw->mode_bias); i++)
370 hw->mode_bias[i] = 8;
371
372 hw->skip_sad = 8;
373 hw->skip_bias = 8;
374 }
375 mpp_dev_multi_offset_init(&p->offsets, 24);
376 p->osd_cfg.reg_cfg = p->offsets;
377
378 p->tune = vepu580_h264e_tune_init(p);
379 p->output_cb = cfg->output_cb;
380
381 cfg->cap_recn_out = 1;
382
383 for (i = 0; i < p->task_cnt; i++)
384 h264e_vepu_stream_amend_init(&p->amend_sets[i]);
385
386 DONE:
387 if (ret)
388 hal_h264e_vepu580_deinit(hal);
389
390 hal_h264e_dbg_func("leave %p\n", p);
391 return ret;
392 }
393
394 /*
395 * NOTE: recon / refer buffer is FBC data buffer.
396 * And FBC data require extra 16 lines space for hardware io.
397 */
398 static void setup_hal_bufs(HalH264eVepu580Ctx *ctx)
399 {
400 MppEncCfgSet *cfg = ctx->cfg;
401 MppEncPrepCfg *prep = &cfg->prep;
402 RK_S32 alignment_w = 64;
403 RK_S32 alignment_h = 16;
404 RK_S32 aligned_w = MPP_ALIGN(prep->width, alignment_w);
405 RK_S32 aligned_h = MPP_ALIGN(prep->height, alignment_h) + 16;
406 RK_S32 pixel_buf_fbc_hdr_size = MPP_ALIGN(aligned_w * aligned_h / 64, SZ_8K);
407 RK_S32 pixel_buf_fbc_bdy_size = aligned_w * aligned_h * 3 / 2;
408 RK_S32 pixel_buf_size = pixel_buf_fbc_hdr_size + pixel_buf_fbc_bdy_size;
409 RK_S32 thumb_buf_size = MPP_ALIGN(aligned_w / 64 * aligned_h / 64 * 256, SZ_8K);
410 RK_S32 old_max_cnt = ctx->max_buf_cnt;
411 RK_S32 new_max_cnt = 4;
412 MppEncRefCfg ref_cfg = cfg->ref_cfg;
413
414 if (ref_cfg) {
415 MppEncCpbInfo *info = mpp_enc_ref_cfg_get_cpb_info(ref_cfg);
416 if (new_max_cnt < MPP_MAX(new_max_cnt, info->dpb_size + 1))
417 new_max_cnt = MPP_MAX(new_max_cnt, info->dpb_size + 1);
418 }
419
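/*
 * Assumption: widths above 4K exceed the encoder's on-chip line buffer, so an
 * external line buffer is allocated per task; its size follows the
 * "480 bytes for each ctu above 3072" rule noted below.
 */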
420 if (aligned_w > SZ_4K) {
421 /* 480 bytes for each ctu above 3072 */
422 RK_S32 ext_line_buf_size = (aligned_w - 3 * SZ_1K) / 64 * 480;
423
424 if (NULL == ctx->ext_line_buf_grp)
425 mpp_buffer_group_get_internal(&ctx->ext_line_buf_grp, MPP_BUFFER_TYPE_ION);
426 else if (ext_line_buf_size != ctx->ext_line_buf_size) {
427 clear_ext_line_bufs(ctx);
428 mpp_buffer_group_clear(ctx->ext_line_buf_grp);
429 }
430
431 mpp_assert(ctx->ext_line_buf_grp);
432
433 ctx->ext_line_buf_size = ext_line_buf_size;
434 setup_ext_line_bufs(ctx);
435 } else {
436 clear_ext_line_bufs(ctx);
437 if (ctx->ext_line_buf_grp) {
438 mpp_buffer_group_clear(ctx->ext_line_buf_grp);
439 mpp_buffer_group_put(ctx->ext_line_buf_grp);
440 ctx->ext_line_buf_grp = NULL;
441 }
442 ctx->ext_line_buf_size = 0;
443 }
444
445 if ((ctx->pixel_buf_fbc_hdr_size != pixel_buf_fbc_hdr_size) ||
446 (ctx->pixel_buf_fbc_bdy_size != pixel_buf_fbc_bdy_size) ||
447 (ctx->pixel_buf_size != pixel_buf_size) ||
448 (ctx->thumb_buf_size != thumb_buf_size) ||
449 (new_max_cnt > old_max_cnt)) {
450 size_t sizes[2];
451
452 hal_h264e_dbg_detail("frame size %d -> %d max count %d -> %d\n",
453 ctx->pixel_buf_size, pixel_buf_size,
454 old_max_cnt, new_max_cnt);
455
456 /* pixel buffer */
457 sizes[0] = pixel_buf_size;
458 /* thumb buffer */
459 sizes[1] = thumb_buf_size;
460 new_max_cnt = MPP_MAX(new_max_cnt, old_max_cnt);
461
462 hal_bufs_setup(ctx->hw_recn, new_max_cnt, 2, sizes);
463
464 ctx->pixel_buf_fbc_hdr_size = pixel_buf_fbc_hdr_size;
465 ctx->pixel_buf_fbc_bdy_size = pixel_buf_fbc_bdy_size;
466 ctx->pixel_buf_size = pixel_buf_size;
467 ctx->thumb_buf_size = thumb_buf_size;
468 ctx->max_buf_cnt = new_max_cnt;
469 }
470 }
471
472 static MPP_RET hal_h264e_vepu580_prepare(void *hal)
473 {
474 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
475 MppEncPrepCfg *prep = &ctx->cfg->prep;
476
477 hal_h264e_dbg_func("enter %p\n", hal);
478
479 if (prep->change & (MPP_ENC_PREP_CFG_CHANGE_INPUT | MPP_ENC_PREP_CFG_CHANGE_FORMAT)) {
480 RK_S32 i;
481
482 // pre-alloc required buffers to reduce first frame delay
483 setup_hal_bufs(ctx);
484 for (i = 0; i < ctx->max_buf_cnt; i++)
485 hal_bufs_get_buf(ctx->hw_recn, i);
486
487 prep->change = 0;
488 }
489
490 hal_h264e_dbg_func("leave %p\n", hal);
491
492 return MPP_OK;
493 }
494
495 static RK_U32 update_vepu580_syntax(HalH264eVepu580Ctx *ctx, MppSyntax *syntax)
496 {
497 H264eSyntaxDesc *desc = syntax->data;
498 RK_S32 syn_num = syntax->number;
499 RK_U32 updated = 0;
500 RK_S32 i;
501
502 for (i = 0; i < syn_num; i++, desc++) {
503 switch (desc->type) {
504 case H264E_SYN_CFG : {
505 hal_h264e_dbg_detail("update cfg");
506 ctx->cfg = desc->p;
507 } break;
508 case H264E_SYN_SPS : {
509 hal_h264e_dbg_detail("update sps");
510 ctx->sps = desc->p;
511 } break;
512 case H264E_SYN_PPS : {
513 hal_h264e_dbg_detail("update pps");
514 ctx->pps = desc->p;
515 } break;
516 case H264E_SYN_DPB : {
517 hal_h264e_dbg_detail("update dpb");
518 ctx->dpb = desc->p;
519 } break;
520 case H264E_SYN_SLICE : {
521 hal_h264e_dbg_detail("update slice");
522 ctx->slice = desc->p;
523 } break;
524 case H264E_SYN_FRAME : {
525 hal_h264e_dbg_detail("update frames");
526 ctx->frms = desc->p;
527 } break;
528 case H264E_SYN_PREFIX : {
529 hal_h264e_dbg_detail("update prefix nal");
530 ctx->prefix = desc->p;
531 } break;
532 default : {
533 mpp_log_f("invalid syntax type %d\n", desc->type);
534 } break;
535 }
536
537 updated |= SYN_TYPE_FLAG(desc->type);
538 }
539
540 return updated;
541 }
542
543 static MPP_RET hal_h264e_vepu580_get_task(void *hal, HalEncTask *task)
544 {
545 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
546 MppEncCfgSet *cfg_set = ctx->cfg;
547 MppEncRefCfgImpl *ref = (MppEncRefCfgImpl *)cfg_set->ref_cfg;
548 MppEncH264HwCfg *hw_cfg = &cfg_set->codec.h264.hw_cfg;
549 RK_U32 updated = update_vepu580_syntax(ctx, &task->syntax);
550 EncFrmStatus *frm_status = &task->rc_task->frm;
551 H264eFrmInfo *frms = ctx->frms;
552
553 hal_h264e_dbg_func("enter %p\n", hal);
554
555 if (updated & SYN_TYPE_FLAG(H264E_SYN_CFG))
556 setup_hal_bufs(ctx);
557
558 if (!frm_status->reencode && mpp_frame_has_meta(task->frame)) {
559 MppMeta meta = mpp_frame_get_meta(task->frame);
560
561 mpp_meta_get_ptr_d(meta, KEY_ROI_DATA2, (void **)&ctx->roi_data, NULL);
562 mpp_meta_get_ptr_d(meta, KEY_OSD_DATA, (void **)&ctx->osd_cfg.osd_data, NULL);
563 mpp_meta_get_ptr_d(meta, KEY_OSD_DATA2, (void **)&ctx->osd_cfg.osd_data2, NULL);
564 }
565
566 if (ctx->dpb) {
567 h264e_dpb_hal_start(ctx->dpb, frms->curr_idx);
568 h264e_dpb_hal_start(ctx->dpb, frms->refr_idx);
569 }
570
571 task->flags.reg_idx = ctx->task_idx;
572 task->flags.curr_idx = frms->curr_idx;
573 task->flags.refr_idx = frms->refr_idx;
574 task->part_first = 1;
575 task->part_last = 0;
576
577 ctx->ext_line_buf = ctx->ext_line_bufs[ctx->task_idx];
578 ctx->regs_set = &ctx->regs_sets[ctx->task_idx];
579 ctx->amend = &ctx->amend_sets[ctx->task_idx];
580 ctx->osd_cfg.reg_base = &ctx->regs_set->reg_osd;
581
582 /* if not VEPU1/2, update log2_max_frame_num_minus4 in hw_cfg */
583 hw_cfg->hw_log2_max_frame_num_minus4 = ctx->sps->log2_max_frame_num_minus4;
584
585 if (ctx->task_cnt > 1 && (ref->lt_cfg_cnt || ref->st_cfg_cnt > 1)) {
586 H264ePrefixNal *prefix = &ctx->prefix_sets[ctx->task_idx];
587 H264eSlice *slice = &ctx->slice_sets[ctx->task_idx];
588
589 //store async encode TSVC info
590 if (ctx->prefix)
591 memcpy(prefix, ctx->prefix, sizeof(H264ePrefixNal));
592 if (ctx->slice) {
593 memcpy(slice, ctx->slice, sizeof(H264eSlice));
594
595 /*
596 * Generally, reorder and marking are shared by dpb and slice.
597 * However, async encoding TSVC will change reorder and marking in each task.
598 * Therefore, malloc a special space for async encoding TSVC.
599 */
600 ctx->amend->reorder = ctx->reorder;
601 ctx->amend->marking = ctx->marking;
602 }
603
604 h264e_vepu_stream_amend_config(ctx->amend, task->packet, ctx->cfg,
605 slice, prefix);
606 } else {
607 h264e_vepu_stream_amend_config(ctx->amend, task->packet, ctx->cfg,
608 ctx->slice, ctx->prefix);
609 }
610
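/* Frame-parallel mode ping-pongs between the two register/amend sets. */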
611 if (ctx->task_cnt > 1)
612 ctx->task_idx = !ctx->task_idx;
613
614 hal_h264e_dbg_func("leave %p\n", hal);
615
616 return MPP_OK;
617 }
618
619 static void setup_vepu580_normal(HalVepu580RegSet *regs)
620 {
621 hal_h264e_dbg_func("enter\n");
622 /* reg000 VERSION is read only */
623
624 /* reg001 ENC_STRT */
625 regs->reg_ctl.enc_strt.lkt_num = 0;
626 regs->reg_ctl.enc_strt.vepu_cmd = 1;
627 regs->reg_ctl.func_en.cke = 1;
628 regs->reg_ctl.func_en.resetn_hw_en = 1;
629 regs->reg_ctl.func_en.enc_done_tmvp_en = 1;
630
631 /* reg002 ENC_CLR */
632 regs->reg_ctl.enc_clr.safe_clr = 0;
633 regs->reg_ctl.enc_clr.force_clr = 0;
634
635 /* reg003 LKT_ADDR */
636 // regs->reg_ctl.lkt_addr = 0;
637
638 /* reg004 INT_EN */
639 regs->reg_ctl.int_en.enc_done_en = 1;
640 regs->reg_ctl.int_en.lkt_node_done_en = 1;
641 regs->reg_ctl.int_en.sclr_done_en = 1;
642 regs->reg_ctl.int_en.slc_done_en = 0;
643 regs->reg_ctl.int_en.bsf_oflw_en = 1;
644 regs->reg_ctl.int_en.brsp_otsd_en = 1;
645 regs->reg_ctl.int_en.wbus_err_en = 1;
646 regs->reg_ctl.int_en.rbus_err_en = 1;
647 regs->reg_ctl.int_en.wdg_en = 1;
648
649 /* reg005 INT_MSK */
650 regs->reg_ctl.int_msk.enc_done_msk = 0;
651 regs->reg_ctl.int_msk.lkt_node_done_msk = 0;
652 regs->reg_ctl.int_msk.sclr_done_msk = 0;
653 regs->reg_ctl.int_msk.slc_done_msk = 0;
654 regs->reg_ctl.int_msk.bsf_oflw_msk = 0;
655 regs->reg_ctl.int_msk.brsp_otsd_msk = 0;
656 regs->reg_ctl.int_msk.wbus_err_msk = 0;
657 regs->reg_ctl.int_msk.rbus_err_msk = 0;
658 regs->reg_ctl.int_msk.wdg_msk = 0;
659
660 /* reg006 INT_CLR is not set */
661 /* reg007 INT_STA is read only */
662 /* reg008 ~ reg0011 gap */
663 regs->reg_ctl.enc_wdg.vs_load_thd = 0x1fffff;
664 regs->reg_ctl.enc_wdg.rfp_load_thd = 0;
665
666 /* reg015 DTRNS_MAP */
667 regs->reg_ctl.dtrns_map.cmvw_bus_ordr = 0;
668 regs->reg_ctl.dtrns_map.dspw_bus_ordr = 0;
669 regs->reg_ctl.dtrns_map.rfpw_bus_ordr = 0;
670 regs->reg_ctl.dtrns_map.src_bus_edin = 0;
671 regs->reg_ctl.dtrns_map.meiw_bus_edin = 0;
672 regs->reg_ctl.dtrns_map.bsw_bus_edin = 7;
673 regs->reg_ctl.dtrns_map.lktr_bus_edin = 0;
674 regs->reg_ctl.dtrns_map.roir_bus_edin = 0;
675 regs->reg_ctl.dtrns_map.lktw_bus_edin = 0;
676 regs->reg_ctl.dtrns_map.afbc_bsize = 1;
677
678 regs->reg_ctl.dtrns_cfg.axi_brsp_cke = 0;
679 regs->reg_ctl.dtrns_cfg.dspr_otsd = 1;
680 hal_h264e_dbg_func("leave\n");
681 }
682
683 static MPP_RET setup_vepu580_prep(HalVepu580RegSet *regs, MppEncPrepCfg *prep,
684 HalEncTask *task)
685 {
686 VepuFmtCfg cfg;
687 MppFrameFormat fmt = prep->format;
688 MPP_RET ret = vepu541_set_fmt(&cfg, fmt);
689 RK_U32 hw_fmt = cfg.format;
690 RK_S32 y_stride;
691 RK_S32 c_stride;
692
693 hal_h264e_dbg_func("enter\n");
694
695 /* do nothing when color format is not supported */
696 if (ret)
697 return ret;
698
699 regs->reg_base.enc_rsl.pic_wd8_m1 = MPP_ALIGN(prep->width, 16) / 8 - 1;
700 regs->reg_base.src_fill.pic_wfill = MPP_ALIGN(prep->width, 16) - prep->width;
701 regs->reg_base.enc_rsl.pic_hd8_m1 = MPP_ALIGN(prep->height, 16) / 8 - 1;
702 regs->reg_base.src_fill.pic_hfill = MPP_ALIGN(prep->height, 16) - prep->height;
703
704 regs->reg_ctl.dtrns_map.src_bus_edin = cfg.src_endian;
705
706 regs->reg_base.src_fmt.src_cfmt = hw_fmt;
707 regs->reg_base.src_fmt.alpha_swap = cfg.alpha_swap;
708 regs->reg_base.src_fmt.rbuv_swap = cfg.rbuv_swap;
709 regs->reg_base.src_fmt.src_range = (prep->range == MPP_FRAME_RANGE_JPEG ? 1 : 0);
710 regs->reg_base.src_fmt.out_fmt = 1;
711
712 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
713 y_stride = mpp_frame_get_fbc_hdr_stride(task->frame);
714 if (!y_stride)
715 y_stride = MPP_ALIGN(prep->width, 16);
716 } else if (prep->hor_stride) {
717 y_stride = prep->hor_stride;
718 } else {
719 if (hw_fmt == VEPU541_FMT_BGRA8888 )
720 y_stride = prep->width * 4;
721 else if (hw_fmt == VEPU541_FMT_BGR888 )
722 y_stride = prep->width * 3;
723 else if (hw_fmt == VEPU541_FMT_BGR565 ||
724 hw_fmt == VEPU541_FMT_YUYV422 ||
725 hw_fmt == VEPU541_FMT_UYVY422)
726 y_stride = prep->width * 2;
727 else
728 y_stride = prep->width;
729 }
730
731 switch (hw_fmt) {
732 case VEPU580_FMT_YUV444SP : {
733 c_stride = y_stride * 2;
734 } break;
735 case VEPU541_FMT_YUV422SP :
736 case VEPU541_FMT_YUV420SP :
737 case VEPU580_FMT_YUV444P : {
738 c_stride = y_stride;
739 } break;
740 default : {
741 c_stride = y_stride / 2;
742 } break;
743 }
744
745 if (hw_fmt < VEPU541_FMT_NONE) {
746 const VepuRgb2YuvCfg *cfg_coeffs = get_rgb2yuv_cfg(prep->range, prep->color);
747
748 hal_h264e_dbg_flow("input color range %d colorspace %d", prep->range, prep->color);
749
750 regs->reg_base.src_udfy.csc_wgt_b2y = cfg_coeffs->_2y.b_coeff;
751 regs->reg_base.src_udfy.csc_wgt_g2y = cfg_coeffs->_2y.g_coeff;
752 regs->reg_base.src_udfy.csc_wgt_r2y = cfg_coeffs->_2y.r_coeff;
753
754 regs->reg_base.src_udfu.csc_wgt_b2u = cfg_coeffs->_2u.b_coeff;
755 regs->reg_base.src_udfu.csc_wgt_g2u = cfg_coeffs->_2u.g_coeff;
756 regs->reg_base.src_udfu.csc_wgt_r2u = cfg_coeffs->_2u.r_coeff;
757
758 regs->reg_base.src_udfv.csc_wgt_b2v = cfg_coeffs->_2v.b_coeff;
759 regs->reg_base.src_udfv.csc_wgt_g2v = cfg_coeffs->_2v.g_coeff;
760 regs->reg_base.src_udfv.csc_wgt_r2v = cfg_coeffs->_2v.r_coeff;
761
762 regs->reg_base.src_udfo.csc_ofst_y = cfg_coeffs->_2y.offset;
763 regs->reg_base.src_udfo.csc_ofst_u = cfg_coeffs->_2u.offset;
764 regs->reg_base.src_udfo.csc_ofst_v = cfg_coeffs->_2v.offset;
765
766 hal_h264e_dbg_flow("use color range %d colorspace %d", cfg_coeffs->dst_range, cfg_coeffs->color);
767 } else {
768 regs->reg_base.src_udfy.csc_wgt_b2y = cfg.weight[0];
769 regs->reg_base.src_udfy.csc_wgt_g2y = cfg.weight[1];
770 regs->reg_base.src_udfy.csc_wgt_r2y = cfg.weight[2];
771
772 regs->reg_base.src_udfu.csc_wgt_b2u = cfg.weight[3];
773 regs->reg_base.src_udfu.csc_wgt_g2u = cfg.weight[4];
774 regs->reg_base.src_udfu.csc_wgt_r2u = cfg.weight[5];
775
776 regs->reg_base.src_udfv.csc_wgt_b2v = cfg.weight[6];
777 regs->reg_base.src_udfv.csc_wgt_g2v = cfg.weight[7];
778 regs->reg_base.src_udfv.csc_wgt_r2v = cfg.weight[8];
779
780 regs->reg_base.src_udfo.csc_ofst_y = cfg.offset[0];
781 regs->reg_base.src_udfo.csc_ofst_u = cfg.offset[1];
782 regs->reg_base.src_udfo.csc_ofst_v = cfg.offset[2];
783 }
784
785 regs->reg_base.src_proc.afbcd_en = MPP_FRAME_FMT_IS_FBC(fmt) ? 1 : 0;
786 regs->reg_base.src_strd0.src_strd0 = y_stride;
787 regs->reg_base.src_strd1.src_strd1 = c_stride;
788
789 regs->reg_base.src_proc.src_mirr = prep->mirroring > 0;
790 regs->reg_base.src_proc.src_rot = prep->rotation;
791 regs->reg_base.src_proc.txa_en = 0;
792
793 regs->reg_base.sli_cfg.sli_crs_en = 1;
794
795 regs->reg_base.pic_ofst.pic_ofst_y = 0;
796 regs->reg_base.pic_ofst.pic_ofst_x = 0;
797
798 hal_h264e_dbg_func("leave\n");
799
800 return ret;
801 }
802
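/*
 * Two-pass deflicker ("debreath") support: pass one stores the reconstructed
 * frame into buf_pass1 with FBC disabled and slice splitting off, so the
 * second pass can read it back as its input (see use_pass1_patch below).
 */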
803 static MPP_RET vepu580_h264e_save_pass1_patch(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ctx)
804 {
805 RK_S32 width_align = MPP_ALIGN(ctx->cfg->prep.width, 16);
806 RK_S32 height_align = MPP_ALIGN(ctx->cfg->prep.height, 16);
807
808 if (NULL == ctx->buf_pass1) {
809 mpp_buffer_get(NULL, &ctx->buf_pass1, width_align * height_align * 3 / 2);
810 if (!ctx->buf_pass1) {
811 mpp_err("buf_pass1 malloc failed, debreath invalid");
812 return MPP_NOK;
813 }
814 }
815
816 regs->reg_base.enc_pic.cur_frm_ref = 1;
817 regs->reg_base.rfpw_h_addr = mpp_buffer_get_fd(ctx->buf_pass1);
818 regs->reg_base.rfpw_b_addr = regs->reg_base.rfpw_h_addr;
819 regs->reg_base.enc_pic.rec_fbc_dis = 1;
820
821 mpp_dev_multi_offset_update(ctx->offsets, 164, width_align * height_align);
822
823 /* NOTE: disable split to avoid lowdelay slice output */
824 regs->reg_base.sli_splt.sli_splt = 0;
825 regs->reg_base.enc_pic.slen_fifo = 0;
826
827 return MPP_OK;
828 }
829
830 static MPP_RET vepu580_h264e_use_pass1_patch(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ctx)
831 {
832 MppEncPrepCfg *prep = &ctx->cfg->prep;
833 RK_S32 hor_stride = MPP_ALIGN(prep->width, 16);
834 RK_S32 ver_stride = MPP_ALIGN(prep->height, 16);
835 RK_S32 frame_size = hor_stride * ver_stride;
836 RK_S32 fd_in = mpp_buffer_get_fd(ctx->buf_pass1);
837 RK_S32 y_stride;
838 RK_S32 c_stride;
839
840 hal_h264e_dbg_func("enter\n");
841
842 regs->reg_base.src_fmt.src_cfmt = VEPU541_FMT_YUV420SP;
843 regs->reg_base.src_fmt.alpha_swap = 0;
844 regs->reg_base.src_fmt.rbuv_swap = 0;
845 regs->reg_base.src_fmt.out_fmt = 1;
846
847 y_stride = MPP_ALIGN(prep->width, 16);
848 c_stride = y_stride;
849
850 regs->reg_base.src_proc.afbcd_en = 0;
851 regs->reg_base.src_strd0.src_strd0 = y_stride;
852 regs->reg_base.src_strd1.src_strd1 = c_stride;
853
854 regs->reg_base.src_proc.src_mirr = 0;
855 regs->reg_base.src_proc.src_rot = 0;
856 regs->reg_base.src_proc.txa_en = 0;
857
858 regs->reg_base.pic_ofst.pic_ofst_y = 0;
859 regs->reg_base.pic_ofst.pic_ofst_x = 0;
860
861
862 regs->reg_base.adr_src0 = fd_in;
863 regs->reg_base.adr_src1 = fd_in;
864 regs->reg_base.adr_src2 = fd_in;
865
866 mpp_dev_multi_offset_update(ctx->offsets, 161, frame_size);
867 mpp_dev_multi_offset_update(ctx->offsets, 162, frame_size);
868
869 hal_h264e_dbg_func("leave\n");
870 return MPP_OK;
871 }
872
873 static void setup_vepu580_codec(HalVepu580RegSet *regs, H264eSps *sps,
874 H264ePps *pps, H264eSlice *slice)
875 {
876 hal_h264e_dbg_func("enter\n");
877
878 regs->reg_base.enc_pic.enc_stnd = 0;
879 regs->reg_base.enc_pic.cur_frm_ref = slice->nal_reference_idc > 0;
880 regs->reg_base.enc_pic.bs_scp = 1;
881 //regs->reg013.lamb_mod_sel = (slice->slice_type == H264_I_SLICE) ? 0 : 1;
882 //regs->reg013.atr_thd_sel = 0;
883 // regs->reg_ctl.lkt_node_cfg.node_int = 0;
884
885 regs->reg_base.synt_nal.nal_ref_idc = slice->nal_reference_idc;
886 regs->reg_base.synt_nal.nal_unit_type = slice->nalu_type;
887
888 regs->reg_base.synt_sps.max_fnum = sps->log2_max_frame_num_minus4;
889 regs->reg_base.synt_sps.drct_8x8 = sps->direct8x8_inference;
890 regs->reg_base.synt_sps.mpoc_lm4 = sps->log2_max_poc_lsb_minus4;
891
892 regs->reg_base.synt_pps.etpy_mode = pps->entropy_coding_mode;
893 regs->reg_base.synt_pps.trns_8x8 = pps->transform_8x8_mode;
894 regs->reg_base.synt_pps.csip_flag = pps->constrained_intra_pred;
895 regs->reg_base.synt_pps.num_ref0_idx = pps->num_ref_idx_l0_default_active - 1;
896 regs->reg_base.synt_pps.num_ref1_idx = pps->num_ref_idx_l1_default_active - 1;
897 regs->reg_base.synt_pps.pic_init_qp = pps->pic_init_qp;
898 regs->reg_base.synt_pps.cb_ofst = pps->chroma_qp_index_offset;
899 regs->reg_base.synt_pps.cr_ofst = pps->second_chroma_qp_index_offset;
900 regs->reg_base.synt_pps.wght_pred = pps->weighted_pred;
901 regs->reg_base.synt_pps.dbf_cp_flg = pps->deblocking_filter_control;
902
903 regs->reg_base.synt_sli0.sli_type = (slice->slice_type == H264_I_SLICE) ? (2) : (0);
904 regs->reg_base.synt_sli0.pps_id = slice->pic_parameter_set_id;
905 regs->reg_base.synt_sli0.drct_smvp = 0;
906 regs->reg_base.synt_sli0.num_ref_ovrd = slice->num_ref_idx_override;
907 regs->reg_base.synt_sli0.cbc_init_idc = slice->cabac_init_idc;
908 regs->reg_base.synt_sli0.frm_num = slice->frame_num;
909
910 regs->reg_base.synt_sli1.idr_pid = (slice->slice_type == H264_I_SLICE) ? slice->idr_pic_id : (RK_U32)(-1);
911 regs->reg_base.synt_sli1.poc_lsb = slice->pic_order_cnt_lsb;
912
913
914 regs->reg_base.synt_sli2.dis_dblk_idc = slice->disable_deblocking_filter_idc;
915 regs->reg_base.synt_sli2.sli_alph_ofst = slice->slice_alpha_c0_offset_div2;
916
917 h264e_reorder_rd_rewind(slice->reorder);
918 { /* reorder process */
919 H264eRplmo rplmo;
920 MPP_RET ret = h264e_reorder_rd_op(slice->reorder, &rplmo);
921
922 if (MPP_OK == ret) {
923 regs->reg_base.synt_sli2.ref_list0_rodr = 1;
924 regs->reg_base.synt_sli2.rodr_pic_idx = rplmo.modification_of_pic_nums_idc;
925
926 switch (rplmo.modification_of_pic_nums_idc) {
927 case 0 :
928 case 1 : {
929 regs->reg_base.synt_sli2.rodr_pic_num = rplmo.abs_diff_pic_num_minus1;
930 } break;
931 case 2 : {
932 regs->reg_base.synt_sli2.rodr_pic_num = rplmo.long_term_pic_idx;
933 } break;
934 default : {
935 mpp_err_f("invalid modification_of_pic_nums_idc %d\n",
936 rplmo.modification_of_pic_nums_idc);
937 } break;
938 }
939 } else {
940 // slice->ref_pic_list_modification_flag;
941 regs->reg_base.synt_sli2.ref_list0_rodr = 0;
942 regs->reg_base.synt_sli2.rodr_pic_idx = 0;
943 regs->reg_base.synt_sli2.rodr_pic_num = 0;
944 }
945 }
946
947 /* clear all mmco arg first */
948 regs->reg_base.synt_refm0.nopp_flg = 0;
949 regs->reg_base.synt_refm0.ltrf_flg = 0;
950 regs->reg_base.synt_refm0.arpm_flg = 0;
951 regs->reg_base.synt_refm0.mmco4_pre = 0;
952 regs->reg_base.synt_refm0.mmco_type0 = 0;
953 regs->reg_base.synt_refm0.mmco_parm0 = 0;
954 regs->reg_base.synt_refm0.mmco_type1 = 0;
955 regs->reg_base.synt_refm1.mmco_parm1 = 0;
956 regs->reg_base.synt_refm0.mmco_type2 = 0;
957 regs->reg_base.synt_refm1.mmco_parm2 = 0;
958 regs->reg_base.synt_refm2.long_term_frame_idx0 = 0;
959 regs->reg_base.synt_refm2.long_term_frame_idx1 = 0;
960 regs->reg_base.synt_refm2.long_term_frame_idx2 = 0;
961
962 h264e_marking_rd_rewind(slice->marking);
963
964 /* only update used parameter */
965 if (slice->slice_type == H264_I_SLICE) {
966 regs->reg_base.synt_refm0.nopp_flg = slice->no_output_of_prior_pics;
967 regs->reg_base.synt_refm0.ltrf_flg = slice->long_term_reference_flag;
968 } else {
969 if (!h264e_marking_is_empty(slice->marking)) {
970 H264eMmco mmco;
971
972 regs->reg_base.synt_refm0.arpm_flg = 1;
973
974 /* max 3 mmco */
975 do {
976 RK_S32 type = 0;
977 RK_S32 param_0 = 0;
978 RK_S32 param_1 = 0;
979
980 h264e_marking_rd_op(slice->marking, &mmco);
981 type = mmco.mmco;
982 switch (type) {
983 case 1 : {
984 param_0 = mmco.difference_of_pic_nums_minus1;
985 } break;
986 case 2 : {
987 param_0 = mmco.long_term_pic_num;
988 } break;
989 case 3 : {
990 param_0 = mmco.difference_of_pic_nums_minus1;
991 param_1 = mmco.long_term_frame_idx;
992 } break;
993 case 4 : {
994 param_0 = mmco.max_long_term_frame_idx_plus1;
995 } break;
996 case 5 : {
997 } break;
998 case 6 : {
999 param_0 = mmco.long_term_frame_idx;
1000 } break;
1001 default : {
1002 mpp_err_f("unsupported mmco 0 %d\n", type);
1003 type = 0;
1004 } break;
1005 }
1006
1007 regs->reg_base.synt_refm0.mmco_type0 = type;
1008 regs->reg_base.synt_refm0.mmco_parm0 = param_0;
1009 regs->reg_base.synt_refm2.long_term_frame_idx0 = param_1;
1010
1011 if (h264e_marking_is_empty(slice->marking))
1012 break;
1013
1014 h264e_marking_rd_op(slice->marking, &mmco);
1015 type = mmco.mmco;
1016 param_0 = 0;
1017 param_1 = 0;
1018 switch (type) {
1019 case 1 : {
1020 param_0 = mmco.difference_of_pic_nums_minus1;
1021 } break;
1022 case 2 : {
1023 param_0 = mmco.long_term_pic_num;
1024 } break;
1025 case 3 : {
1026 param_0 = mmco.difference_of_pic_nums_minus1;
1027 param_1 = mmco.long_term_frame_idx;
1028 } break;
1029 case 4 : {
1030 param_0 = mmco.max_long_term_frame_idx_plus1;
1031 } break;
1032 case 5 : {
1033 } break;
1034 case 6 : {
1035 param_0 = mmco.long_term_frame_idx;
1036 } break;
1037 default : {
1038 mpp_err_f("unsupported mmco 0 %d\n", type);
1039 type = 0;
1040 } break;
1041 }
1042
1043 regs->reg_base.synt_refm0.mmco_type1 = type;
1044 regs->reg_base.synt_refm1.mmco_parm1 = param_0;
1045 regs->reg_base.synt_refm2.long_term_frame_idx1 = param_1;
1046
1047 if (h264e_marking_is_empty(slice->marking))
1048 break;
1049
1050 h264e_marking_rd_op(slice->marking, &mmco);
1051 type = mmco.mmco;
1052 param_0 = 0;
1053 param_1 = 0;
1054 switch (type) {
1055 case 1 : {
1056 param_0 = mmco.difference_of_pic_nums_minus1;
1057 } break;
1058 case 2 : {
1059 param_0 = mmco.long_term_pic_num;
1060 } break;
1061 case 3 : {
1062 param_0 = mmco.difference_of_pic_nums_minus1;
1063 param_1 = mmco.long_term_frame_idx;
1064 } break;
1065 case 4 : {
1066 param_0 = mmco.max_long_term_frame_idx_plus1;
1067 } break;
1068 case 5 : {
1069 } break;
1070 case 6 : {
1071 param_0 = mmco.long_term_frame_idx;
1072 } break;
1073 default : {
1074 mpp_err_f("unsupported mmco 0 %d\n", type);
1075 type = 0;
1076 } break;
1077 }
1078
1079 regs->reg_base.synt_refm0.mmco_type2 = type;
1080 regs->reg_base.synt_refm1.mmco_parm2 = param_0;
1081 regs->reg_base.synt_refm2.long_term_frame_idx2 = param_1;
1082 } while (0);
1083 }
1084 }
1085
1086 hal_h264e_dbg_func("leave\n");
1087 }
1088
1089 static void setup_vepu580_rdo_pred(HalVepu580RegSet *regs, H264eSps *sps,
1090 H264ePps *pps, H264eSlice *slice)
1091 {
1092 hal_h264e_dbg_func("enter\n");
1093
1094 if (slice->slice_type == H264_I_SLICE) {
1095 regs->reg_rc_klut.klut_ofst.chrm_klut_ofst = 0;
1096 memcpy(&regs->reg_rc_klut.klut_wgt0, &h264e_klut_weight[0], CHROMA_KLUT_TAB_SIZE);
1097 } else {
1098 regs->reg_rc_klut.klut_ofst.chrm_klut_ofst = 3;
1099 memcpy(&regs->reg_rc_klut.klut_wgt0, &h264e_klut_weight[4], CHROMA_KLUT_TAB_SIZE);
1100 }
1101
1102 regs->reg_base.iprd_csts.vthd_y = 9;
1103 regs->reg_base.iprd_csts.vthd_c = 63;
1104
1105 regs->reg_base.rdo_cfg.rect_size = (sps->profile_idc == H264_PROFILE_BASELINE &&
1106 sps->level_idc <= H264_LEVEL_3_0) ? 1 : 0;
1107 regs->reg_base.rdo_cfg.inter_4x4 = 1;
1108 regs->reg_base.rdo_cfg.vlc_lmt = (sps->profile_idc < H264_PROFILE_MAIN) &&
1109 !pps->entropy_coding_mode;
1110 regs->reg_base.rdo_cfg.chrm_spcl = 1;
1111 regs->reg_base.rdo_cfg.rdo_mask = 0;
1112 regs->reg_base.rdo_cfg.ccwa_e = 1;
1113 regs->reg_base.rdo_cfg.scl_lst_sel = pps->pic_scaling_matrix_present;
1114 regs->reg_base.rdo_cfg.atr_e = 1;
1115 regs->reg_base.rdo_cfg.atf_intra_e = 1;
1116
1117 hal_h264e_dbg_func("leave\n");
1118 }
1119
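/*
 * Translate the user bias settings into ATF weights: mode_bias[1] indexes
 * h264_mode_bias[] and can only raise the default intra weights of rows 0-2
 * (24/22/21, 22/21/20, 20/19/18), while row 3 takes the bias value directly.
 * When skip_bias_en is set, skip_sad can only lower the default CIME
 * thresholds and skip_bias overrides the skip ATF weights (floor of 20 for wgt00).
 */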
1120 static void setup_vepu580_rdo_bias_cfg(Vepu580RdoCfg *regs, MppEncHwCfg *hw)
1121 {
1122 RK_U8 bias = h264_mode_bias[hw->mode_bias[1]];
1123
1124 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt00 = bias > 24 ? bias : 24;
1125 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt01 = bias > 22 ? bias : 22;
1126 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt02 = bias > 21 ? bias : 21;
1127 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt10 = bias > 22 ? bias : 22;
1128 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt11 = bias > 21 ? bias : 21;
1129 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt12 = bias > 20 ? bias : 20;
1130 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt20 = bias > 20 ? bias : 20;
1131 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt21 = bias > 19 ? bias : 19;
1132 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt22 = bias > 18 ? bias : 18;
1133 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt30 = bias;
1134 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt31 = bias;
1135 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt32 = bias;
1136
1137 if (hw->skip_bias_en) {
1138 bias = hw->skip_bias;
1139
1140 regs->rdo_skip_cime_thd0.atf_rdo_skip_cime_thd0 = hw->skip_sad < 10 ? hw->skip_sad : 10;
1141 regs->rdo_skip_cime_thd0.atf_rdo_skip_cime_thd1 = hw->skip_sad < 8 ? hw->skip_sad : 8;
1142 regs->rdo_skip_cime_thd1.atf_rdo_skip_cime_thd2 = hw->skip_sad < 15 ? hw->skip_sad : 15;
1143 regs->rdo_skip_cime_thd1.atf_rdo_skip_cime_thd3 = hw->skip_sad;
1144 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt00 = bias > 20 ? bias : 20;
1145 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt10 = bias;
1146 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt11 = bias;
1147 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt12 = bias;
1148 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt20 = bias;
1149 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt21 = bias;
1150 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt22 = bias;
1151 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt30 = bias;
1152 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt31 = bias;
1153 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt32 = bias;
1154 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt40 = bias;
1155 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt41 = bias;
1156 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt42 = bias;
1157 }
1158 }
1159
1160 static void setup_vepu580_rdo_cfg(Vepu580RdoCfg *regs)
1161 {
1162 hal_h264e_dbg_func("enter\n");
1163
1164 /* 0x2000 */
1165 regs->rdo_sqi_cfg.atf_pskip_en = 1;
1166
1167 /* 0x20CC ~ 0x20D0 */
1168 regs->rdo_intra_cime_thd0.atf_rdo_intra_cime_thd0 = 28;
1169 regs->rdo_intra_cime_thd0.atf_rdo_intra_cime_thd1 = 44;
1170 regs->rdo_intra_cime_thd1.atf_rdo_intra_cime_thd2 = 72;
1171
1172 /* 0x20D4 ~ 0x20E0 */
1173 regs->rdo_intra_var_thd0.atf_rdo_intra_var_thd00 = 25;
1174 regs->rdo_intra_var_thd0.atf_rdo_intra_var_thd01 = 64;
1175 regs->rdo_intra_var_thd1.atf_rdo_intra_var_thd10 = 25;
1176 regs->rdo_intra_var_thd1.atf_rdo_intra_var_thd11 = 64;
1177 regs->rdo_intra_var_thd2.atf_rdo_intra_var_thd20 = 70;
1178 regs->rdo_intra_var_thd2.atf_rdo_intra_var_thd21 = 100;
1179 regs->rdo_intra_var_thd3.atf_rdo_intra_var_thd30 = 70;
1180 regs->rdo_intra_var_thd3.atf_rdo_intra_var_thd31 = 100;
1181
1182 /* 0x20E4 ~ 0x20F0 */
1183 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt00 = 24;
1184 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt01 = 22;
1185 regs->rdo_intra_atf_wgt0.atf_rdo_intra_wgt02 = 21;
1186 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt10 = 22;
1187 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt11 = 21;
1188 regs->rdo_intra_atf_wgt1.atf_rdo_intra_wgt12 = 20;
1189 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt20 = 20;
1190 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt21 = 19;
1191 regs->rdo_intra_atf_wgt2.atf_rdo_intra_wgt22 = 18;
1192 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt30 = 16;
1193 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt31 = 16;
1194 regs->rdo_intra_atf_wgt3.atf_rdo_intra_wgt32 = 16;
1195
1196 /* 0x211C ~ 0x2130 */
1197 regs->rdo_skip_cime_thd0.atf_rdo_skip_cime_thd0 = 10;
1198 regs->rdo_skip_cime_thd0.atf_rdo_skip_cime_thd1 = 8;
1199 regs->rdo_skip_cime_thd1.atf_rdo_skip_cime_thd2 = 15;
1200 regs->rdo_skip_cime_thd1.atf_rdo_skip_cime_thd3 = 25;
1201 regs->rdo_skip_var_thd0.atf_rdo_skip_var_thd10 = 25;
1202 regs->rdo_skip_var_thd0.atf_rdo_skip_var_thd11 = 40;
1203 regs->rdo_skip_var_thd1.atf_rdo_skip_var_thd20 = 25;
1204 regs->rdo_skip_var_thd1.atf_rdo_skip_var_thd21 = 40;
1205 regs->rdo_skip_var_thd2.atf_rdo_skip_var_thd30 = 70;
1206 regs->rdo_skip_var_thd2.atf_rdo_skip_var_thd31 = 100;
1207 regs->rdo_skip_var_thd3.atf_rdo_skip_var_thd40 = 70;
1208 regs->rdo_skip_var_thd3.atf_rdo_skip_var_thd41 = 100;
1209
1210 /* 0x2134 ~ 0x2140 */
1211 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt00 = 20;
1212 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt10 = 16;
1213 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt11 = 16;
1214 regs->rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt12 = 16;
1215 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt20 = 16;
1216 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt21 = 16;
1217 regs->rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt22 = 16;
1218 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt30 = 16;
1219 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt31 = 16;
1220 regs->rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt32 = 16;
1221 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt40 = 16;
1222 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt41 = 16;
1223 regs->rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt42 = 16;
1224
1225 hal_h264e_dbg_func("leave\n");
1226 }
1227
1228 static void setup_vepu580_scl_cfg(Vepu580SclCfg *regs)
1229 {
1230 static RK_U32 vepu580_h264_scl_tab[] = {
1231 /* 0x2200 */
1232 0x2fbe3333, 0x2fbe4189, 0x2fbe3333, 0x2fbe4189, 0x2ca42fbe, 0x2ca43c79, 0x2ca42fbe, 0x2ca43c79,
1233 0x3c794189, 0x3c7951ec, 0x3c794189, 0x3c7951ec, 0x2ca42fbe, 0x2ca43c79, 0x2ca42fbe, 0x2ca43c79,
1234 0x2fbe3333, 0x2fbe4189, 0x2fbe3333, 0x2fbe4189, 0x2ca42fbe, 0x2ca43c79, 0x2ca42fbe, 0x2ca43c79,
1235 0x3c794189, 0x3c7951ec, 0x3c794189, 0x3c7951ec, 0x2ca42fbe, 0x2ca43c79, 0x2ca42fbe, 0x2ca43c79,
1236 0x2b322e8c, 0x2b323a84, 0x2b322e8c, 0x2b323a84, 0x2a4a2b32, 0x2a4a37d2, 0x2a4a2b32, 0x2a4a37d2,
1237 0x37d23a84, 0x37d24ae6, 0x37d23a84, 0x37d24ae6, 0x2a4a2b32, 0x2a4a37d2, 0x2a4a2b32, 0x2a4a37d2,
1238 0x2b322e8c, 0x2b323a84, 0x2b322e8c, 0x2b323a84, 0x2a4a2b32, 0x2a4a37d2, 0x2a4a2b32, 0x2a4a37d2,
1239 0x37d23a84, 0x37d24ae6, 0x37d23a84, 0x37d24ae6, 0x2a4a2b32, 0x2a4a37d2, 0x2a4a2b32, 0x2a4a37d2,
1240 0x25cb2762, 0x25cb31a6, 0x25cb2762, 0x25cb31a6, 0x22ef25cb, 0x22ef2ed1, 0x22ef25cb, 0x22ef2ed1,
1241 0x2ed131a6, 0x2ed13e6a, 0x2ed131a6, 0x2ed13e6a, 0x22ef25cb, 0x22ef2ed1, 0x22ef25cb, 0x22ef2ed1,
1242 0x25cb2762, 0x25cb31a6, 0x25cb2762, 0x25cb31a6, 0x22ef25cb, 0x22ef2ed1, 0x22ef25cb, 0x22ef2ed1,
1243 0x2ed131a6, 0x2ed13e6a, 0x2ed131a6, 0x2ed13e6a, 0x22ef25cb, 0x22ef2ed1, 0x22ef25cb, 0x22ef2ed1,
1244 0x22e32492, 0x22e32ed0, 0x22e32492, 0x22e32ed0, 0x202422e3, 0x20242bfb, 0x202422e3, 0x20242bfb,
1245 0x2bfb2ed0, 0x2bfb3a41, 0x2bfb2ed0, 0x2bfb3a41, 0x202422e3, 0x20242bfb, 0x202422e3, 0x20242bfb,
1246 0x22e32492, 0x22e32ed0, 0x22e32492, 0x22e32ed0, 0x202422e3, 0x20242bfb, 0x202422e3, 0x20242bfb,
1247 0x2bfb2ed0, 0x2bfb3a41, 0x2bfb2ed0, 0x2bfb3a41, 0x202422e3, 0x20242bfb, 0x202422e3, 0x20242bfb,
1248 0x1e3c2000, 0x1e3c28f6, 0x1e3c2000, 0x1e3c28f6, 0x1cb21e3c, 0x1cb22631, 0x1cb21e3c, 0x1cb22631,
1249 0x263128f6, 0x26313367, 0x263128f6, 0x26313367, 0x1cb21e3c, 0x1cb22631, 0x1cb21e3c, 0x1cb22631,
1250 0x1e3c2000, 0x1e3c28f6, 0x1e3c2000, 0x1e3c28f6, 0x1cb21e3c, 0x1cb22631, 0x1cb21e3c, 0x1cb22631,
1251 0x263128f6, 0x26313367, 0x263128f6, 0x26313367, 0x1cb21e3c, 0x1cb22631, 0x1cb21e3c, 0x1cb22631,
1252 0x1aae1c72, 0x1aae239e, 0x1aae1c72, 0x1aae239e, 0x191c1aae, 0x191c21c0, 0x191c1aae, 0x191c21c0,
1253 0x21c0239e, 0x21c02d32, 0x21c0239e, 0x21c02d32, 0x191c1aae, 0x191c21c0, 0x191c1aae, 0x191c21c0,
1254 0x1aae1c72, 0x1aae239e, 0x1aae1c72, 0x1aae239e, 0x191c1aae, 0x191c21c0, 0x191c1aae, 0x191c21c0,
1255 0x21c0239e, 0x21c02d32, 0x21c0239e, 0x21c02d32, 0x191c1aae, 0x191c21c0, 0x191c1aae, 0x191c21c0,
1256 0x00be0033, 0x00be0089, 0x00be0033, 0x00be0089, 0x00a400be, 0x00a40079, 0x00a400be, 0x00a40079,
1257 0x00790089, 0x007900ec, 0x00790089, 0x007900ec, 0x00a400be, 0x00a40079, 0x00a400be, 0x00a40079,
1258 0x00be0033, 0x00be0089, 0x00be0033, 0x00be0089, 0x00a400be, 0x00a40079, 0x00a400be, 0x00a40079,
1259 0x00790089, 0x007900ec, 0x00790089, 0x007900ec, 0x00a400be, 0x00a40079, 0x00a400be, 0x00a40079,
1260 0x0032008c, 0x00320084, 0x0032008c, 0x00320084, 0x004a0032, 0x004a00d2, 0x004a0032, 0x004a00d2,
1261 0x00d20084, 0x00d200e6, 0x00d20084, 0x00d200e6, 0x004a0032, 0x004a00d2, 0x004a0032, 0x004a00d2,
1262 0x0032008c, 0x00320084, 0x0032008c, 0x00320084, 0x004a0032, 0x004a00d2, 0x004a0032, 0x004a00d2,
1263 0x00d20084, 0x00d200e6, 0x00d20084, 0x00d200e6, 0x004a0032, 0x004a00d2, 0x004a0032, 0x004a00d2,
1264 0x00cb0062, 0x00cb00a6, 0x00cb0062, 0x00cb00a6, 0x00ef00cb, 0x00ef00d1, 0x00ef00cb, 0x00ef00d1,
1265 0x00d100a6, 0x00d1006a, 0x00d100a6, 0x00d1006a, 0x00ef00cb, 0x00ef00d1, 0x00ef00cb, 0x00ef00d1,
1266 0x00cb0062, 0x00cb00a6, 0x00cb0062, 0x00cb00a6, 0x00ef00cb, 0x00ef00d1, 0x00ef00cb, 0x00ef00d1,
1267 0x00d100a6, 0x00d1006a, 0x00d100a6, 0x00d1006a, 0x00ef00cb, 0x00ef00d1, 0x00ef00cb, 0x00ef00d1,
1268 0x00e30092, 0x00e300d0, 0x00e30092, 0x00e300d0, 0x002400e3, 0x002400fb, 0x002400e3, 0x002400fb,
1269 0x00fb00d0, 0x00fb0041, 0x00fb00d0, 0x00fb0041, 0x002400e3, 0x002400fb, 0x002400e3, 0x002400fb,
1270 0x00e30092, 0x00e300d0, 0x00e30092, 0x00e300d0, 0x002400e3, 0x002400fb, 0x002400e3, 0x002400fb,
1271 0x00fb00d0, 0x00fb0041, 0x00fb00d0, 0x00fb0041, 0x002400e3, 0x002400fb, 0x002400e3, 0x002400fb,
1272 0x003c0000, 0x003c00f6, 0x003c0000, 0x003c00f6, 0x00b2003c, 0x00b20031, 0x00b2003c, 0x00b20031,
1273 0x003100f6, 0x00310067, 0x003100f6, 0x00310067, 0x00b2003c, 0x00b20031, 0x00b2003c, 0x00b20031,
1274 0x003c0000, 0x003c00f6, 0x003c0000, 0x003c00f6, 0x00b2003c, 0x00b20031, 0x00b2003c, 0x00b20031,
1275 0x003100f6, 0x00310067, 0x003100f6, 0x00310067, 0x00b2003c, 0x00b20031, 0x00b2003c, 0x00b20031,
1276 0x00ae0072, 0x00ae009e, 0x00ae0072, 0x00ae009e, 0x001c00ae, 0x001c00c0, 0x001c00ae, 0x001c00c0,
1277 0x00c0009e, 0x00c00032, 0x00c0009e, 0x00c00032, 0x001c00ae, 0x001c00c0, 0x001c00ae, 0x001c00c0,
1278 0x00ae0072, 0x00ae009e, 0x00ae0072, 0x00ae009e, 0x001c00ae, 0x001c00c0, 0x001c00ae, 0x001c00c0,
1279 0x00c0009e, 0x00c00032, 0x00c0009e, 0x00c00032, 0x001c00ae, 0x001c00c0, 0x001c00ae, 0x001c00c0,
1280 0x002f0033, 0x002f0041, 0x002f0033, 0x002f0041, 0x002c002f, 0x002c003c, 0x002c002f, 0x002c003c,
1281 0x003c0041, 0x003c0051, 0x003c0041, 0x003c0051, 0x002c002f, 0x002c003c, 0x002c002f, 0x002c003c,
1282 0x002f0033, 0x002f0041, 0x002f0033, 0x002f0041, 0x002c002f, 0x002c003c, 0x002c002f, 0x002c003c,
1283 0x003c0041, 0x003c0051, 0x003c0041, 0x003c0051, 0x002c002f, 0x002c003c, 0x002c002f, 0x002c003c,
1284 0x002b002e, 0x002b003a, 0x002b002e, 0x002b003a, 0x002a002b, 0x002a0037, 0x002a002b, 0x002a0037,
1285 0x0037003a, 0x0037004a, 0x0037003a, 0x0037004a, 0x002a002b, 0x002a0037, 0x002a002b, 0x002a0037,
1286 0x002b002e, 0x002b003a, 0x002b002e, 0x002b003a, 0x002a002b, 0x002a0037, 0x002a002b, 0x002a0037,
1287 0x0037003a, 0x0037004a, 0x0037003a, 0x0037004a, 0x002a002b, 0x002a0037, 0x002a002b, 0x002a0037,
1288 0x01250127, 0x01250131, 0x01250127, 0x01250131, 0x01220125, 0x0122012e, 0x01220125, 0x0122012e,
1289 0x012e0131, 0x012e013e, 0x012e0131, 0x012e013e, 0x01220125, 0x0122012e, 0x01220125, 0x0122012e,
1290 0x01250127, 0x01250131, 0x01250127, 0x01250131, 0x01220125, 0x0122012e, 0x01220125, 0x0122012e,
1291 0x012e0131, 0x012e013e, 0x012e0131, 0x012e013e, 0x01220125, 0x0122012e, 0x01220125, 0x0122012e,
1292 0x01220124, 0x0122012e, 0x01220124, 0x0122012e, 0x01200122, 0x0120012b, 0x01200122, 0x0120012b,
1293 0x012b012e, 0x012b013a, 0x012b012e, 0x012b013a, 0x01200122, 0x0120012b, 0x01200122, 0x0120012b,
1294 0x01220124, 0x0122012e, 0x01220124, 0x0122012e, 0x01200122, 0x0120012b, 0x01200122, 0x0120012b,
1295 0x012b012e, 0x012b013a, 0x012b012e, 0x012b013a, 0x01200122, 0x0120012b, 0x01200122, 0x0120012b,
1296 0x001e0020, 0x001e0028, 0x001e0020, 0x001e0028, 0x001c001e, 0x001c0026, 0x001c001e, 0x001c0026,
1297 0x00260028, 0x00260033, 0x00260028, 0x00260033, 0x001c001e, 0x001c0026, 0x001c001e, 0x001c0026,
1298 0x001e0020, 0x001e0028, 0x001e0020, 0x001e0028, 0x001c001e, 0x001c0026, 0x001c001e, 0x001c0026,
1299 0x00260028, 0x00260033, 0x00260028, 0x00260033, 0x001c001e, 0x001c0026, 0x001c001e, 0x001c0026,
1300 0x001a001c, 0x001a0023, 0x001a001c, 0x001a0023, 0x0019001a, 0x00190021, 0x0019001a, 0x00190021,
1301 0x00210023, 0x0021002d, 0x00210023, 0x0021002d, 0x0019001a, 0x00190021, 0x0019001a, 0x00190021,
1302 0x001a001c, 0x001a0023, 0x001a001c, 0x001a0023, 0x0019001a, 0x00190021, 0x0019001a, 0x00190021,
1303 0x00210023, 0x0021002d, 0x00210023, 0x0021002d, 0x0019001a, 0x00190021, 0x0019001a, 0x00190021,
1304 };
1305
1306 hal_h264e_dbg_func("enter\n");
1307
1308 memcpy(regs, vepu580_h264_scl_tab, sizeof(vepu580_h264_scl_tab));
1309
1310 hal_h264e_dbg_func("leave\n");
1311 }
1312
1313 static void setup_vepu580_rc_base(HalVepu580RegSet *regs, H264eSps *sps,
1314 H264eSlice *slice, MppEncHwCfg *hw,
1315 EncRcTask *rc_task)
1316 {
1317 EncRcTaskInfo *rc_info = &rc_task->info;
1318 RK_S32 mb_w = sps->pic_width_in_mbs;
1319 RK_S32 mb_h = sps->pic_height_in_mbs;
1320 RK_U32 qp_target = rc_info->quality_target;
1321 RK_U32 qp_min = rc_info->quality_min;
1322 RK_U32 qp_max = rc_info->quality_max;
1323 RK_U32 qpmap_mode = 1;
1324 RK_S32 mb_target_bits_mul_16 = (rc_info->bit_target << 4) / (mb_w * mb_h);
1325 RK_S32 mb_target_bits;
1326 RK_S32 negative_bits_thd;
1327 RK_S32 positive_bits_thd;
1328
1329 hal_h264e_dbg_rc("bittarget %d qp [%d %d %d]\n", rc_info->bit_target,
1330 qp_min, qp_target, qp_max);
1331
1332 if (mb_target_bits_mul_16 >= 0x100000) {
1333 mb_target_bits_mul_16 = 0x50000;
1334 }
1335
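/*
 * Per-row bit budget and deviation thresholds: mb_target_bits is the target
 * for one macroblock row (rc_ctu_num is set to mb_w below), and the +/-5/16
 * thresholds feed the rc_dthd breakpoints at 1x and 4x to select a qp_adj step.
 */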
1336 mb_target_bits = (mb_target_bits_mul_16 * mb_w) >> 4;
1337 negative_bits_thd = 0 - 5 * mb_target_bits / 16;
1338 positive_bits_thd = 5 * mb_target_bits / 16;
1339
1340 hal_h264e_dbg_func("enter\n");
1341
1342 regs->reg_base.enc_pic.pic_qp = qp_target;
1343
1344 regs->reg_base.rc_cfg.rc_en = 1;
1345 regs->reg_base.rc_cfg.aq_en = 1;
1346 regs->reg_base.rc_cfg.aq_mode = 0;
1347 regs->reg_base.rc_cfg.rc_ctu_num = mb_w;
1348
1349 regs->reg_base.rc_qp.rc_qp_range = (slice->slice_type == H264_I_SLICE) ?
1350 hw->qp_delta_row_i : hw->qp_delta_row;
1351 regs->reg_base.rc_qp.rc_max_qp = qp_max;
1352 regs->reg_base.rc_qp.rc_min_qp = qp_min;
1353
1354 regs->reg_base.rc_tgt.ctu_ebit = mb_target_bits_mul_16;
1355
1356 regs->reg_rc_klut.rc_adj0.qp_adj0 = -2;
1357 regs->reg_rc_klut.rc_adj0.qp_adj1 = -1;
1358 regs->reg_rc_klut.rc_adj0.qp_adj2 = 0;
1359 regs->reg_rc_klut.rc_adj0.qp_adj3 = 1;
1360 regs->reg_rc_klut.rc_adj0.qp_adj4 = 2;
1361 regs->reg_rc_klut.rc_adj1.qp_adj5 = 0;
1362 regs->reg_rc_klut.rc_adj1.qp_adj6 = 0;
1363 regs->reg_rc_klut.rc_adj1.qp_adj7 = 0;
1364 regs->reg_rc_klut.rc_adj1.qp_adj8 = 1;
1365
1366 regs->reg_rc_klut.rc_dthd_0_8[0] = 4 * negative_bits_thd;
1367 regs->reg_rc_klut.rc_dthd_0_8[1] = negative_bits_thd;
1368 regs->reg_rc_klut.rc_dthd_0_8[2] = positive_bits_thd;
1369 regs->reg_rc_klut.rc_dthd_0_8[3] = 4 * positive_bits_thd;
1370 regs->reg_rc_klut.rc_dthd_0_8[4] = 0x7FFFFFFF;
1371 regs->reg_rc_klut.rc_dthd_0_8[5] = 0x7FFFFFFF;
1372 regs->reg_rc_klut.rc_dthd_0_8[6] = 0x7FFFFFFF;
1373 regs->reg_rc_klut.rc_dthd_0_8[7] = 0x7FFFFFFF;
1374 regs->reg_rc_klut.rc_dthd_0_8[8] = 0x7FFFFFFF;
1375
1376 regs->reg_rc_klut.roi_qthd0.qpmin_area0 = qp_min;
1377 regs->reg_rc_klut.roi_qthd0.qpmax_area0 = qp_max;
1378 regs->reg_rc_klut.roi_qthd0.qpmin_area1 = qp_min;
1379 regs->reg_rc_klut.roi_qthd0.qpmax_area1 = qp_max;
1380 regs->reg_rc_klut.roi_qthd0.qpmin_area2 = qp_min;
1381
1382 regs->reg_rc_klut.roi_qthd1.qpmax_area2 = qp_max;
1383 regs->reg_rc_klut.roi_qthd1.qpmin_area3 = qp_min;
1384 regs->reg_rc_klut.roi_qthd1.qpmax_area3 = qp_max;
1385 regs->reg_rc_klut.roi_qthd1.qpmin_area4 = qp_min;
1386 regs->reg_rc_klut.roi_qthd1.qpmax_area4 = qp_max;
1387
1388 regs->reg_rc_klut.roi_qthd2.qpmin_area5 = qp_min;
1389 regs->reg_rc_klut.roi_qthd2.qpmax_area5 = qp_max;
1390 regs->reg_rc_klut.roi_qthd2.qpmin_area6 = qp_min;
1391 regs->reg_rc_klut.roi_qthd2.qpmax_area6 = qp_max;
1392 regs->reg_rc_klut.roi_qthd2.qpmin_area7 = qp_min;
1393
1394 regs->reg_rc_klut.roi_qthd3.qpmax_area7 = qp_max;
1395 regs->reg_rc_klut.roi_qthd3.qpmap_mode = qpmap_mode;
1396
1397 {
1398 /* 0x1070 ~ 0x1074 */
1399 regs->reg_rc_klut.md_sad_thd.md_sad_thd0 = 4;
1400 regs->reg_rc_klut.md_sad_thd.md_sad_thd1 = 9;
1401 regs->reg_rc_klut.md_sad_thd.md_sad_thd2 = 15;
1402
1403 regs->reg_rc_klut.madi_thd.madi_thd0 = 4;
1404 regs->reg_rc_klut.madi_thd.madi_thd1 = 9;
1405 regs->reg_rc_klut.madi_thd.madi_thd2 = 15;
1406 }
1407
1408 hal_h264e_dbg_func("leave\n");
1409 }
1410
1411 static void setup_vepu580_io_buf(HalVepu580RegSet *regs, MppDevRegOffCfgs *offsets,
1412 HalEncTask *task)
1413 {
1414 MppFrame frm = task->frame;
1415 MppPacket pkt = task->packet;
1416 MppBuffer buf_in = mpp_frame_get_buffer(frm);
1417 MppBuffer buf_out = task->output;
1418 MppFrameFormat fmt = mpp_frame_get_fmt(frm);
1419 RK_S32 hor_stride = mpp_frame_get_hor_stride(frm);
1420 RK_S32 ver_stride = mpp_frame_get_ver_stride(frm);
1421 RK_S32 fd_in = mpp_buffer_get_fd(buf_in);
1422 RK_U32 off_in[2] = {0};
1423 RK_U32 off_out = mpp_packet_get_length(pkt);
1424 size_t siz_out = mpp_buffer_get_size(buf_out);
1425 RK_S32 fd_out = mpp_buffer_get_fd(buf_out);
1426
1427 hal_h264e_dbg_func("enter\n");
1428
1429 regs->reg_base.adr_src0 = fd_in;
1430 regs->reg_base.adr_src1 = fd_in;
1431 regs->reg_base.adr_src2 = fd_in;
1432
1433 regs->reg_base.bsbb_addr = fd_out;
1434 regs->reg_base.bsbr_addr = fd_out;
1435 regs->reg_base.adr_bsbs = fd_out;
1436 regs->reg_base.bsbt_addr = fd_out;
1437
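/*
 * All three source plane address registers share the input buffer fd; the
 * per-plane byte offsets computed below are attached through multi-offset
 * entries 161/162 rather than being folded into the fd itself.
 */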
1438 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1439 off_in[0] = mpp_frame_get_fbc_offset(frm);
1440 off_in[1] = 0;
1441 } else if (MPP_FRAME_FMT_IS_YUV(fmt)) {
1442 VepuFmtCfg cfg;
1443
1444 vepu541_set_fmt(&cfg, fmt);
1445 switch (cfg.format) {
1446 case VEPU541_FMT_BGRA8888 :
1447 case VEPU541_FMT_BGR888 :
1448 case VEPU541_FMT_BGR565 : {
1449 off_in[0] = 0;
1450 off_in[1] = 0;
1451 } break;
1452 case VEPU541_FMT_YUV420SP :
1453 case VEPU541_FMT_YUV422SP : {
1454 off_in[0] = hor_stride * ver_stride;
1455 off_in[1] = hor_stride * ver_stride;
1456 } break;
1457 case VEPU541_FMT_YUV422P : {
1458 off_in[0] = hor_stride * ver_stride;
1459 off_in[1] = hor_stride * ver_stride * 3 / 2;
1460 } break;
1461 case VEPU541_FMT_YUV420P : {
1462 off_in[0] = hor_stride * ver_stride;
1463 off_in[1] = hor_stride * ver_stride * 5 / 4;
1464 } break;
1465 case VEPU541_FMT_YUYV422 :
1466 case VEPU541_FMT_UYVY422 : {
1467 off_in[0] = 0;
1468 off_in[1] = 0;
1469 } break;
1470 case VEPU580_FMT_YUV444SP : {
1471 off_in[0] = hor_stride * ver_stride;
1472 off_in[1] = hor_stride * ver_stride;
1473 } break;
1474 case VEPU580_FMT_YUV444P : {
1475 off_in[0] = hor_stride * ver_stride;
1476 off_in[1] = hor_stride * ver_stride * 2;
1477 } break;
1478 case VEPU541_FMT_NONE :
1479 default : {
1480 off_in[0] = 0;
1481 off_in[1] = 0;
1482 } break;
1483 }
1484 }
1485
1486 mpp_dev_multi_offset_update(offsets, 161, off_in[0]);
1487 mpp_dev_multi_offset_update(offsets, 162, off_in[1]);
1488 mpp_dev_multi_offset_update(offsets, 172, siz_out);
1489 mpp_dev_multi_offset_update(offsets, 175, off_out);
1490
1491 hal_h264e_dbg_func("leave\n");
1492 }
1493
1494 static MPP_RET vepu580_h264_set_one_roi(void *buf, MppEncROIRegion *region, RK_S32 w, RK_S32 h)
1495 {
1496 Vepu580RoiH264BsCfg *ptr = (Vepu580RoiH264BsCfg *)buf;
1497 RK_S32 mb_w = MPP_ALIGN(w, 16) / 16;
1498 RK_S32 mb_h = MPP_ALIGN(h, 16) / 16;
1499 RK_S32 stride_h = MPP_ALIGN(mb_w, 4);
1500 Vepu580RoiH264BsCfg cfg;
1501 MPP_RET ret = MPP_NOK;
1502
1503 if (NULL == buf || NULL == region) {
1504 mpp_err_f("invalid buf %p roi %p\n", buf, region);
1505 goto DONE;
1506 }
1507
1508 RK_S32 roi_width = (region->w + 15) / 16;
1509 RK_S32 roi_height = (region->h + 15) / 16;
1510 RK_S32 pos_x_init = region->x / 16;
1511 RK_S32 pos_y_init = region->y / 16;
1512 RK_S32 pos_x_end = pos_x_init + roi_width;
1513 RK_S32 pos_y_end = pos_y_init + roi_height;
1514 RK_S32 x, y;
1515
1516 pos_x_end = MPP_MIN(pos_x_end, mb_w);
1517 pos_y_end = MPP_MIN(pos_y_end, mb_h);
1518 pos_x_init = MPP_MAX(pos_x_init, 0);
1519 pos_y_init = MPP_MAX(pos_y_init, 0);
1520
1521 mpp_assert(pos_x_end > pos_x_init);
1522 mpp_assert(pos_y_end > pos_y_init);
1523
1524 cfg.force_intra = 1;
1525
1526 ptr += pos_y_init * stride_h + pos_x_init;
1527 roi_width = pos_x_end - pos_x_init;
1528 roi_height = pos_y_end - pos_y_init;
1529
1530 for (y = 0; y < roi_height; y++) {
1531 Vepu580RoiH264BsCfg *dst = ptr;
1532
1533 for (x = 0; x < roi_width; x++, dst++)
1534 memcpy(dst, &cfg, sizeof(cfg));
1535
1536 ptr += stride_h;
1537 }
ret = MPP_OK;
1538 DONE:
1539 return ret;
1540 }
1541
1542 static MPP_RET setup_vepu580_intra_refresh(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ctx, RK_U32 refresh_idx)
1543 {
1544 MPP_RET ret = MPP_OK;
1545 RK_U32 mb_w = ctx->sps->pic_width_in_mbs;
1546 RK_U32 mb_h = ctx->sps->pic_height_in_mbs;
1547 RK_U32 w = mb_w * 16;
1548 RK_U32 h = mb_h * 16;
1549 MppEncROIRegion *region = NULL;
1550 RK_U32 refresh_num = ctx->cfg->rc.refresh_num;
1551 RK_U32 stride_h = MPP_ALIGN(mb_w, 4);
1552 RK_U32 stride_v = MPP_ALIGN(mb_h, 4);
1553 RK_U32 roi_base_buf_size = stride_h * stride_v * 8;
1554 RK_U32 i = 0;
1555
1556 hal_h264e_dbg_func("enter\n");
1557
1558 if (!ctx->cfg->rc.refresh_en) {
1559 ret = MPP_ERR_VALUE;
1560 goto RET;
1561 }
1562
1563 if (NULL == ctx->roi_base_cfg_buf) {
1564 if (NULL == ctx->roi_grp)
1565 mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);
1566 mpp_buffer_get(ctx->roi_grp, &ctx->roi_base_cfg_buf, roi_base_buf_size);
1567 ctx->roi_base_buf_size = roi_base_buf_size;
1568 }
1569
1570 mpp_assert(ctx->roi_base_cfg_buf);
1571 RK_S32 base_cfg_fd = mpp_buffer_get_fd(ctx->roi_base_cfg_buf);
1572 void *base_cfg_buf = mpp_buffer_get_ptr(ctx->roi_base_cfg_buf);
1573 Vepu580RoiH264BsCfg base_cfg;
1574 Vepu580RoiH264BsCfg *base_cfg_ptr = (Vepu580RoiH264BsCfg *)base_cfg_buf;
1575
memset(&base_cfg, 0, sizeof(base_cfg));
1576 base_cfg.force_intra = 0;
1577 base_cfg.qp_adj_en = 1;
1578
1579 for (i = 0; i < stride_h * stride_v; i++, base_cfg_ptr++)
1580 memcpy(base_cfg_ptr, &base_cfg, sizeof(base_cfg));
1581
1582 region = mpp_calloc(MppEncROIRegion, 1);
1583
1584 if (NULL == region) {
1585 mpp_err_f("Failed to calloc MppEncROIRegion!\n");
1586 ret = MPP_ERR_MALLOC;
goto RET;
1587 }
1588
1589 if (ctx->cfg->rc.refresh_mode == MPP_ENC_RC_INTRA_REFRESH_ROW) {
1590 region->x = 0;
1591 region->w = w;
1592 if (refresh_idx > 0) {
1593 region->y = refresh_idx * 16 * refresh_num - 32;
1594 region->h = 16 * refresh_num + 32;
1595 } else {
1596 region->y = refresh_idx * 16 * refresh_num;
1597 region->h = 16 * refresh_num;
1598 }
1599 regs->reg_base.me_rnge.cme_srch_v = 1;
1600 } else if (ctx->cfg->rc.refresh_mode == MPP_ENC_RC_INTRA_REFRESH_COL) {
1601 region->y = 0;
1602 region->h = h;
1603 if (refresh_idx > 0) {
1604 region->x = refresh_idx * 16 * refresh_num - 32;
1605 region->w = 16 * refresh_num + 32;
1606 } else {
1607 region->x = refresh_idx * 16 * refresh_num;
1608 region->w = 16 * refresh_num;
1609 }
1610 regs->reg_base.me_rnge.cme_srch_h = 1;
1611 }
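/*
 * Worked example (assumed values): with refresh_num = 4 and refresh_idx = 2
 * in MPP_ENC_RC_INTRA_REFRESH_ROW mode, region->y = 2 * 16 * 4 - 32 = 96 and
 * region->h = 16 * 4 + 32 = 96, so the refresh stripe starts two macroblock
 * rows above its nominal position and overlaps the previously refreshed rows.
 */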
1612
1613 region->intra = 1;
1614 region->quality = -ctx->cfg->rc.qp_delta_ip;
1615
1616 region->area_map_en = 1;
1617 region->qp_area_idx = 1;
1618 region->abs_qp_en = 0;
1619
1620 regs->reg_base.enc_pic.roi_en = 1;
1621 regs->reg_base.roi_addr = base_cfg_fd;
1622 vepu580_h264_set_one_roi(base_cfg_buf, region, w, h);
1623 mpp_free(region);
1624 RET:
1625 hal_h264e_dbg_func("leave, ret %d\n", ret);
1626 return ret;
1627 }
1628
1629 static void setup_vepu580_roi(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ctx)
1630 {
1631 hal_h264e_dbg_func("enter\n");
1632
1633 /* the whole register set is cleared (memset) at the start of gen_regs, so do not clear the ROI registers again here */
1634 if (ctx->roi_data) {
1635 /* roi setup */
1636 RK_U32 mb_w = MPP_ALIGN(ctx->cfg->prep.width, 64) / 16;
1637 RK_U32 mb_h = MPP_ALIGN(ctx->cfg->prep.height, 64) / 16;
1638 RK_U32 base_cfg_size = mb_w * mb_h * 8;
1639 RK_U32 qp_cfg_size = mb_w * mb_h * 2;
1640 RK_U32 amv_cfg_size = mb_w * mb_h / 4;
1641 RK_U32 mv_cfg_size = mb_w * mb_h * 96 / 4;
1642 MppEncROICfg2 *cfg = (MppEncROICfg2 *)ctx->roi_data;
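/*
 * Worked example (assumed 1920x1080 input): mb_w = MPP_ALIGN(1920, 64) / 16 = 120
 * and mb_h = MPP_ALIGN(1080, 64) / 16 = 68, so base_cfg_size = 120 * 68 * 8 =
 * 65280 bytes (one 64-bit Vepu580RoiH264BsCfg per macroblock) and
 * qp_cfg_size = 120 * 68 * 2 = 16320 bytes.
 */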
1643
1644 if (mpp_buffer_get_size(cfg->base_cfg_buf) >= base_cfg_size) {
1645 regs->reg_base.enc_pic.roi_en = 1;
1646 regs->reg_base.roi_addr = mpp_buffer_get_fd(cfg->base_cfg_buf);
1647 } else {
1648 mpp_err("roi base cfg buf not enough, roi is invalid");
1649 }
1650
1651 if (cfg->roi_qp_en) {
1652 if (mpp_buffer_get_size(cfg->qp_cfg_buf) >= qp_cfg_size) {
1653 regs->reg_base.roi_qp_addr = mpp_buffer_get_fd(cfg->qp_cfg_buf);
1654 regs->reg_base.roi_en.roi_qp_en = 1;
1655 } else {
1656 mpp_err("roi qp cfg buf not enough, roi is invalid");
1657 }
1658 }
1659
1660 if (cfg->roi_amv_en) {
1661 if (mpp_buffer_get_size(cfg->amv_cfg_buf) >= amv_cfg_size) {
1662 regs->reg_base.qoi_amv_addr = mpp_buffer_get_fd(cfg->amv_cfg_buf);
1663 regs->reg_base.roi_en.roi_amv_en = 1;
1664 } else {
1665 mpp_err("roi amv cfg buf not enough, roi is invalid");
1666 }
1667 }
1668
1669 if (cfg->roi_mv_en) {
1670 if (mpp_buffer_get_size(cfg->mv_cfg_buf) >= mv_cfg_size) {
1671 regs->reg_base.qoi_mv_addr = mpp_buffer_get_fd(cfg->mv_cfg_buf);
1672 regs->reg_base.roi_en.roi_mv_en = 1;
1673 } else {
1674 mpp_err("roi mv cfg buf not enough, roi is invalid");
1675 }
1676 }
1677 }
1678
1679 hal_h264e_dbg_func("leave\n");
1680 }
1681
1682 static void setup_vepu580_recn_refr(HalH264eVepu580Ctx *ctx, HalVepu580RegSet *regs)
1683 {
1684 H264eFrmInfo *frms = ctx->frms;
1685 HalBufs bufs = ctx->hw_recn;
1686 RK_S32 fbc_hdr_size = ctx->pixel_buf_fbc_hdr_size;
1687
1688 HalBuf *curr = hal_bufs_get_buf(bufs, frms->curr_idx);
1689 HalBuf *refr = hal_bufs_get_buf(bufs, frms->refr_idx);
1690
1691 hal_h264e_dbg_func("enter\n");
1692
1693 if (curr && curr->cnt) {
1694 MppBuffer buf_pixel = curr->buf[0];
1695 MppBuffer buf_thumb = curr->buf[1];
1696 RK_S32 fd = mpp_buffer_get_fd(buf_pixel);
1697
1698 mpp_assert(buf_pixel);
1699 mpp_assert(buf_thumb);
1700
1701 regs->reg_base.rfpw_h_addr = fd;
1702 regs->reg_base.rfpw_b_addr = fd;
1703 regs->reg_base.dspw_addr = mpp_buffer_get_fd(buf_thumb);
1704 }
1705
1706 if (refr && refr->cnt) {
1707 MppBuffer buf_pixel = refr->buf[0];
1708 MppBuffer buf_thumb = refr->buf[1];
1709 RK_S32 fd = mpp_buffer_get_fd(buf_pixel);
1710
1711 mpp_assert(buf_pixel);
1712 mpp_assert(buf_thumb);
1713
1714 regs->reg_base.rfpr_h_addr = fd;
1715 regs->reg_base.rfpr_b_addr = fd;
1716 regs->reg_base.dspr_addr = mpp_buffer_get_fd(buf_thumb);
1717 }
1718
1719 mpp_dev_multi_offset_update(ctx->offsets, 164, fbc_hdr_size);
1720 mpp_dev_multi_offset_update(ctx->offsets, 166, fbc_hdr_size);
1721
1722 hal_h264e_dbg_func("leave\n");
1723 }
1724
1725 static void setup_vepu580_split(HalVepu580RegSet *regs, MppEncCfgSet *enc_cfg)
1726 {
1727 MppEncSliceSplit *cfg = &enc_cfg->split;
1728
1729 hal_h264e_dbg_func("enter\n");
1730
1731 switch (cfg->split_mode) {
1732 case MPP_ENC_SPLIT_NONE : {
1733 regs->reg_base.sli_splt.sli_splt = 0;
1734 regs->reg_base.sli_splt.sli_splt_mode = 0;
1735 regs->reg_base.sli_splt.sli_splt_cpst = 0;
1736 regs->reg_base.sli_splt.sli_max_num_m1 = 0;
1737 regs->reg_base.sli_splt.sli_flsh = 0;
1738 regs->reg_base.sli_cnum.sli_splt_cnum_m1 = 0;
1739
1740 regs->reg_base.sli_byte.sli_splt_byte = 0;
1741 regs->reg_base.enc_pic.slen_fifo = 0;
1742 } break;
1743 case MPP_ENC_SPLIT_BY_BYTE : {
1744 regs->reg_base.sli_splt.sli_splt = 1;
1745 regs->reg_base.sli_splt.sli_splt_mode = 0;
1746 regs->reg_base.sli_splt.sli_splt_cpst = 0;
1747 regs->reg_base.sli_splt.sli_max_num_m1 = 500;
1748 regs->reg_base.sli_splt.sli_flsh = 1;
1749 regs->reg_base.sli_cnum.sli_splt_cnum_m1 = 0;
1750
1751 regs->reg_base.sli_byte.sli_splt_byte = cfg->split_arg;
1752 regs->reg_base.enc_pic.slen_fifo = cfg->split_out ? 1 : 0;
1753 regs->reg_ctl.int_en.slc_done_en = 1;
1754 } break;
1755 case MPP_ENC_SPLIT_BY_CTU : {
1756 RK_U32 mb_w = MPP_ALIGN(enc_cfg->prep.width, 16) / 16;
1757 RK_U32 mb_h = MPP_ALIGN(enc_cfg->prep.height, 16) / 16;
1758 RK_U32 slice_num = (mb_w * mb_h + cfg->split_arg - 1) / cfg->split_arg;
1759
1760 regs->reg_base.sli_splt.sli_splt = 1;
1761 regs->reg_base.sli_splt.sli_splt_mode = 1;
1762 regs->reg_base.sli_splt.sli_splt_cpst = 0;
1763 regs->reg_base.sli_splt.sli_max_num_m1 = 500;
1764 regs->reg_base.sli_splt.sli_flsh = 1;
1765 regs->reg_base.sli_cnum.sli_splt_cnum_m1 = cfg->split_arg - 1;
1766
1767 regs->reg_base.sli_byte.sli_splt_byte = 0;
1768 regs->reg_base.enc_pic.slen_fifo = cfg->split_out ? 1 : 0;
1769 regs->reg_ctl.int_en.slc_done_en = (cfg->split_out & MPP_ENC_SPLIT_OUT_LOWDELAY) ? 1 : 0;
1770
1771 if (slice_num > VEPU580_SLICE_FIFO_LEN)
1772 regs->reg_ctl.int_en.slc_done_en = 1;
1773 } break;
1774 default : {
1775 mpp_log_f("invalide slice split mode %d\n", cfg->split_mode);
1776 } break;
1777 }
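/*
 * Worked example for MPP_ENC_SPLIT_BY_CTU (assumed 1920x1080, split_arg = 2040):
 * mb_w * mb_h = 120 * 68 = 8160 macroblocks, so slice_num =
 * (8160 + 2040 - 1) / 2040 = 4 and sli_splt_cnum_m1 = 2039; with only 4 slices,
 * slc_done_en stays driven by the low-delay flag unless 4 exceeds
 * VEPU580_SLICE_FIFO_LEN.
 */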
1778
1779 cfg->change = 0;
1780
1781 hal_h264e_dbg_func("leave\n");
1782 }
1783
1784 static void calc_cime_parameter(HalVepu580RegSet *regs, H264eSps *sps)
1785 {
1786 Vepu580BaseCfg *base_regs = &regs->reg_base;
1787 RK_S32 x_gmv = base_regs->gmv.gmv_x;
1788 RK_S32 y_gmv = base_regs->gmv.gmv_y;
1789 RK_S32 srch_w = base_regs->me_rnge.cme_srch_h * 4;
1790 RK_S32 srch_h = base_regs->me_rnge.cme_srch_v * 4;
1791 RK_S32 frm_sta = 0, frm_end = 0, pic_w = 0;
1792 RK_S32 pic_wd64 = MPP_ALIGN(sps->pic_width_in_mbs * 16, 64) / 64;
1793
1794 // calc cime_linebuf_w
1795 {
1796 if (x_gmv - srch_w < 0) {
1797 frm_sta = (x_gmv - srch_w - 15) / 16;
1798 } else {
1799 frm_sta = (x_gmv - srch_w) / 16;
1800 }
1801 frm_sta = mpp_clip(frm_sta, 0, pic_wd64 - 1);
1802
1803 if (x_gmv + srch_w < 0) {
1804 frm_end = pic_wd64 - 1 + (x_gmv + srch_w) / 16;
1805 } else {
1806 frm_end = pic_wd64 - 1 + (x_gmv + srch_w + 15) / 16;
1807 }
1808 frm_end = mpp_clip(frm_end, 0, pic_wd64 - 1);
1809
1810 pic_w = (frm_end - frm_sta + 1) * 64;
1811 base_regs->me_cach.cme_linebuf_w = (pic_w ? pic_w : 64) / 64;
1812 }
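/*
 * Worked example (assumed 1920-wide picture, x_gmv = 0, cme_srch_h = 11 so
 * srch_w = 44): pic_wd64 = 30, frm_sta = (0 - 44 - 15) / 16 = -3 clipped to 0,
 * frm_end = 29 + (0 + 44 + 15) / 16 = 32 clipped to 29, hence
 * pic_w = 30 * 64 = 1920 and cme_linebuf_w = 30.
 */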
1813
1814 // calc cime_cacha_h and cime_cacha_max
1815 {
1816 RK_U32 cime_cacha_max = 2464;
1817 RK_U32 ctu_4_h = 1, ramb_h;
1818 RK_U32 cur_srch_16_w, cur_srch_4_h, cur_srch_max;
1819 RK_U32 cime_cacha_h = ctu_4_h;
1820
1821 if ((x_gmv % 16 - srch_w % 16) < 0) {
1822 cur_srch_16_w = (16 + (x_gmv % 16 - srch_w % 16) % 16 + srch_w * 2 + 15) / 16 + 1;
1823 } else {
1824 cur_srch_16_w = ((x_gmv % 16 - srch_w % 16) % 16 + srch_w * 2 + 15) / 16 + 1;
1825 }
1826
1827 if ((y_gmv % 4 - srch_h % 4) < 0) {
1828 cur_srch_4_h = (4 + (y_gmv % 4 - srch_h % 4) % 4 + srch_h * 2 + 3) / 4 + ctu_4_h;
1829 } else {
1830 cur_srch_4_h = ((y_gmv % 4 - srch_h % 4) % 4 + srch_h * 2 + 3) / 4 + ctu_4_h;
1831 }
1832
1833 cur_srch_max = cur_srch_4_h;
1834
1835 if (base_regs->me_cach.cme_linebuf_w < cur_srch_16_w) {
1836 cur_srch_16_w = base_regs->me_cach.cme_linebuf_w;
1837 }
1838
1839 ramb_h = cur_srch_4_h;
1840 while ((cime_cacha_h < cur_srch_max) && (cime_cacha_max >
1841 ((cime_cacha_h - ctu_4_h) * base_regs->me_cach.cme_linebuf_w * 4 + (ramb_h * 4 * cur_srch_16_w)))) {
1842 cime_cacha_h = cime_cacha_h + ctu_4_h;
1843
1844 if (ramb_h > 2 * ctu_4_h) {
1845 ramb_h = ramb_h - ctu_4_h;
1846 } else {
1847 ramb_h = ctu_4_h;
1848 }
1849 }
1850
1851 if (cur_srch_4_h == ctu_4_h) {
1852 cime_cacha_h = cime_cacha_h + ctu_4_h;
1853 ramb_h = 0;
1854 }
1855
1856 if (cime_cacha_max < ((cime_cacha_h - ctu_4_h) * base_regs->me_cach.cme_linebuf_w * 4 + (ramb_h * 4 * cur_srch_16_w))) {
1857 cime_cacha_h = cime_cacha_h - ctu_4_h;
1858 }
1859 base_regs->me_cach.cme_rama_h = cime_cacha_h;
1860
1861 /* cime_cacha_max */
1862 {
1863 RK_U32 ram_col_h = (cime_cacha_h - ctu_4_h) / ctu_4_h;
1864 base_regs->me_cach.cme_rama_max = ram_col_h * base_regs->me_cach.cme_linebuf_w + cur_srch_16_w;
1865 }
1866 }
1867 }
1868
1869 static void setup_vepu580_me(HalVepu580RegSet *regs, H264eSps *sps,
1870 H264eSlice *slice)
1871 {
1872 RK_S32 level_idc = sps->level_idc;
1873 RK_S32 cime_w = 176;
1874 RK_S32 cime_h = 112;
1875 RK_S32 cime_blk_w_max = 44;
1876 RK_S32 cime_blk_h_max = 28;
1877
1878 hal_h264e_dbg_func("enter\n");
1879 /*
1880 * Step 1. limit the mv range by level_idc
1881 * For level 1 and level 1b the vertical MV range is [-64,+63.75]
1882 * For level 1.1, 1.2, 1.3 and 2 the vertical MV range is [-128,+127.75]
1883 */
1884 switch (level_idc) {
1885 case H264_LEVEL_1_0 :
1886 case H264_LEVEL_1_b : {
1887 cime_blk_h_max = 12;
1888 } break;
1889 case H264_LEVEL_1_1 :
1890 case H264_LEVEL_1_2 :
1891 case H264_LEVEL_1_3 :
1892 case H264_LEVEL_2_0 : {
1893 cime_blk_h_max = 28;
1894 } break;
1895 default : {
1896 cime_blk_h_max = 28;
1897 } break;
1898 }
1899
1900 if (cime_w < cime_blk_w_max * 4)
1901 cime_blk_w_max = cime_w / 4;
1902
1903 if (cime_h < cime_blk_h_max * 4)
1904 cime_blk_h_max = cime_h / 4;
1905
1906 /*
1907 * Step 2. limit the mv range by image size
1908 */
1909 if (cime_blk_w_max / 4 * 2 > (sps->pic_width_in_mbs * 2 + 1) / 2)
1910 cime_blk_w_max = (sps->pic_width_in_mbs * 2 + 1) / 2 / 2 * 4;
1911
1912 if (cime_blk_h_max / 4 > MPP_ALIGN(sps->pic_height_in_mbs * 16, 64) / 128 * 4)
1913 cime_blk_h_max = MPP_ALIGN(sps->pic_height_in_mbs * 16, 64) / 128 * 16;
1914
1915 regs->reg_base.me_rnge.cme_srch_h = cime_blk_w_max / 4;
1916 regs->reg_base.me_rnge.cme_srch_v = cime_blk_h_max / 4;
1917 regs->reg_base.me_rnge.rme_srch_h = 7;
1918 regs->reg_base.me_rnge.rme_srch_v = 5;
1919 regs->reg_base.me_rnge.dlt_frm_num = 0;
1920
1921 if (slice->slice_type == H264_I_SLICE) {
1922 regs->reg_base.me_cfg.pmv_mdst_h = 0;
1923 regs->reg_base.me_cfg.pmv_mdst_v = 0;
1924 } else {
1925 regs->reg_base.me_cfg.pmv_mdst_h = 5;
1926 regs->reg_base.me_cfg.pmv_mdst_v = 5;
1927 }
1928 regs->reg_base.me_cfg.mv_limit = (sps->level_idc > 20) ? 2 : ((sps->level_idc >= 11) ? 1 : 0);//2;
1929 regs->reg_base.me_cfg.pmv_num = 2;
1930 regs->reg_base.me_cfg.rme_dis = 0;
1931 regs->reg_base.me_cfg.fme_dis = 0;
1932 regs->reg_base.me_cfg.lvl4_ovrd_en = 0;
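/*
 * Illustrative mapping of the mv_limit ternary above (example level_idc
 * inputs only): level_idc 10 (level 1.0) gives mv_limit 0, level_idc 12
 * (level 1.2) gives 1 and level_idc 31 (level 3.1) gives 2.
 */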
1933
1934 calc_cime_parameter(regs, sps);
1935
1936 hal_h264e_dbg_func("leave\n");
1937 }
1938
1939 #define H264E_LAMBDA_TAB_SIZE (52 * sizeof(RK_U32))
1940
1941 static RK_U32 h264e_lambda_default[58] = {
1942 0x00000003, 0x00000005, 0x00000006, 0x00000007,
1943 0x00000009, 0x0000000b, 0x0000000e, 0x00000012,
1944 0x00000016, 0x0000001c, 0x00000024, 0x0000002d,
1945 0x00000039, 0x00000048, 0x0000005b, 0x00000073,
1946 0x00000091, 0x000000b6, 0x000000e6, 0x00000122,
1947 0x0000016d, 0x000001cc, 0x00000244, 0x000002db,
1948 0x00000399, 0x00000489, 0x000005b6, 0x00000733,
1949 0x00000912, 0x00000b6d, 0x00000e66, 0x00001224,
1950 0x000016db, 0x00001ccc, 0x00002449, 0x00002db7,
1951 0x00003999, 0x00004892, 0x00005b6f, 0x00007333,
1952 0x00009124, 0x0000b6de, 0x0000e666, 0x00012249,
1953 0x00016dbc, 0x0001cccc, 0x00024492, 0x0002db79,
1954 0x00039999, 0x00048924, 0x0005b6f2, 0x00073333,
1955 0x00091249, 0x000b6de5, 0x000e6666, 0x00122492,
1956 0x0016dbcb, 0x001ccccc,
1957 };
1958
1959 static void setup_vepu580_l2(HalVepu580RegSet *regs, H264eSlice *slice, MppEncHwCfg *hw)
1960 {
1961 RK_U32 i;
1962
1963 hal_h264e_dbg_func("enter\n");
1964
1965 regs->reg_s3.iprd_wgt_qp_hevc_0_51[0] = 0;
1966 /* ~ */
1967 regs->reg_s3.iprd_wgt_qp_hevc_0_51[51] = 0;
1968
1969 if (slice->slice_type == H264_I_SLICE) {
1970 memcpy(regs->reg_s3.rdo_wgta_qp_grpa_0_51, &h264e_lambda_default[6], H264E_LAMBDA_TAB_SIZE);
1971 } else {
1972 memcpy(regs->reg_s3.rdo_wgta_qp_grpa_0_51, &h264e_lambda_default[6], H264E_LAMBDA_TAB_SIZE);
1973 }
1974 memset(regs->reg_s3.iprd_wgt_qp_hevc_0_51, 0, H264E_LAMBDA_TAB_SIZE);
1975
1976 regs->reg_rc_klut.madi_cfg.madi_mode = 0;
1977 regs->reg_rc_klut.madi_cfg.madi_thd = 25;
1978
1979 regs->reg_s3.lvl32_intra_CST_THD0.lvl4_intra_cst_thd0 = 1;
1980 regs->reg_s3.lvl32_intra_CST_THD0.lvl4_intra_cst_thd1 = 4;
1981 regs->reg_s3.lvl32_intra_CST_THD1.lvl4_intra_cst_thd2 = 9;
1982 regs->reg_s3.lvl32_intra_CST_THD1.lvl4_intra_cst_thd3 = 36;
1983
1984 regs->reg_s3.lvl16_intra_CST_THD0.lvl8_intra_chrm_cst_thd0 = 1;
1985 regs->reg_s3.lvl16_intra_CST_THD0.lvl8_intra_chrm_cst_thd1 = 4;
1986 regs->reg_s3.lvl16_intra_CST_THD1.lvl8_intra_chrm_cst_thd2 = 9;
1987 regs->reg_s3.lvl16_intra_CST_THD1.lvl8_intra_chrm_cst_thd3 = 36;
1988
1989 regs->reg_s3.lvl8_intra_CST_THD0.lvl8_intra_cst_thd0 = 1;
1990 regs->reg_s3.lvl8_intra_CST_THD0.lvl8_intra_cst_thd1 = 4;
1991 regs->reg_s3.lvl8_intra_CST_THD1.lvl8_intra_cst_thd2 = 9;
1992 regs->reg_s3.lvl8_intra_CST_THD1.lvl8_intra_cst_thd3 = 36;
1993
1994 regs->reg_s3.lvl16_intra_UL_CST_THD.lvl16_intra_ul_cst_thld = 0;
1995 regs->reg_s3.lvl32_intra_CST_WGT0.lvl8_intra_cst_wgt0 = 48;
1996 regs->reg_s3.lvl32_intra_CST_WGT0.lvl8_intra_cst_wgt1 = 60;
1997 regs->reg_s3.lvl32_intra_CST_WGT0.lvl8_intra_cst_wgt2 = 40;
1998 regs->reg_s3.lvl32_intra_CST_WGT0.lvl8_intra_cst_wgt3 = 48;
1999
2000 regs->reg_s3.lvl32_intra_CST_WGT1.lvl4_intra_cst_wgt0 = 48;
2001 regs->reg_s3.lvl32_intra_CST_WGT1.lvl4_intra_cst_wgt1 = 60;
2002 regs->reg_s3.lvl32_intra_CST_WGT1.lvl4_intra_cst_wgt2 = 40;
2003 regs->reg_s3.lvl32_intra_CST_WGT1.lvl4_intra_cst_wgt3 = 48;
2004
2005 regs->reg_s3.lvl16_intra_CST_WGT0.lvl16_intra_cst_wgt0 = 48;
2006 regs->reg_s3.lvl16_intra_CST_WGT0.lvl16_intra_cst_wgt1 = 60;
2007 regs->reg_s3.lvl16_intra_CST_WGT0.lvl16_intra_cst_wgt2 = 40;
2008 regs->reg_s3.lvl16_intra_CST_WGT0.lvl16_intra_cst_wgt3 = 48;
2009 /* 0x1728 */
2010 regs->reg_s3.lvl16_intra_CST_WGT1.lvl8_intra_chrm_cst_wgt0 = 36;
2011 regs->reg_s3.lvl16_intra_CST_WGT1.lvl8_intra_chrm_cst_wgt1 = 42;
2012 regs->reg_s3.lvl16_intra_CST_WGT1.lvl8_intra_chrm_cst_wgt2 = 28;
2013 regs->reg_s3.lvl16_intra_CST_WGT1.lvl8_intra_chrm_cst_wgt3 = 32;
2014
2015 regs->reg_s3.RDO_QUANT.quant_f_bias_P = 171;
2016
2017 if (slice->slice_type == H264_I_SLICE) {
2018 regs->reg_s3.RDO_QUANT.quant_f_bias_I = 683;
2019 regs->reg_s3.ATR_THD0.atr_thd0 = 1;
2020 regs->reg_s3.ATR_THD0.atr_thd1 = 4;
2021 regs->reg_s3.ATR_THD1.atr_thd2 = 36;
2022 } else {
2023 regs->reg_s3.RDO_QUANT.quant_f_bias_I = 583;
2024 regs->reg_s3.ATR_THD0.atr_thd0 = 4;
2025 regs->reg_s3.ATR_THD0.atr_thd1 = 16;
2026 regs->reg_s3.ATR_THD1.atr_thd2 = 81;
2027 }
2028 regs->reg_s3.ATR_THD1.atr_thdqp = 45;
2029
2030 if (slice->slice_type == H264_I_SLICE) {
2031 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt0 = 16;
2032 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt1 = 16;
2033 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt2 = 16;
2034
2035 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt0 = 22;
2036 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt1 = 21;
2037 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt2 = 20;
2038
2039 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt0 = 20;
2040 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt1 = 18;
2041 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt2 = 16;
2042 } else {
2043 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt0 = 25;
2044 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt1 = 20;
2045 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt2 = 16;
2046
2047 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt0 = 25;
2048 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt1 = 20;
2049 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt2 = 18;
2050
2051 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt0 = 25;
2052 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt1 = 20;
2053 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt2 = 16;
2054 }
2055 /* CIME */
2056 {
2057 /* 0x1760 */
2058 regs->reg_s3.cime_sqi_cfg.cime_sad_mod_sel = 0;
2059 regs->reg_s3.cime_sqi_cfg.cime_sad_use_big_block = 1;
2060 regs->reg_s3.cime_sqi_cfg.cime_pmv_set_zero = 1;
2061 regs->reg_s3.cime_sqi_cfg.cime_pmv_num = 3;
2062
2063 /* 0x1764 */
2064 regs->reg_s3.cime_sqi_thd.cime_mvd_th0 = 32;
2065 regs->reg_s3.cime_sqi_thd.cime_mvd_th1 = 80;
2066 regs->reg_s3.cime_sqi_thd.cime_mvd_th2 = 128;
2067
2068 /* 0x1768 */
2069 regs->reg_s3.cime_sqi_multi0.cime_multi0 = 4;
2070 regs->reg_s3.cime_sqi_multi0.cime_multi1 = 8;
2071 regs->reg_s3.cime_sqi_multi1.cime_multi2 = 24;
2072 regs->reg_s3.cime_sqi_multi1.cime_multi3 = 24;
2073 }
2074
2075 /* RIME && FME */
2076 {
2077 /* 0x1770 */
2078 regs->reg_s3.rime_sqi_thd.cime_sad_th0 = 50;
2079 regs->reg_s3.rime_sqi_thd.rime_mvd_th0 = 3;
2080 regs->reg_s3.rime_sqi_thd.rime_mvd_th1 = 8;
2081 regs->reg_s3.rime_sqi_multi.rime_multi0 = 4;
2082 regs->reg_s3.rime_sqi_multi.rime_multi1 = 32;
2083 regs->reg_s3.rime_sqi_multi.rime_multi2 = 128;
2084
2085 /* 0x1778 */
2086 regs->reg_s3.fme_sqi_thd0.cime_sad_pu16_th = 2;
2087
2088 /* 0x177C */
2089 regs->reg_s3.fme_sqi_thd1.move_lambda = 1;
2090 }
2091
2092 {
2093 RK_U8* thd = (RK_U8*)&regs->reg_rc_klut.aq_tthd0;
2094 RK_U8* step = (RK_U8*)&regs->reg_rc_klut.aq_stp0;
2095
2096 if (slice->slice_type == H264_I_SLICE) {
2097 for (i = 0; i < MPP_ARRAY_ELEMS(h264_aq_tthd_default); i++) {
2098 thd[i] = hw->aq_thrd_i[i];
2099 step[i] = hw->aq_step_i[i] & 0x3f;
2100 }
2101 } else {
2102 for (i = 0; i < MPP_ARRAY_ELEMS(h264_P_aq_step_default); i++) {
2103 thd[i] = hw->aq_thrd_p[i];
2104 step[i] = hw->aq_step_p[i] & 0x3f;
2105 }
2106 }
2107 }
2108
2109 mpp_env_get_u32("dump_l2_reg", &dump_l2_reg, 0);
2110
2111 if (dump_l2_reg) {
2112 mpp_log("L2 reg dump start:\n");
2113 RK_U32 *p = (RK_U32 *)regs;
2114
2115 for (i = 0; i < (sizeof(*regs) / sizeof(RK_U32)); i++)
2116 mpp_log("%04x %08x\n", 4 + i * 4, p[i]);
2117
2118 mpp_log("L2 reg done\n");
2119 }
2120
2121 hal_h264e_dbg_func("leave\n");
2122 }
2123
2124 static void setup_vepu580_ext_line_buf(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ctx)
2125 {
2126 RK_S32 offset = 0;
2127 RK_S32 fd;
2128
2129 if (!ctx->ext_line_buf) {
2130 regs->reg_base.ebuft_addr = 0;
2131 regs->reg_base.ebufb_addr = 0;
2132 return;
2133 }
2134
2135 fd = mpp_buffer_get_fd(ctx->ext_line_buf);
2136 offset = ctx->ext_line_buf_size;
2137
2138 regs->reg_base.ebuft_addr = fd;
2139 regs->reg_base.ebufb_addr = fd;
2140
2141 mpp_dev_multi_offset_update(ctx->offsets, 182, offset);
2142
2143 /* rcb info for sram */
2144 if (!disable_rcb_buf) {
2145 MppDevRcbInfoCfg rcb_cfg;
2146
2147 rcb_cfg.reg_idx = 183;
2148 rcb_cfg.size = offset;
2149
2150 mpp_dev_ioctl(ctx->dev, MPP_DEV_RCB_INFO, &rcb_cfg);
2151
2152 rcb_cfg.reg_idx = 182;
2153 rcb_cfg.size = 0;
2154
2155 mpp_dev_ioctl(ctx->dev, MPP_DEV_RCB_INFO, &rcb_cfg);
2156 }
2157 }
2158
2159 static MPP_RET setup_vepu580_dual_core(HalH264eVepu580Ctx *ctx, H264SliceType slice_type)
2160 {
2161 Vepu580BaseCfg *reg_base = &ctx->regs_set->reg_base;
2162 RK_U32 dchs_ofst = 9;
2163 RK_U32 dchs_rxe = 1;
2164
2165 if (ctx->task_cnt == 1)
2166 return MPP_OK;
2167
2168 if (slice_type == H264_I_SLICE) {
2169 ctx->curr_idx = 0;
2170 ctx->prev_idx = 0;
2171 dchs_rxe = 0;
2172 }
2173
2174 reg_base->dual_core.dchs_txid = ctx->curr_idx;
2175 reg_base->dual_core.dchs_rxid = ctx->prev_idx;
2176 reg_base->dual_core.dchs_txe = 1;
2177 reg_base->dual_core.dchs_rxe = dchs_rxe;
2178 reg_base->dual_core.dchs_ofst = dchs_ofst;
2179
2180 ctx->prev_idx = ctx->curr_idx++;
2181 if (ctx->curr_idx > 3)
2182 ctx->curr_idx = 0;
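/*
 * Illustrative ping-pong sequence (assumed dual-core IPPP stream): the I
 * frame programs dchs_txid/dchs_rxid = 0/0 with rx disabled, then the
 * following P frames rotate through the tx/rx pairs (1,0), (2,1), (3,2),
 * (0,3), (1,0), ... so each task listens on the channel its predecessor
 * transmitted on.
 */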
2183
2184 return MPP_OK;
2185 }
2186
2187 static MPP_RET hal_h264e_vepu580_gen_regs(void *hal, HalEncTask *task)
2188 {
2189 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
2190 HalVepu580RegSet *regs = ctx->regs_set;
2191 MppEncCfgSet *cfg = ctx->cfg;
2192 H264eSps *sps = ctx->sps;
2193 H264ePps *pps = ctx->pps;
2194 H264eSlice *slice = ctx->slice;
2195 EncRcTask *rc_task = task->rc_task;
2196 EncFrmStatus *frm = &rc_task->frm;
2197 MPP_RET ret = MPP_OK;
2198 EncFrmStatus *frm_status = &task->rc_task->frm;
2199
2200 hal_h264e_dbg_func("enter %p\n", hal);
2201 hal_h264e_dbg_detail("frame %d generate regs now", ctx->frms->seq_idx);
2202
2203 /* register setup */
2204 memset(regs, 0, sizeof(*regs));
2205
2206 setup_vepu580_normal(regs);
2207 ret = setup_vepu580_prep(regs, &ctx->cfg->prep, task);
2208 if (ret)
2209 return ret;
2210
2211 setup_vepu580_dual_core(ctx, slice->slice_type);
2212 setup_vepu580_codec(regs, sps, pps, slice);
2213 setup_vepu580_rdo_pred(regs, sps, pps, slice);
2214 setup_vepu580_rdo_cfg(&regs->reg_rdo);
2215 setup_vepu580_rdo_bias_cfg(&regs->reg_rdo, &cfg->hw);
2216 setup_vepu580_scl_cfg(&regs->reg_scl);
2217 setup_vepu580_rc_base(regs, sps, slice, &cfg->hw, rc_task);
2218 setup_vepu580_io_buf(regs, ctx->offsets, task);
2219 setup_vepu580_roi(regs, ctx);
2220 setup_vepu580_recn_refr(ctx, regs);
2221
2222 regs->reg_base.meiw_addr = task->md_info ? mpp_buffer_get_fd(task->md_info) : 0;
2223 regs->reg_base.enc_pic.mei_stor = task->md_info ? 1 : 0;
2224
2225 regs->reg_base.pic_ofst.pic_ofst_y = mpp_frame_get_offset_y(task->frame);
2226 regs->reg_base.pic_ofst.pic_ofst_x = mpp_frame_get_offset_x(task->frame);
2227
2228 setup_vepu580_split(regs, cfg);
2229 setup_vepu580_me(regs, sps, slice);
2230
2231 if (frm_status->is_i_refresh)
2232 setup_vepu580_intra_refresh(regs, ctx, frm_status->seq_idx % cfg->rc.gop);
2233
2234 vepu580_set_osd(&ctx->osd_cfg);
2235 setup_vepu580_l2(regs, slice, &cfg->hw);
2236 setup_vepu580_ext_line_buf(regs, ctx);
2237 vepu580_h264e_tune_reg_patch(ctx->tune);
2238
2239 /* two pass register patch */
2240 if (frm->save_pass1)
2241 vepu580_h264e_save_pass1_patch(regs, ctx);
2242
2243 if (frm->use_pass1)
2244 vepu580_h264e_use_pass1_patch(regs, ctx);
2245
2246 mpp_env_get_u32("dump_l1_reg", &dump_l1_reg, 0);
2247
2248 if (dump_l1_reg) {
2249 mpp_log("L1 reg dump start:\n");
2250 RK_U32 *p = (RK_U32 *)regs;
2251 RK_S32 n = 0x1D0 / sizeof(RK_U32);
2252 RK_S32 i;
2253
2254 for (i = 0; i < n; i++)
2255 mpp_log("%04x %08x\n", i * 4, p[i]);
2256
2257 mpp_log("L1 reg done\n");
2258 }
2259
2260 ctx->frame_cnt++;
2261
2262 hal_h264e_dbg_func("leave %p\n", hal);
2263 return MPP_OK;
2264 }
2265
2266 static MPP_RET hal_h264e_vepu580_start(void *hal, HalEncTask *task)
2267 {
2268 MPP_RET ret = MPP_OK;
2269 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
2270 HalVepu580RegSet *regs = ctx->regs_set;
2271
2272 (void) task;
2273
2274 hal_h264e_dbg_func("enter %p\n", hal);
2275
2276 do {
2277 MppDevRegWrCfg wr_cfg;
2278 MppDevRegRdCfg rd_cfg;
2279
2280 wr_cfg.reg = &regs->reg_ctl;
2281 wr_cfg.size = sizeof(regs->reg_ctl);
2282 wr_cfg.offset = VEPU580_CONTROL_CFG_OFFSET;
2283 #if DUMP_REG
2284 {
2285 RK_U32 i;
2286 RK_U32 *reg = (RK_U32 *)wr_cfg.reg;
2287 for (i = 0; i < sizeof(regs->reg_ctl) / sizeof(RK_U32); i++) {
2289 mpp_log("reg[%d] = 0x%08x\n", i, reg[i]);
2290 }
2291
2292 }
2293 #endif
2294 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2295 if (ret) {
2296 mpp_err_f("set register write failed %d\n", ret);
2297 break;
2298 }
2299 wr_cfg.reg = &regs->reg_base;
2300 wr_cfg.size = sizeof(regs->reg_base);
2301 wr_cfg.offset = VEPU580_BASE_CFG_OFFSET;
2302
2303 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2304 if (ret) {
2305 mpp_err_f("set register write failed %d\n", ret);
2306 break;
2307 }
2308 wr_cfg.reg = &regs->reg_rc_klut;
2309 wr_cfg.size = sizeof(regs->reg_rc_klut);
2310 wr_cfg.offset = VEPU580_RC_KLUT_CFG_OFFSET;
2311
2312 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2313 if (ret) {
2314 mpp_err_f("set register write failed %d\n", ret);
2315 break;
2316 }
2317 wr_cfg.reg = &regs->reg_s3;
2318 wr_cfg.size = sizeof(regs->reg_s3);
2319 wr_cfg.offset = VEPU580_SECTION_3_OFFSET;
2320
2321 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2322 if (ret) {
2323 mpp_err_f("set register write failed %d\n", ret);
2324 break;
2325 }
2326 wr_cfg.reg = &regs->reg_rdo;
2327 wr_cfg.size = sizeof(regs->reg_rdo);
2328 wr_cfg.offset = VEPU580_RDO_CFG_OFFSET;
2329
2330 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2331 if (ret) {
2332 mpp_err_f("set register write failed %d\n", ret);
2333 break;
2334 }
2335
2336 wr_cfg.reg = &regs->reg_scl;
2337 wr_cfg.size = sizeof(regs->reg_scl);
2338 wr_cfg.offset = VEPU580_SCL_CFG_OFFSET;
2339
2340 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2341 if (ret) {
2342 mpp_err_f("set register write failed %d\n", ret);
2343 break;
2344 }
2345
2346 wr_cfg.reg = &regs->reg_osd;
2347 wr_cfg.size = sizeof(regs->reg_osd);
2348 wr_cfg.offset = VEPU580_OSD_OFFSET;
2349
2350 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
2351 if (ret) {
2352 mpp_err_f("set register write failed %d\n", ret);
2353 break;
2354 }
2355
2356 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_OFFS, ctx->offsets);
2357 if (ret) {
2358 mpp_err_f("set register offsets failed %d\n", ret);
2359 break;
2360 }
2361
2362 rd_cfg.reg = &regs->reg_ctl.int_sta;
2363 rd_cfg.size = sizeof(RK_U32);
2364 rd_cfg.offset = VEPU580_REG_BASE_HW_STATUS;
2365
2366 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
2367 if (ret) {
2368 mpp_err_f("set register read failed %d\n", ret);
2369 break;
2370 }
2371
2372 rd_cfg.reg = &regs->reg_st;
2373 rd_cfg.size = sizeof(regs->reg_st);
2374 rd_cfg.offset = VEPU580_STATUS_OFFSET;
2375
2376 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
2377 if (ret) {
2378 mpp_err_f("set register read failed %d\n", ret);
2379 break;
2380 }
2381
2382 /* send request to hardware */
2383 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
2384 if (ret) {
2385 mpp_err_f("send cmd failed %d\n", ret);
2386 break;
2387 }
2388 } while (0);
2389
2390 hal_h264e_dbg_func("leave %p\n", hal);
2391
2392 return ret;
2393 }
2394
2395 static MPP_RET hal_h264e_vepu580_status_check(HalVepu580RegSet *regs)
2396 {
2397 MPP_RET ret = MPP_OK;
2398
2399 if (regs->reg_ctl.int_sta.lkt_node_done_sta)
2400 hal_h264e_dbg_detail("lkt_done finish");
2401
2402 if (regs->reg_ctl.int_sta.enc_done_sta)
2403 hal_h264e_dbg_detail("enc_done finish");
2404
2405 if (regs->reg_ctl.int_sta.slc_done_sta)
2406 hal_h264e_dbg_detail("enc_slice finsh");
2407
2408 if (regs->reg_ctl.int_sta.sclr_done_sta)
2409 hal_h264e_dbg_detail("safe clear finsh");
2410
2411 if (regs->reg_ctl.int_sta.bsf_oflw_sta) {
2412 mpp_err_f("bit stream overflow");
2413 ret = MPP_NOK;
2414 }
2415
2416 if (regs->reg_ctl.int_sta.brsp_otsd_sta) {
2417 mpp_err_f("bus write full");
2418 ret = MPP_NOK;
2419 }
2420
2421 if (regs->reg_ctl.int_sta.wbus_err_sta) {
2422 mpp_err_f("bus write error");
2423 ret = MPP_NOK;
2424 }
2425
2426 if (regs->reg_ctl.int_sta.rbus_err_sta) {
2427 mpp_err_f("bus read error");
2428 ret = MPP_NOK;
2429 }
2430
2431 if (regs->reg_ctl.int_sta.wdg_sta) {
2432 ret = MPP_NOK;
2433 mpp_err_f("wdg timeout");
2434 }
2435
2436 return ret;
2437 }
2438
2439 static MPP_RET hal_h264e_vepu580_wait(void *hal, HalEncTask *task)
2440 {
2441 MPP_RET ret = MPP_OK;
2442 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
2443 HalVepu580RegSet *regs = &ctx->regs_sets[task->flags.reg_idx];
2444 RK_U32 split_out = ctx->cfg->split.split_out;
2445 MppPacket pkt = task->packet;
2446 RK_S32 offset = mpp_packet_get_length(pkt);
2447 H264NaluType type = task->rc_task->frm.is_idr ? H264_NALU_TYPE_IDR : H264_NALU_TYPE_SLICE;
2448 RK_S32 i;
2449
2450 hal_h264e_dbg_func("enter %p\n", hal);
2451
2452 if (split_out) {
2453 EncOutParam param;
2454 RK_U32 slice_len;
2455 RK_U32 slice_last = 0;
2456 MppDevPollCfg *poll_cfg = (MppDevPollCfg *)((char *)ctx->poll_cfgs +
2457 task->flags.reg_idx * ctx->poll_cfg_size);
2458 param.task = task;
2459 param.base = mpp_packet_get_data(task->packet);
2460
2461 do {
2462 poll_cfg->poll_type = 0;
2463 poll_cfg->poll_ret = 0;
2464 poll_cfg->count_max = ctx->poll_slice_max;
2465 poll_cfg->count_ret = 0;
2466
2467 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, poll_cfg);
2468
2469 for (i = 0; i < poll_cfg->count_ret; i++) {
2470 slice_last = poll_cfg->slice_info[i].last;
2471 slice_len = poll_cfg->slice_info[i].length;
2472
2473 mpp_packet_add_segment_info(pkt, type, offset, slice_len);
2474 offset += slice_len;
2475
2476 if (split_out & MPP_ENC_SPLIT_OUT_LOWDELAY) {
2477 param.length = slice_len;
2478
2479 if (slice_last)
2480 ctx->output_cb->cmd = ENC_OUTPUT_FINISH;
2481 else
2482 ctx->output_cb->cmd = ENC_OUTPUT_SLICE;
2483
2484 mpp_callback(ctx->output_cb, &param);
2485 }
2486 }
2487 } while (!slice_last);
2488
2489 ret = hal_h264e_vepu580_status_check(regs);
2490 if (!ret)
2491 task->hw_length += regs->reg_st.bs_lgth_l32;
2492 } else {
2493 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
2494 if (ret) {
2495 mpp_err_f("poll cmd failed %d\n", ret);
2496 ret = MPP_ERR_VPUHW;
2497 } else {
2498 ret = hal_h264e_vepu580_status_check(regs);
2499 if (!ret)
2500 task->hw_length += regs->reg_st.bs_lgth_l32;
2501 }
2502
2503 mpp_packet_add_segment_info(pkt, type, offset, regs->reg_st.bs_lgth_l32);
2504 }
2505
2506 if (!(split_out & MPP_ENC_SPLIT_OUT_LOWDELAY) && !ret) {
2507 HalH264eVepuStreamAmend *amend = &ctx->amend_sets[task->flags.reg_idx];
2508
2509 if (amend->enable) {
2510 amend->diable_split_out = !split_out;
2511 amend->old_length = task->hw_length;
2512 amend->slice->is_multi_slice = (ctx->cfg->split.split_mode > 0);
2513 h264e_vepu_stream_amend_proc(amend, &ctx->cfg->codec.h264.hw_cfg);
2514 task->hw_length = amend->new_length;
2515 } else if (amend->prefix) {
2516 /* check prefix value */
2517 amend->old_length = task->hw_length;
2518 h264e_vepu_stream_amend_sync_ref_idc(amend);
2519 }
2520 }
2521
2522 hal_h264e_dbg_func("leave %p ret %d\n", hal, ret);
2523
2524 return ret;
2525 }
2526
2527 static MPP_RET hal_h264e_vepu580_ret_task(void * hal, HalEncTask * task)
2528 {
2529 HalH264eVepu580Ctx *ctx = (HalH264eVepu580Ctx *)hal;
2530 HalVepu580RegSet *regs = &ctx->regs_sets[task->flags.reg_idx];
2531 EncRcTaskInfo *rc_info = &task->rc_task->info;
2532 RK_U32 mb_w = ctx->sps->pic_width_in_mbs;
2533 RK_U32 mb_h = ctx->sps->pic_height_in_mbs;
2534 RK_U32 mbs = mb_w * mb_h;
2535
2536 hal_h264e_dbg_func("enter %p\n", hal);
2537
2538 // update total hardware length
2539 task->length += task->hw_length;
2540
2541 // setup bit length for rate control
2542 rc_info->bit_real = task->hw_length * 8;
2543 rc_info->quality_real = regs->reg_st.qp_sum / mbs;
2544 rc_info->madi = (!regs->reg_st.st_bnum_b16.num_b16) ? 0 :
2545 regs->reg_st.madi / regs->reg_st.st_bnum_b16.num_b16;
2546 rc_info->madp = (!regs->reg_st.st_bnum_cme.num_ctu) ? 0 :
2547 regs->reg_st.madp / regs->reg_st.st_bnum_cme.num_ctu;
2548 rc_info->iblk4_prop = (regs->reg_st.st_pnum_i4.pnum_i4 +
2549 regs->reg_st.st_pnum_i8.pnum_i8 +
2550 regs->reg_st.st_pnum_i16.pnum_i16) * 256 / mbs;
2551
2552 ctx->hal_rc_cfg.bit_real = rc_info->bit_real;
2553 ctx->hal_rc_cfg.quality_real = rc_info->quality_real;
2554 ctx->hal_rc_cfg.iblk4_prop = rc_info->iblk4_prop;
2555
2556 task->hal_ret.data = &ctx->hal_rc_cfg;
2557 task->hal_ret.number = 1;
2558
2559 vepu580_h264e_tune_stat_update(ctx->tune, task);
2560
2561 mpp_dev_multi_offset_reset(ctx->offsets);
2562
2563 if (ctx->dpb) {
2564 h264e_dpb_hal_end(ctx->dpb, task->flags.curr_idx);
2565 h264e_dpb_hal_end(ctx->dpb, task->flags.refr_idx);
2566 }
2567
2568 hal_h264e_dbg_func("leave %p\n", hal);
2569
2570 return MPP_OK;
2571 }
2572
2573 const MppEncHalApi hal_h264e_vepu580 = {
2574 .name = "hal_h264e_vepu580",
2575 .coding = MPP_VIDEO_CodingAVC,
2576 .ctx_size = sizeof(HalH264eVepu580Ctx),
2577 .flag = 0,
2578 .init = hal_h264e_vepu580_init,
2579 .deinit = hal_h264e_vepu580_deinit,
2580 .prepare = hal_h264e_vepu580_prepare,
2581 .get_task = hal_h264e_vepu580_get_task,
2582 .gen_regs = hal_h264e_vepu580_gen_regs,
2583 .start = hal_h264e_vepu580_start,
2584 .wait = hal_h264e_vepu580_wait,
2585 .part_start = NULL,
2586 .part_wait = NULL,
2587 .ret_task = hal_h264e_vepu580_ret_task,
2588 };
2589