xref: /rockchip-linux_mpp/mpp/hal/rkenc/h264e/hal_h264e_vepu580_tune.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2021 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "vepu580_tune.h"
18 
/* Debug flag value passed to hal_h264e_dbg_f() for content-analysis messages. */
#define HAL_H264E_DBG_CONTENT           (0x00000200)
/* printf-style wrapper that forwards to hal_h264e_dbg_f() with HAL_H264E_DBG_CONTENT. */
#define hal_h264e_dbg_content(fmt, ...) hal_h264e_dbg_f(HAL_H264E_DBG_CONTENT, fmt, ## __VA_ARGS__)
21 
/*
 * The window-length macros used by the tuning code in this file must
 * satisfy the following constraints:
 *
 * FRAME_CONTENT_ANALYSIS_NUM >= 5
 * MD_WIN_LEN >= 3
 * MD_SHOW_LEN == 4
 */
29 
30 typedef struct HalH264eVepu580Tune_t {
31     HalH264eVepu580Ctx  *ctx;
32 
33     /* motion and texture statistic of previous frames */
34     RK_S32  curr_scene_motion_flag;
35     /* motion and texture statistic of previous frames */
36     RK_S32  ap_motion_flag;
37     // level: 0~2: 0 <--> static, 1 <-->medium motion, 2 <--> large motion
38     RK_S32  md_madp[MD_WIN_LEN];
39     // level: 0~2: 0 <--> simple texture, 1 <--> medium texture, 2 <--> complex texture
40     RK_S32  txtr_madi[FRAME_CONTENT_ANALYSIS_NUM];
41     RK_S32  scene_motion_flag_matrix[FRAME_MOTION_ANALYSIS_NUM];
42     RK_S32  md_flag_matrix[MD_SHOW_LEN];
43 
44     RK_S32  pre_madp[2];
45     RK_S32  pre_madi[2];
46 } HalH264eVepu580Tune;
47 
48 static RK_S32 mb_avg_madp_thd[6] = {192, 128, 64, 192, 128, 64};
49 
50 static RK_S32 atr_wgt[4][9] = {
51     {22, 19, 16, 22, 19, 18, 22, 19, 16},
52     {19, 19, 19, 19, 19, 19, 19, 19, 19},
53     {22, 19, 16, 22, 19, 18, 22, 19, 16},
54     {20, 20, 20, 20, 20, 20, 20, 20, 20},
55 };
56 
57 static RK_S32 skip_atf_wgt[4][13] = {
58     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
59     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
60     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
61     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
62 };
63 
64 static RK_S32 intra_atf_wgt[4][12] = {
65     {24, 22, 21, 22, 21, 20, 20, 19, 18, 16, 16, 16},
66     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
67     {22, 21, 20, 21, 20, 19, 20, 19, 18, 16, 16, 16},
68     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
69 };
70 
71 static RK_S32 cime_multi[4][4] = {
72     {4, 8, 24, 24},
73     {4, 7, 20, 20},
74     {4, 7, 20, 20},
75     {4, 4, 4, 4},
76 };
77 
78 static RK_S32 rime_multi[4][3] = {
79     {4, 32, 128},
80     {4, 16, 64},
81     {4, 16, 64},
82     {4, 4, 4},
83 };
84 
vepu580_h264e_tune_init(HalH264eVepu580Ctx * ctx)85 static HalH264eVepu580Tune *vepu580_h264e_tune_init(HalH264eVepu580Ctx *ctx)
86 {
87     HalH264eVepu580Tune *tune = mpp_malloc(HalH264eVepu580Tune, 1);
88     RK_S32 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
89 
90     if (NULL == tune)
91         return tune;
92 
93     tune->ctx = ctx;
94     tune->curr_scene_motion_flag = 0;
95     tune->ap_motion_flag = scene_mode;
96     memset(tune->md_madp, 0, sizeof(tune->md_madp));
97     memset(tune->txtr_madi, 0, sizeof(tune->txtr_madi));
98     memset(tune->md_flag_matrix, 0, sizeof(tune->md_flag_matrix));
99     memset(tune->scene_motion_flag_matrix, 0, sizeof(tune->scene_motion_flag_matrix));
100     tune->pre_madi[0] = tune->pre_madi[1] = -1;
101     tune->pre_madp[0] = tune->pre_madp[1] = -1;
102 
103     return tune;
104 }
105 
/*
 * Release a tuning state obtained from vepu580_h264e_tune_init().
 *
 * tune: state pointer; handed to MPP_FREE as-is.
 */
static void vepu580_h264e_tune_deinit(void *tune)
{
    MPP_FREE(tune);
}
110 
vepu580_h264e_tune_reg_patch(void * p)111 static void vepu580_h264e_tune_reg_patch(void *p)
112 {
113     HalH264eVepu580Tune *tune = (HalH264eVepu580Tune *)p;
114     HalH264eVepu580Ctx *ctx = NULL;
115     RK_S32 scene_mode = 0;
116 
117     if (NULL == tune)
118         return;
119 
120     ctx = tune->ctx;
121     scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
122 
123     H264eSlice *slice = ctx->slice;
124     HalVepu580RegSet *regs = ctx->regs_set;
125     tune->ap_motion_flag = scene_mode;
126     RK_U32 scene_motion_flag = tune->ap_motion_flag * 2 + tune->curr_scene_motion_flag;
127     MppEncHwCfg *hw = &ctx->cfg->hw;
128 
129     if (scene_motion_flag > 3) {
130         mpp_err_f("scene_motion_flag is a wrong value %d\n", scene_motion_flag);
131         return;
132     }
133 
134     /* modify register here */
135     if (slice->slice_type != H264_I_SLICE) {
136         RK_U32 *src = (tune->curr_scene_motion_flag ||
137                        ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC) ?
138                       &h264e_klut_weight[0] : &h264e_klut_weight[4];
139         memcpy(&regs->reg_rc_klut.klut_wgt0, src, CHROMA_KLUT_TAB_SIZE);
140     }
141 
142     regs->reg_rc_klut.md_sad_thd.md_sad_thd0 = 4;
143     regs->reg_rc_klut.md_sad_thd.md_sad_thd1 = 9;
144     regs->reg_rc_klut.md_sad_thd.md_sad_thd2 = 15;
145     regs->reg_rc_klut.madi_thd.madi_thd0 = 4;
146     regs->reg_rc_klut.madi_thd.madi_thd1 = 9;
147     regs->reg_rc_klut.madi_thd.madi_thd2 = 15;
148 
149     if (tune->curr_scene_motion_flag) {
150         regs->reg_s3.lvl16_intra_UL_CST_THD.lvl16_intra_ul_cst_thld = 2501;
151         regs->reg_s3.RDO_QUANT.quant_f_bias_P = 341;
152         regs->reg_base.iprd_csts.vthd_y     = 0;
153         regs->reg_base.iprd_csts.vthd_c     = 0;
154         regs->reg_rc_klut.klut_ofst.chrm_klut_ofst = 0;
155         regs->reg_base.rdo_cfg.atf_intra_e = 0;
156         regs->reg_rdo.rdo_sqi_cfg.atf_pskip_en = 0;
157         regs->reg_s3.ATR_THD1.atr_thdqp = 51;
158         regs->reg_s3.cime_sqi_cfg.cime_pmv_set_zero = 0;
159         regs->reg_s3.rime_sqi_thd.cime_sad_th0  = 0;
160         regs->reg_s3.fme_sqi_thd0.cime_sad_pu16_th = 0;
161         regs->reg_s3.fme_sqi_thd1.move_lambda = 8;
162     }
163 
164     regs->reg_rdo.rdo_intra_cime_thd0.atf_rdo_intra_cime_thd0 = 28;
165     regs->reg_rdo.rdo_intra_cime_thd0.atf_rdo_intra_cime_thd1 = 44;
166     regs->reg_rdo.rdo_intra_cime_thd1.atf_rdo_intra_cime_thd2 = 72;
167     regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt00 = intra_atf_wgt[scene_motion_flag][0];
168     regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt01 = intra_atf_wgt[scene_motion_flag][1];
169     regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt02 = intra_atf_wgt[scene_motion_flag][2];
170     regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt10 = intra_atf_wgt[scene_motion_flag][3];
171     regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt11 = intra_atf_wgt[scene_motion_flag][4];
172     regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt12 = intra_atf_wgt[scene_motion_flag][5];
173     regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt20 = intra_atf_wgt[scene_motion_flag][6];
174     regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt21 = intra_atf_wgt[scene_motion_flag][7];
175     regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt22 = intra_atf_wgt[scene_motion_flag][8];
176     regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt30 = intra_atf_wgt[scene_motion_flag][9];
177     regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt31 = intra_atf_wgt[scene_motion_flag][10];
178     regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt32 = intra_atf_wgt[scene_motion_flag][11];
179 
180     regs->reg_rdo.rdo_skip_cime_thd0.atf_rdo_skip_cime_thd0 = 10;
181     regs->reg_rdo.rdo_skip_cime_thd0.atf_rdo_skip_cime_thd1 = 8;
182     regs->reg_rdo.rdo_skip_cime_thd1.atf_rdo_skip_cime_thd2 = 15;
183     regs->reg_rdo.rdo_skip_cime_thd1.atf_rdo_skip_cime_thd3 = 25;
184     regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt00 = skip_atf_wgt[scene_motion_flag][0];
185     regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt10 = skip_atf_wgt[scene_motion_flag][1];
186     regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt11 = skip_atf_wgt[scene_motion_flag][2];
187     regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt12 = skip_atf_wgt[scene_motion_flag][3];
188     regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt20 = skip_atf_wgt[scene_motion_flag][4];
189     regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt21 = skip_atf_wgt[scene_motion_flag][5];
190     regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt22 = skip_atf_wgt[scene_motion_flag][6];
191     regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt30 = skip_atf_wgt[scene_motion_flag][7];
192     regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt31 = skip_atf_wgt[scene_motion_flag][8];
193     regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt32 = skip_atf_wgt[scene_motion_flag][9];
194     regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt40 = skip_atf_wgt[scene_motion_flag][10];
195     regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt41 = skip_atf_wgt[scene_motion_flag][11];
196     regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt42 = skip_atf_wgt[scene_motion_flag][12];
197 
198     if (slice->slice_type != H264_I_SLICE) {
199         regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt0 = atr_wgt[scene_motion_flag][0];
200         regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt1 = atr_wgt[scene_motion_flag][1];
201         regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt2 = atr_wgt[scene_motion_flag][2];
202         regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt0 = atr_wgt[scene_motion_flag][3];
203         regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt1 = atr_wgt[scene_motion_flag][4];
204         regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt2 = atr_wgt[scene_motion_flag][5];
205         regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt0 = atr_wgt[scene_motion_flag][6];
206         regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt1 = atr_wgt[scene_motion_flag][7];
207         regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt2 = atr_wgt[scene_motion_flag][8];
208     }
209 
210     regs->reg_s3.cime_sqi_multi0.cime_multi0 = cime_multi[scene_motion_flag][0];
211     regs->reg_s3.cime_sqi_multi0.cime_multi1 = cime_multi[scene_motion_flag][1];
212     regs->reg_s3.cime_sqi_multi1.cime_multi2 = cime_multi[scene_motion_flag][2];
213     regs->reg_s3.cime_sqi_multi1.cime_multi3 = cime_multi[scene_motion_flag][3];
214 
215     regs->reg_s3.rime_sqi_multi.rime_multi0 = rime_multi[scene_motion_flag][0];
216     regs->reg_s3.rime_sqi_multi.rime_multi1 = rime_multi[scene_motion_flag][1];
217     regs->reg_s3.rime_sqi_multi.rime_multi2 = rime_multi[scene_motion_flag][2];
218 
219     if (hw->qbias_en) {
220         regs->reg_s3.RDO_QUANT.quant_f_bias_I = hw->qbias_i ? hw->qbias_i : 683;
221         regs->reg_s3.RDO_QUANT.quant_f_bias_P = hw->qbias_p ? hw->qbias_p : 341;
222     }
223 }
224 
/*
 * Fold the hardware statistics of the finished frame into the tuning state.
 *
 * p:    HalH264eVepu580Tune pointer (as void *); a NULL pointer is a no-op.
 * task: encode task; task->flags.reg_idx selects the register set to read
 *       and task->rc_task->info receives the frame-average madi / madp.
 *
 * Steps: derive a motion level for this frame, score it together with the
 * md_madp history to get md_flag, decide curr_scene_motion_flag from the
 * md_flag history as it was before this frame, then push the new values
 * into the history windows.
 */
static void vepu580_h264e_tune_stat_update(void *p, HalEncTask *task)
{
    HalH264eVepu580Tune *tune = (HalH264eVepu580Tune *)p;
    EncRcTaskInfo *rc_info = &task->rc_task->info;

    if (!tune)
        return;

    HalH264eVepu580Ctx *ctx = tune->ctx;
    HalVepu580RegSet *regs = &ctx->regs_sets[task->flags.reg_idx];
    RK_S32 cnt_stat[5];
    RK_S32 score = 0;
    RK_S32 score_thd = ((MD_WIN_LEN - 2) * 6 + 2 * 8 + 2 * 11 + 2 * 13) / 2;
    RK_S32 motion_level;
    RK_S32 frame_md_flag;
    RK_S32 scene_flag;
    RK_S32 blk_num;
    RK_S32 shift;
    RK_S32 k;

    tune->ap_motion_flag = (ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) ? 0 : 1;

    /* update statistic info here */
    for (k = 0; k < 5; k++) {
        cnt_stat[k] = regs->reg_st.md_sad_b16num0 * madp_num_map[k][0] +
                      regs->reg_st.md_sad_b16num1 * madp_num_map[k][1] +
                      regs->reg_st.md_sad_b16num2 * madp_num_map[k][2] +
                      regs->reg_st.md_sad_b16num3 * madp_num_map[k][3];
    }

    /* frame averages; a zero block count yields 0 instead of dividing */
    tune->pre_madi[0] = (!regs->reg_st.st_bnum_b16.num_b16) ? 0 :
                        regs->reg_st.madi /  regs->reg_st.st_bnum_b16.num_b16;
    rc_info->madi = tune->pre_madi[0];
    tune->pre_madp[0] = (!regs->reg_st.st_bnum_cme.num_ctu) ? 0 :
                        regs->reg_st.madp / regs->reg_st.st_bnum_cme.num_ctu;
    rc_info->madp = tune->pre_madp[0];

    blk_num = regs->reg_st.madi_b16num0 + regs->reg_st.madi_b16num1 +
              regs->reg_st.madi_b16num2 + regs->reg_st.madi_b16num3;
    if (!blk_num)
        blk_num = 1;

    shift = (0 != tune->ap_motion_flag) ? 15 : 10;

    if (0 == cnt_stat[4]) {
        motion_level = MOTION_LEVEL_UNKNOW_SCENE;
    } else {
        RK_S32 base = tune->ap_motion_flag * 3;

        motion_level = MOTION_LEVEL_STILL;
        /* only the first threshold row whose average-madp bound is met is evaluated */
        for (k = 0; k < 3; k++, base++) {
            if (tune->pre_madp[0] < mb_avg_madp_thd[base])
                continue;

            if ((cnt_stat[0] > (blk_num * ctu_madp_cnt_thd[base][0] >> shift) ||
                 cnt_stat[1] > (blk_num * ctu_madp_cnt_thd[base][1] >> shift) ||
                 cnt_stat[2] > (blk_num * ctu_madp_cnt_thd[base][2] >> shift)) ||
                ((cnt_stat[0] > (blk_num * ctu_madp_cnt_thd[base][3] >> shift) ||
                  cnt_stat[1] > (blk_num * ctu_madp_cnt_thd[base][4] >> shift)) &&
                 cnt_stat[3] < (blk_num * ctu_madp_cnt_thd[base][5] >> shift)) ||
                (cnt_stat[3] < (blk_num * ctu_madp_cnt_thd[base][6] >> shift))) {
                motion_level = MOTION_LEVEL_BIG_MOTION;
            } else if (cnt_stat[3] < (blk_num * ctu_madp_cnt_thd[base][7] >> shift)) {
                motion_level = MOTION_LEVEL_MOTION;
            }
            break;
        }
    }

    /* weighted score: 13 for this frame, 11 / 8 for the two newest history entries, 6 for the rest */
    score = tune->md_madp[0] * 11 + tune->md_madp[1] * 8;
    if (MOTION_LEVEL_UNKNOW_SCENE == motion_level)
        score_thd -= 13;
    else
        score += motion_level * 13;

    for (k = 2; k < MD_WIN_LEN; k++)
        score += tune->md_madp[k] * 6;

    frame_md_flag = (score >= score_thd) ? 1 : 0;

    /* the scene decision uses the md_flag history as it was before this frame is pushed */
    if (tune->md_flag_matrix[0] && tune->md_flag_matrix[1] && tune->md_flag_matrix[2]) {
        scene_flag = 1;
    } else if ((tune->md_flag_matrix[0] && tune->md_flag_matrix[1]) ||
               (tune->md_flag_matrix[1] && tune->md_flag_matrix[2] && tune->md_flag_matrix[3])) {
        scene_flag = frame_md_flag;
    } else {
        scene_flag = 0;
    }
    tune->curr_scene_motion_flag = scene_flag;

    /* an unknown-scene frame does not enter the motion level history */
    if (MOTION_LEVEL_UNKNOW_SCENE != motion_level) {
        for (k = MD_WIN_LEN - 1; k > 0; k--)
            tune->md_madp[k] = tune->md_madp[k - 1];
        tune->md_madp[0] = motion_level;
    }

    for (k = MD_SHOW_LEN - 1; k > 0; k--)
        tune->md_flag_matrix[k] = tune->md_flag_matrix[k - 1];
    tune->md_flag_matrix[0] = frame_md_flag;

    for (k = FRAME_MOTION_ANALYSIS_NUM - 1; k > 0; k--)
        tune->scene_motion_flag_matrix[k] = tune->scene_motion_flag_matrix[k - 1];
    tune->scene_motion_flag_matrix[0] = tune->curr_scene_motion_flag;

    tune->pre_madi[1] = tune->pre_madi[0];
    tune->pre_madp[1] = tune->pre_madp[0];
}
338 
setup_vepu580_qpmap_buf(HalH264eVepu580Ctx * ctx)339 static MPP_RET setup_vepu580_qpmap_buf(HalH264eVepu580Ctx *ctx)
340 {
341     MPP_RET ret = MPP_OK;
342     RK_S32 mb_w = MPP_ALIGN(ctx->cfg->prep.width, 64) / 16;
343     RK_S32 mb_h = MPP_ALIGN(ctx->cfg->prep.height, 16) / 16;
344     RK_S32 qpmap_base_cfg_size = ctx->qpmap_base_cfg_size
345                                  = mb_w * mb_h * 8;
346     RK_S32 qpmap_qp_cfg_size   = ctx->qpmap_qp_cfg_size
347                                  = mb_w * mb_h * 2;
348     RK_S32 md_flag_size        = ctx->md_flag_size
349                                  = mb_w * mb_h;
350 
351     if (!ctx->cfg->tune.deblur_en) {
352         mpp_log("deblurring is closed!\n");
353         goto __RET;
354     }
355 
356     if (NULL == ctx->qpmap_base_cfg_buf) {
357         mpp_buffer_get(NULL, &ctx->qpmap_base_cfg_buf, qpmap_base_cfg_size);
358         if (!ctx->qpmap_base_cfg_buf) {
359             mpp_err("qpmap_base_cfg_buf malloc fail, qpmap invalid\n");
360             ret = MPP_ERR_VALUE;
361             goto __RET;
362         }
363     }
364 
365     if (NULL == ctx->qpmap_qp_cfg_buf) {
366         mpp_buffer_get(NULL, &ctx->qpmap_qp_cfg_buf, qpmap_qp_cfg_size);
367         if (!ctx->qpmap_qp_cfg_buf) {
368             mpp_err("qpmap_qp_cfg_buf malloc fail, qpmap invalid\n");
369             ret = MPP_ERR_VALUE;
370             goto __RET;
371         }
372     }
373 
374     if (NULL == ctx->md_flag_buf) {
375         ctx->md_flag_buf = mpp_malloc(RK_U8, md_flag_size);
376         if (!ctx->md_flag_buf) {
377             mpp_err("md_flag_buf malloc fail, qpmap invalid\n");
378             ret = MPP_ERR_VALUE;
379             goto __RET;
380         }
381     }
382 
383 __RET:
384     hal_h264e_dbg_func("leave, ret %d\n", ret);
385     return ret;
386 }