1 /*
2 * Copyright 2021 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "vepu580_tune.h"
18
/* Debug-flag bit that selects the content-analysis log output. */
#define HAL_H264E_DBG_CONTENT           (0x00000200)

/* printf-style log helper; forwards to hal_h264e_dbg_f() with HAL_H264E_DBG_CONTENT. */
#define hal_h264e_dbg_content(fmt, ...) hal_h264e_dbg_f(HAL_H264E_DBG_CONTENT, fmt, ## __VA_ARGS__)
21
/*
 * Please keep the following constraints when configuring these macros:
 *
 * FRAME_CONTENT_ANALYSIS_NUM >= 5
 * MD_WIN_LEN >= 3
 * MD_SHOW_LEN == 4
 */
29
30 typedef struct HalH264eVepu580Tune_t {
31 HalH264eVepu580Ctx *ctx;
32
33 /* motion and texture statistic of previous frames */
34 RK_S32 curr_scene_motion_flag;
35 /* motion and texture statistic of previous frames */
36 RK_S32 ap_motion_flag;
37 // level: 0~2: 0 <--> static, 1 <-->medium motion, 2 <--> large motion
38 RK_S32 md_madp[MD_WIN_LEN];
39 // level: 0~2: 0 <--> simple texture, 1 <--> medium texture, 2 <--> complex texture
40 RK_S32 txtr_madi[FRAME_CONTENT_ANALYSIS_NUM];
41 RK_S32 scene_motion_flag_matrix[FRAME_MOTION_ANALYSIS_NUM];
42 RK_S32 md_flag_matrix[MD_SHOW_LEN];
43
44 RK_S32 pre_madp[2];
45 RK_S32 pre_madi[2];
46 } HalH264eVepu580Tune;
47
48 static RK_S32 mb_avg_madp_thd[6] = {192, 128, 64, 192, 128, 64};
49
50 RK_S32 ctu_madp_cnt_thd[6][8] = {
51 {50, 100, 130, 50, 100, 550, 500, 550},
52 {100, 150, 200, 80, 120, 500, 450, 550},
53 {150, 200, 250, 100, 150, 450, 400, 450},
54 {50, 100, 130, 50, 100, 550, 500, 550},
55 {100, 150, 200, 80, 120, 500, 450, 550},
56 {150, 200, 250, 100, 150, 450, 400, 450}
57 };
58
59 RK_S32 madp_num_map[5][4] = {
60 {0, 0, 0, 1},
61 {0, 0, 1, 0},
62 {0, 0, 1, 1},
63 {1, 0, 0, 0},
64 {1, 1, 1, 1},
65 };
66
67 static RK_S32 atr_wgt[4][9] = {
68 {22, 19, 16, 22, 19, 18, 22, 19, 16},
69 {19, 19, 19, 19, 19, 19, 19, 19, 19},
70 {22, 19, 16, 22, 19, 18, 22, 19, 16},
71 {20, 20, 20, 20, 20, 20, 20, 20, 20},
72 };
73
74 static RK_S32 skip_atf_wgt[4][13] = {
75 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
76 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
77 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
78 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
79 };
80
81 static RK_S32 intra_atf_wgt[4][12] = {
82 {24, 22, 21, 22, 21, 20, 20, 19, 18, 16, 16, 16},
83 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
84 {22, 21, 20, 21, 20, 19, 20, 19, 18, 16, 16, 16},
85 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
86 };
87
88 static RK_S32 cime_multi[4][4] = {
89 {4, 8, 24, 24},
90 {4, 7, 20, 20},
91 {4, 7, 20, 20},
92 {4, 4, 4, 4},
93 };
94
95 static RK_S32 rime_multi[4][3] = {
96 {4, 32, 128},
97 {4, 16, 64},
98 {4, 16, 64},
99 {4, 4, 4},
100 };
101
vepu580_h264e_tune_init(HalH264eVepu580Ctx * ctx)102 static HalH264eVepu580Tune *vepu580_h264e_tune_init(HalH264eVepu580Ctx *ctx)
103 {
104 HalH264eVepu580Tune *tune = mpp_malloc(HalH264eVepu580Tune, 1);
105 RK_S32 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
106
107 if (NULL == tune)
108 return tune;
109
110 tune->ctx = ctx;
111 tune->curr_scene_motion_flag = 0;
112 tune->ap_motion_flag = scene_mode;
113 memset(tune->md_madp, 0, sizeof(tune->md_madp));
114 memset(tune->txtr_madi, 0, sizeof(tune->txtr_madi));
115 memset(tune->md_flag_matrix, 0, sizeof(tune->md_flag_matrix));
116 memset(tune->scene_motion_flag_matrix, 0, sizeof(tune->scene_motion_flag_matrix));
117 tune->pre_madi[0] = tune->pre_madi[1] = -1;
118 tune->pre_madp[0] = tune->pre_madp[1] = -1;
119
120 return tune;
121 }
122
/*
 * Release a tuning context (as allocated by vepu580_h264e_tune_init()).
 * MPP_FREE only sees this function's local copy of the pointer, so the
 * caller's own pointer variable is left untouched.
 */
static void vepu580_h264e_tune_deinit(void *tune)
{
    MPP_FREE(tune);
}
127
vepu580_h264e_tune_reg_patch(void * p)128 static void vepu580_h264e_tune_reg_patch(void *p)
129 {
130 HalH264eVepu580Tune *tune = (HalH264eVepu580Tune *)p;
131 HalH264eVepu580Ctx *ctx = NULL;
132 RK_S32 scene_mode = 0;
133
134 if (NULL == tune)
135 return;
136
137 ctx = tune->ctx;
138 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
139
140 H264eSlice *slice = ctx->slice;
141 HalVepu580RegSet *regs = ctx->regs_set;
142 tune->ap_motion_flag = scene_mode;
143 RK_U32 scene_motion_flag = tune->ap_motion_flag * 2 + tune->curr_scene_motion_flag;
144
145 if (scene_motion_flag > 3) {
146 mpp_err_f("scene_motion_flag is a wrong value %d\n", scene_motion_flag);
147 return;
148 }
149
150 /* modify register here */
151 if (slice->slice_type != H264_I_SLICE) {
152 RK_U32 *src = tune->curr_scene_motion_flag ? &h264e_klut_weight[0] : &h264e_klut_weight[4];
153 memcpy(®s->reg_rc_klut.klut_wgt0, src, CHROMA_KLUT_TAB_SIZE);
154 }
155
156 regs->reg_rc_klut.md_sad_thd.md_sad_thd0 = 4;
157 regs->reg_rc_klut.md_sad_thd.md_sad_thd1 = 9;
158 regs->reg_rc_klut.md_sad_thd.md_sad_thd2 = 15;
159 regs->reg_rc_klut.madi_thd.madi_thd0 = 4;
160 regs->reg_rc_klut.madi_thd.madi_thd1 = 9;
161 regs->reg_rc_klut.madi_thd.madi_thd2 = 15;
162
163 if (tune->curr_scene_motion_flag) {
164 regs->reg_s3.lvl16_intra_UL_CST_THD.lvl16_intra_ul_cst_thld = 2501;
165 regs->reg_s3.RDO_QUANT.quant_f_bias_P = 341;
166 regs->reg_base.iprd_csts.vthd_y = 0;
167 regs->reg_base.iprd_csts.vthd_c = 0;
168 regs->reg_rc_klut.klut_ofst.chrm_klut_ofst = 0;
169 regs->reg_base.rdo_cfg.atf_intra_e = 0;
170 regs->reg_rdo.rdo_sqi_cfg.atf_pskip_en = 0;
171 regs->reg_s3.ATR_THD1.atr_thdqp = 51;
172 regs->reg_s3.cime_sqi_cfg.cime_pmv_set_zero = 0;
173 regs->reg_s3.rime_sqi_thd.cime_sad_th0 = 0;
174 regs->reg_s3.fme_sqi_thd0.cime_sad_pu16_th = 0;
175 regs->reg_s3.fme_sqi_thd1.move_lambda = 8;
176 }
177
178 regs->reg_rdo.rdo_intra_cime_thd0.atf_rdo_intra_cime_thd0 = 28;
179 regs->reg_rdo.rdo_intra_cime_thd0.atf_rdo_intra_cime_thd1 = 44;
180 regs->reg_rdo.rdo_intra_cime_thd1.atf_rdo_intra_cime_thd2 = 72;
181 regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt00 = intra_atf_wgt[scene_motion_flag][0];
182 regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt01 = intra_atf_wgt[scene_motion_flag][1];
183 regs->reg_rdo.rdo_intra_atf_wgt0.atf_rdo_intra_wgt02 = intra_atf_wgt[scene_motion_flag][2];
184 regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt10 = intra_atf_wgt[scene_motion_flag][3];
185 regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt11 = intra_atf_wgt[scene_motion_flag][4];
186 regs->reg_rdo.rdo_intra_atf_wgt1.atf_rdo_intra_wgt12 = intra_atf_wgt[scene_motion_flag][5];
187 regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt20 = intra_atf_wgt[scene_motion_flag][6];
188 regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt21 = intra_atf_wgt[scene_motion_flag][7];
189 regs->reg_rdo.rdo_intra_atf_wgt2.atf_rdo_intra_wgt22 = intra_atf_wgt[scene_motion_flag][8];
190 regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt30 = intra_atf_wgt[scene_motion_flag][9];
191 regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt31 = intra_atf_wgt[scene_motion_flag][10];
192 regs->reg_rdo.rdo_intra_atf_wgt3.atf_rdo_intra_wgt32 = intra_atf_wgt[scene_motion_flag][11];
193
194 regs->reg_rdo.rdo_skip_cime_thd0.atf_rdo_skip_cime_thd0 = 10;
195 regs->reg_rdo.rdo_skip_cime_thd0.atf_rdo_skip_cime_thd1 = 8;
196 regs->reg_rdo.rdo_skip_cime_thd1.atf_rdo_skip_cime_thd2 = 15;
197 regs->reg_rdo.rdo_skip_cime_thd1.atf_rdo_skip_cime_thd3 = 25;
198 regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt00 = skip_atf_wgt[scene_motion_flag][0];
199 regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt10 = skip_atf_wgt[scene_motion_flag][1];
200 regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt11 = skip_atf_wgt[scene_motion_flag][2];
201 regs->reg_rdo.rdo_skip_atf_wgt0.atf_rdo_skip_atf_wgt12 = skip_atf_wgt[scene_motion_flag][3];
202 regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt20 = skip_atf_wgt[scene_motion_flag][4];
203 regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt21 = skip_atf_wgt[scene_motion_flag][5];
204 regs->reg_rdo.rdo_skip_atf_wgt1.atf_rdo_skip_atf_wgt22 = skip_atf_wgt[scene_motion_flag][6];
205 regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt30 = skip_atf_wgt[scene_motion_flag][7];
206 regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt31 = skip_atf_wgt[scene_motion_flag][8];
207 regs->reg_rdo.rdo_skip_atf_wgt2.atf_rdo_skip_atf_wgt32 = skip_atf_wgt[scene_motion_flag][9];
208 regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt40 = skip_atf_wgt[scene_motion_flag][10];
209 regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt41 = skip_atf_wgt[scene_motion_flag][11];
210 regs->reg_rdo.rdo_skip_atf_wgt3.atf_rdo_skip_atf_wgt42 = skip_atf_wgt[scene_motion_flag][12];
211
212 if (slice->slice_type != H264_I_SLICE) {
213 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt0 = atr_wgt[scene_motion_flag][0];
214 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt1 = atr_wgt[scene_motion_flag][1];
215 regs->reg_s3.Lvl16_ATR_WGT.lvl16_atr_wgt2 = atr_wgt[scene_motion_flag][2];
216 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt0 = atr_wgt[scene_motion_flag][3];
217 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt1 = atr_wgt[scene_motion_flag][4];
218 regs->reg_s3.Lvl8_ATR_WGT.lvl8_atr_wgt2 = atr_wgt[scene_motion_flag][5];
219 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt0 = atr_wgt[scene_motion_flag][6];
220 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt1 = atr_wgt[scene_motion_flag][7];
221 regs->reg_s3.Lvl4_ATR_WGT.lvl4_atr_wgt2 = atr_wgt[scene_motion_flag][8];
222 }
223
224 regs->reg_s3.cime_sqi_multi0.cime_multi0 = cime_multi[scene_motion_flag][0];
225 regs->reg_s3.cime_sqi_multi0.cime_multi1 = cime_multi[scene_motion_flag][1];
226 regs->reg_s3.cime_sqi_multi1.cime_multi2 = cime_multi[scene_motion_flag][2];
227 regs->reg_s3.cime_sqi_multi1.cime_multi3 = cime_multi[scene_motion_flag][3];
228
229 regs->reg_s3.rime_sqi_multi.rime_multi0 = rime_multi[scene_motion_flag][0];
230 regs->reg_s3.rime_sqi_multi.rime_multi1 = rime_multi[scene_motion_flag][1];
231 regs->reg_s3.rime_sqi_multi.rime_multi2 = rime_multi[scene_motion_flag][2];
232 }
233
/*
 * Refresh the motion statistics from the status fields (reg_st) of the
 * register set selected by task->flags.reg_idx.
 *
 * p    - HalH264eVepu580Tune pointer passed as void *; NULL is a no-op.
 * task - encoder task; task->rc_task->info receives the average madi / madp.
 *
 * Steps: derive the block-count statistics, classify the frame's motion
 * level, score it together with the md_madp history, update
 * curr_scene_motion_flag from the md_flag history, then push the new
 * values into the histories (newest entry at index 0).
 */
static void vepu580_h264e_tune_stat_update(void *p, HalEncTask *task)
{
    HalH264eVepu580Tune *tune = (HalH264eVepu580Tune *)p;
    EncRcTaskInfo *rc_info = &task->rc_task->info;

    if (NULL == tune)
        return;

    HalH264eVepu580Ctx *ctx = tune->ctx;
    HalVepu580RegSet *regs = &ctx->regs_sets[task->flags.reg_idx];
    /* half of the maximum weighted score (weights 13, 11, 8, then 6 each; level max 2) */
    RK_S32 score_thd = ((MD_WIN_LEN - 2) * 6 + 2 * 8 + 2 * 11 + 2 * 13) / 2;
    RK_S32 level = MOTION_LEVEL_STILL;
    RK_S32 md_flag = 0;
    RK_S32 score = 0;
    RK_S32 mb_num = 0;
    RK_S32 shift = 10;
    RK_S32 cnt[5];
    RK_S32 *flags = tune->md_flag_matrix;
    RK_S32 k;

    tune->ap_motion_flag = (ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) ? 0 : 1;

    /* update statistic info here */
    for (k = 0; k < 5; k++) {
        cnt[k] = regs->reg_st.md_sad_b16num0 * madp_num_map[k][0] +
                 regs->reg_st.md_sad_b16num1 * madp_num_map[k][1] +
                 regs->reg_st.md_sad_b16num2 * madp_num_map[k][2] +
                 regs->reg_st.md_sad_b16num3 * madp_num_map[k][3];
    }

    /* per-block averages; a zero block count yields 0 instead of dividing */
    if (regs->reg_st.st_bnum_b16.num_b16)
        tune->pre_madi[0] = regs->reg_st.madi / regs->reg_st.st_bnum_b16.num_b16;
    else
        tune->pre_madi[0] = 0;
    rc_info->madi = tune->pre_madi[0];

    if (regs->reg_st.st_bnum_cme.num_ctu)
        tune->pre_madp[0] = regs->reg_st.madp / regs->reg_st.st_bnum_cme.num_ctu;
    else
        tune->pre_madp[0] = 0;
    rc_info->madp = tune->pre_madp[0];

    mb_num = regs->reg_st.madi_b16num0 + regs->reg_st.madi_b16num1 +
             regs->reg_st.madi_b16num2 + regs->reg_st.madi_b16num3;
    if (0 == mb_num)
        mb_num = 1;

    if (0 != tune->ap_motion_flag)
        shift = 15;

    if (0 == cnt[4]) {
        /* no md_sad blocks counted at all */
        level = MOTION_LEVEL_UNKNOW_SCENE;
    } else {
        RK_S32 first = tune->ap_motion_flag * 3;
        RK_S32 row;

        /* use the first threshold row whose average-madp limit is reached */
        for (row = first; row < first + 3; row++) {
            RK_S32 *thd;

            if (tune->pre_madp[0] < mb_avg_madp_thd[row])
                continue;

            thd = ctu_madp_cnt_thd[row];

            if (cnt[0] > ((mb_num * thd[0]) >> shift) ||
                cnt[1] > ((mb_num * thd[1]) >> shift) ||
                cnt[2] > ((mb_num * thd[2]) >> shift) ||
                ((cnt[0] > ((mb_num * thd[3]) >> shift) ||
                  cnt[1] > ((mb_num * thd[4]) >> shift)) &&
                 cnt[3] < ((mb_num * thd[5]) >> shift)) ||
                cnt[3] < ((mb_num * thd[6]) >> shift)) {
                level = MOTION_LEVEL_BIG_MOTION;
            } else if (cnt[3] < ((mb_num * thd[7]) >> shift)) {
                level = MOTION_LEVEL_MOTION;
            }
            break;
        }
    }

    /* weighted score over the current level and the md_madp history */
    score = tune->md_madp[0] * 11 + tune->md_madp[1] * 8;
    if (MOTION_LEVEL_UNKNOW_SCENE != level)
        score += level * 13;
    else
        score_thd -= 13;   /* current frame does not contribute */

    for (k = 2; k < MD_WIN_LEN; k++)
        score += tune->md_madp[k] * 6;

    if (score >= score_thd)
        md_flag = 1;

    /* scene decision based on the md_flag history before this frame is pushed */
    if (flags[0] && flags[1] && flags[2])
        tune->curr_scene_motion_flag = 1;
    else if ((flags[0] && flags[1]) || (flags[1] && flags[2] && flags[3]))
        tune->curr_scene_motion_flag = md_flag;
    else
        tune->curr_scene_motion_flag = 0;

    /* push the new values; an unknown level leaves md_madp untouched */
    if (MOTION_LEVEL_UNKNOW_SCENE != level) {
        for (k = MD_WIN_LEN - 1; k > 0; k--)
            tune->md_madp[k] = tune->md_madp[k - 1];
        tune->md_madp[0] = level;
    }

    for (k = MD_SHOW_LEN - 1; k > 0; k--)
        flags[k] = flags[k - 1];
    flags[0] = md_flag;

    for (k = FRAME_MOTION_ANALYSIS_NUM - 1; k > 0; k--)
        tune->scene_motion_flag_matrix[k] = tune->scene_motion_flag_matrix[k - 1];
    tune->scene_motion_flag_matrix[0] = tune->curr_scene_motion_flag;

    tune->pre_madi[1] = tune->pre_madi[0];
    tune->pre_madp[1] = tune->pre_madp[0];
}
347