1 /*
2 * Copyright 2021 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "vepu580_tune.h"
18 #include "hal_h265e_vepu580_reg.h"
19
/* Debug flag bit defined for this file; presumably meant for content-analysis logging -- TODO confirm. */
#define HAL_H265E_DBG_CONTENT (0x00200000)
/*
 * NOTE(review): this helper is named after, and expands to, the H.264 encoder
 * logger and flag (hal_h264e_dbg_f / HAL_H264E_DBG_CONTENT) rather than the
 * HAL_H265E_DBG_CONTENT flag defined just above. Nothing in the visible part
 * of this file invokes it. Looks like a copy-paste leftover -- confirm before
 * relying on it.
 */
#define hal_h264e_dbg_content(fmt, ...) hal_h264e_dbg_f(HAL_H264E_DBG_CONTENT, fmt, ## __VA_ARGS__)

/*
 * Byte counts per 64x64 CTU. vepu580_setup_qpmap_buf() sizes the qpmap base
 * config buffer with 64 bytes per CTU and the qpmap qp config buffer with
 * 192 bytes per CTU, i.e. the same values as these two macros.
 */
#define CTU_BASE_CFG_BYTE 64
#define CTU_QP_CFG_BYTE 192
25
26 /*
27 * Please follow the configuration below:
28 *
29 * FRAME_CONTENT_ANALYSIS_NUM >= 5
30 * MD_WIN_LEN >= 3
31 * MD_SHOW_LEN == 4
32 */
33
/*
 * Tuning state kept across frames by the vepu580 H.265 tune helpers.
 * Allocated and initialised by vepu580_h265e_tune_init().
 */
typedef struct HalH265eVepu580Tune_t {
    /* HAL context this state belongs to; set once at init */
    H265eV580HalContext *ctx;

    /* 0/1 scene motion decision computed by the last statistic update */
    RK_S32 curr_scene_motion_flag;
    /* 0 when cfg->tune.scene_mode is MPP_ENC_SCENE_MODE_IPC, 1 otherwise */
    RK_S32 ap_motion_flag;
    /*
     * History of per-frame motion levels, newest entry at index 0.
     * level: 0~2: 0 <--> static, 1 <--> medium motion, 2 <--> large motion
     */
    RK_S32 md_madp[MD_WIN_LEN];
    /*
     * level: 0~2: 0 <--> simple texture, 1 <--> medium texture, 2 <--> complex texture
     * NOTE(review): only zeroed at init by the code visible in this file.
     */
    RK_S32 txtr_madi[FRAME_CONTENT_ANALYSIS_NUM];
    /* history of curr_scene_motion_flag values, newest entry at index 0 */
    RK_S32 scene_motion_flag_matrix[FRAME_MOTION_ANALYSIS_NUM];
    /* history of per-frame md_flag decisions, newest entry at index 0 */
    RK_S32 md_flag_matrix[MD_SHOW_LEN];

    /*
     * madp / madi feedback of previous frames: [0] is loaded from the frame
     * feedback (st_madp / st_madi) in the statistic update and then copied
     * to [1]; both start at -1.
     */
    RK_S32 pre_madp[2];
    RK_S32 pre_madi[2];
} HalH265eVepu580Tune;
51
52 static RK_S32 ctu_avg_madp_thd[6] = {896, 640, 384, 896, 640, 384};
53
54 static RK_U8 lvl32_preintra_cst_wgt[4][8] = {
55 {21, 20, 19, 18, 20, 22, 24, 16},
56 {19, 18, 17, 16, 18, 20, 21, 16},
57 {20, 19, 18, 17, 19, 21, 22, 16},
58 {16, 16, 16, 16, 16, 16, 16, 16},
59 };
60
61 static RK_U8 lvl16_preintra_cst_wgt[4][8] = {
62 {21, 20, 19, 18, 20, 22, 24, 16},
63 {19, 18, 17, 16, 18, 20, 21, 16},
64 {20, 19, 18, 17, 19, 21, 22, 16},
65 {16, 16, 16, 16, 16, 16, 16, 16},
66 };
67
68 static RK_S32 skip_b64_atf_wgt[4][13] = {
69 {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
70 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
71 {16, 14, 15, 16, 14, 14, 15, 15, 15, 16, 16, 16, 16},
72 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
73 };
74
75 static RK_S32 intra_b32_atf_wgt[4][12] = {
76 {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
77 {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
78 {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
79 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
80 };
81
82 static RK_S32 skip_b32_atf_wgt[4][13] = {
83 {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
84 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
85 {18, 14, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
86 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
87 };
88
89 static RK_S32 intra_b16_atf_wgt[4][12] = {
90 {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
91 {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
92 {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
93 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
94 };
95
96 static RK_S32 skip_b16_atf_wgt[4][13] = {
97 {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
98 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
99 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
100 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
101 };
102
103 static RK_S32 intra_b8_atf_wgt[4][12] = {
104 {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
105 {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
106 {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
107 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
108 };
109
110 static RK_S32 skip_b8_atf_wgt[4][13] = {
111 {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
112 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
113 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
114 {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
115 };
116
117 static RK_U32 intra_lvl16_sobel_a[4][9] = {
118 {32, 32, 32, 32, 32, 32, 32, 32, 32},
119 {16, 16, 16, 16, 16, 16, 16, 16, 16},
120 {16, 16, 16, 16, 16, 16, 16, 16, 16},
121 {16, 16, 16, 16, 16, 16, 16, 16, 16},
122 };
123
124 static RK_U32 intra_lvl16_sobel_c[4][9] = {
125 {13, 13, 13, 13, 13, 13, 13, 13, 13},
126 {16, 16, 16, 16, 16, 16, 16, 16, 16},
127 {16, 16, 16, 16, 16, 16, 16, 16, 16},
128 {16, 16, 16, 16, 16, 16, 16, 16, 16},
129 };
130
131 static RK_U32 intra_lvl16_sobel_d[4][9] = {
132 {23750, 23750, 23750, 23750, 23750, 23750, 23750, 23750, 23750},
133 {0, 0, 0, 0, 0, 0, 0, 0, 0},
134 {0, 0, 0, 0, 0, 0, 0, 0, 0},
135 {0, 0, 0, 0, 0, 0, 0, 0, 0},
136 };
137
138 static RK_U32 intra_lvl32_sobel_a[4][5] = {
139 {18, 18, 18, 18, 18},
140 {16, 16, 16, 16, 16},
141 {16, 16, 16, 16, 16},
142 {16, 16, 16, 16, 16},
143 };
144
145 static RK_U32 intra_lvl32_sobel_c[4][5] = {
146 {16, 16, 16, 16, 16},
147 {16, 16, 16, 16, 16},
148 {16, 16, 16, 16, 16},
149 {16, 16, 16, 16, 16},
150 };
151
152 static RK_U32 qnt_bias_i[4] = {
153 128, 128, 128, 128
154 };
155
156 static RK_U32 qnt_bias_p[4] = {
157 60, 60, 85, 85
158 };
159
160 static RK_U32 rime_sqi_cime_sad_th[4] = {
161 48, 0, 0, 0
162 };
163
164 static RK_U32 fme_sqi_cime_sad_pu16_th[4] = {
165 16, 0, 0, 0
166 };
167
168 static RK_U32 fme_sqi_cime_sad_pu32_th[4] = {
169 16, 0, 0, 0
170 };
171
172 static RK_U32 fme_sqi_cime_sad_pu64_th[4] = {
173 16, 0, 0, 0
174 };
175
176 static RK_U32 chrm_klut_ofst[4] = {
177 3, 0, 0, 0
178 };
179
180 static RK_S32 pre_intra_b32_cost[4][2] = {
181 {31, 30},
182 {23, 20},
183 {31, 30},
184 {23, 20},
185 };
186
187 static RK_S32 pre_intra_b16_cost[4][2] = {
188 {31, 30},
189 {23, 20},
190 {31, 30},
191 {23, 20},
192 };
193
194 static RK_S32 cime_multi[4][4] = {
195 {4, 8, 24, 24},
196 {4, 7, 20, 20},
197 {4, 8, 24, 24},
198 {4, 4, 4, 4},
199 };
200
201 static RK_S32 rime_multi[4][3] = {
202 {4, 32, 128},
203 {4, 16, 64},
204 {4, 32, 128},
205 {4, 4, 4},
206 };
207
vepu580_h265e_tune_init(H265eV580HalContext * ctx)208 static HalH265eVepu580Tune *vepu580_h265e_tune_init(H265eV580HalContext *ctx)
209 {
210 HalH265eVepu580Tune *tune = mpp_malloc(HalH265eVepu580Tune, 1);
211 RK_S32 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
212
213 if (NULL == tune)
214 return tune;
215
216 tune->ctx = ctx;
217 tune->curr_scene_motion_flag = 0;
218 tune->ap_motion_flag = scene_mode;
219 memset(tune->md_madp, 0, sizeof(tune->md_madp));
220 memset(tune->txtr_madi, 0, sizeof(tune->txtr_madi));
221 memset(tune->md_flag_matrix, 0, sizeof(tune->md_flag_matrix));
222 memset(tune->scene_motion_flag_matrix, 0, sizeof(tune->scene_motion_flag_matrix));
223 tune->pre_madi[0] = tune->pre_madi[1] = -1;
224 tune->pre_madp[0] = tune->pre_madp[1] = -1;
225
226 return tune;
227 }
228
/*
 * Release a tuning state created by vepu580_h265e_tune_init().
 *
 * tune: state pointer (passed as void *).
 * NOTE(review): MPP_FREE is defined outside this file; presumably it
 * tolerates NULL and clears the local pointer -- confirm in mpp_mem.h.
 */
static void vepu580_h265e_tune_deinit(void *tune)
{
    MPP_FREE(tune);
}
233
vepu580_h265e_tune_atf(H265eV580HalContext * ctx,RK_U32 sm_flag)234 static void vepu580_h265e_tune_atf(H265eV580HalContext *ctx, RK_U32 sm_flag)
235 {
236 H265eV580RegSet *regs = ctx->frm->regs_set[0];
237 vepu580_rdo_cfg *reg_rdo = ®s->reg_rdo;
238 RdoAtfSkipCfg *s;
239 RdoAtfCfg* p;
240 RK_U32 atf_idx = ctx->cfg->tune.vmaf_opt ? 3 : sm_flag;
241
242 s = ®_rdo->rdo_b64_skip_atf;
243 s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
244 s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
245 s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
246 s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
247
248 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = skip_b64_atf_wgt[atf_idx][0];
249 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10 = skip_b64_atf_wgt[atf_idx][1];
250 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11 = skip_b64_atf_wgt[atf_idx][2];
251 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12 = skip_b64_atf_wgt[atf_idx][3];
252 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20 = skip_b64_atf_wgt[atf_idx][4];
253 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21 = skip_b64_atf_wgt[atf_idx][5];
254 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22 = skip_b64_atf_wgt[atf_idx][6];
255 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30 = skip_b64_atf_wgt[atf_idx][7];
256 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31 = skip_b64_atf_wgt[atf_idx][8];
257 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32 = skip_b64_atf_wgt[atf_idx][9];
258 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40 = skip_b64_atf_wgt[atf_idx][10];
259 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41 = skip_b64_atf_wgt[atf_idx][11];
260 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42 = skip_b64_atf_wgt[atf_idx][12];
261
262 p = ®_rdo->rdo_b32_intra_atf;
263 p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
264 p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
265 p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
266 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = intra_b32_atf_wgt[atf_idx][0];
267 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01 = intra_b32_atf_wgt[atf_idx][1];
268 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02 = intra_b32_atf_wgt[atf_idx][2];
269 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10 = intra_b32_atf_wgt[atf_idx][3];
270 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11 = intra_b32_atf_wgt[atf_idx][4];
271 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12 = intra_b32_atf_wgt[atf_idx][5];
272 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20 = intra_b32_atf_wgt[atf_idx][6];
273 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21 = intra_b32_atf_wgt[atf_idx][7];
274 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22 = intra_b32_atf_wgt[atf_idx][8];
275 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30 = intra_b32_atf_wgt[atf_idx][9];
276 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31 = intra_b32_atf_wgt[atf_idx][10];
277 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32 = intra_b32_atf_wgt[atf_idx][11];
278
279 s = ®_rdo->rdo_b32_skip_atf;
280 s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
281 s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
282 s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
283 s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
284 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = skip_b32_atf_wgt[atf_idx][0];
285 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10 = skip_b32_atf_wgt[atf_idx][1];
286 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11 = skip_b32_atf_wgt[atf_idx][2];
287 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12 = skip_b32_atf_wgt[atf_idx][3];
288 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20 = skip_b32_atf_wgt[atf_idx][4];
289 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21 = skip_b32_atf_wgt[atf_idx][5];
290 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22 = skip_b32_atf_wgt[atf_idx][6];
291 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30 = skip_b32_atf_wgt[atf_idx][7];
292 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31 = skip_b32_atf_wgt[atf_idx][8];
293 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32 = skip_b32_atf_wgt[atf_idx][9];
294 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40 = skip_b32_atf_wgt[atf_idx][10];
295 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41 = skip_b32_atf_wgt[atf_idx][11];
296 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42 = skip_b32_atf_wgt[atf_idx][12];
297
298 p = ®_rdo->rdo_b16_intra_atf;
299 p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
300 p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
301 p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
302 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = intra_b16_atf_wgt[atf_idx][0];
303 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01 = intra_b16_atf_wgt[atf_idx][1];
304 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02 = intra_b16_atf_wgt[atf_idx][2];
305 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10 = intra_b16_atf_wgt[atf_idx][3];
306 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11 = intra_b16_atf_wgt[atf_idx][4];
307 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12 = intra_b16_atf_wgt[atf_idx][5];
308 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20 = intra_b16_atf_wgt[atf_idx][6];
309 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21 = intra_b16_atf_wgt[atf_idx][7];
310 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22 = intra_b16_atf_wgt[atf_idx][8];
311 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30 = intra_b16_atf_wgt[atf_idx][9];
312 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31 = intra_b16_atf_wgt[atf_idx][10];
313 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32 = intra_b16_atf_wgt[atf_idx][11];
314
315 s = ®_rdo->rdo_b16_skip_atf;
316 s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
317 s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
318 s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
319 s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
320 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = skip_b16_atf_wgt[atf_idx][0];
321 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10 = skip_b16_atf_wgt[atf_idx][1];
322 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11 = skip_b16_atf_wgt[atf_idx][2];
323 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12 = skip_b16_atf_wgt[atf_idx][3];
324 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20 = skip_b16_atf_wgt[atf_idx][4];
325 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21 = skip_b16_atf_wgt[atf_idx][5];
326 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22 = skip_b16_atf_wgt[atf_idx][6];
327 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30 = skip_b16_atf_wgt[atf_idx][7];
328 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31 = skip_b16_atf_wgt[atf_idx][8];
329 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32 = skip_b16_atf_wgt[atf_idx][9];
330 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40 = skip_b16_atf_wgt[atf_idx][10];
331 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41 = skip_b16_atf_wgt[atf_idx][11];
332 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42 = skip_b16_atf_wgt[atf_idx][12];
333
334 p = ®_rdo->rdo_b8_intra_atf;
335 p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
336 p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
337 p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
338 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = intra_b8_atf_wgt[atf_idx][0];
339 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01 = intra_b8_atf_wgt[atf_idx][1];
340 p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02 = intra_b8_atf_wgt[atf_idx][2];
341 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10 = intra_b8_atf_wgt[atf_idx][3];
342 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11 = intra_b8_atf_wgt[atf_idx][4];
343 p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12 = intra_b8_atf_wgt[atf_idx][5];
344 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20 = intra_b8_atf_wgt[atf_idx][6];
345 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21 = intra_b8_atf_wgt[atf_idx][7];
346 p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22 = intra_b8_atf_wgt[atf_idx][8];
347 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30 = intra_b8_atf_wgt[atf_idx][9];
348 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31 = intra_b8_atf_wgt[atf_idx][10];
349 p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32 = intra_b8_atf_wgt[atf_idx][11];
350
351 s = ®_rdo->rdo_b8_skip_atf;
352 s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
353 s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
354 s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
355 s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
356 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00 = skip_b8_atf_wgt[atf_idx][0];
357 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10 = skip_b8_atf_wgt[atf_idx][1];
358 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11 = skip_b8_atf_wgt[atf_idx][2];
359 s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12 = skip_b8_atf_wgt[atf_idx][3];
360 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20 = skip_b8_atf_wgt[atf_idx][4];
361 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21 = skip_b8_atf_wgt[atf_idx][5];
362 s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22 = skip_b8_atf_wgt[atf_idx][6];
363 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30 = skip_b8_atf_wgt[atf_idx][7];
364 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31 = skip_b8_atf_wgt[atf_idx][8];
365 s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32 = skip_b8_atf_wgt[atf_idx][9];
366 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40 = skip_b8_atf_wgt[atf_idx][10];
367 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41 = skip_b8_atf_wgt[atf_idx][11];
368 s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42 = skip_b8_atf_wgt[atf_idx][12];
369 }
370
vepu580_h265e_tune_reg_patch(void * p)371 static void vepu580_h265e_tune_reg_patch(void *p)
372 {
373 HalH265eVepu580Tune *tune = (HalH265eVepu580Tune *)p;
374 H265eV580HalContext *ctx = NULL;
375 RK_S32 scene_mode = 0;
376
377 if (NULL == tune)
378 return;
379
380 ctx = tune->ctx;
381 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
382 tune->ap_motion_flag = scene_mode;
383 /* modify register here */
384 H265eV580RegSet *regs = ctx->frm->regs_set[0];
385 hevc_vepu580_rc_klut *rc_regs = ®s->reg_rc_klut;
386 hevc_vepu580_wgt *reg_wgt = ®s->reg_wgt;
387 vepu580_rdo_cfg *reg_rdo = ®s->reg_rdo;
388 RK_U32 scene_motion_flag = tune->ap_motion_flag * 2 + tune->curr_scene_motion_flag;
389 MppEncHwCfg *hw = &ctx->cfg->hw;
390 RK_S32 vmaf_opt = ctx->cfg->tune.vmaf_opt;
391 RK_U32 pre_intra_idx = vmaf_opt ? 3 : scene_motion_flag;
392
393 if (scene_motion_flag > 3) {
394 mpp_err_f("scene_motion_flag is a wrong value %d\n", scene_motion_flag);
395 return;
396 }
397
398 memcpy(®_wgt->lvl32_intra_CST_WGT0, lvl32_preintra_cst_wgt[pre_intra_idx],
399 sizeof(lvl32_preintra_cst_wgt[pre_intra_idx]));
400 memcpy(®_wgt->lvl16_intra_CST_WGT0, lvl16_preintra_cst_wgt[pre_intra_idx],
401 sizeof(lvl16_preintra_cst_wgt[pre_intra_idx]));
402
403 /* Do not adjust the ATF weight when skip bias is enabled */
404 if (!hw->skip_bias_en)
405 vepu580_h265e_tune_atf(ctx, scene_motion_flag);
406
407 reg_rdo->preintra_b32_cst_wgt.pre_intra32_cst_wgt00 = pre_intra_b32_cost[scene_motion_flag][0];
408 reg_rdo->preintra_b32_cst_wgt.pre_intra32_cst_wgt01 = pre_intra_b32_cost[scene_motion_flag][1];
409 reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt00 = pre_intra_b16_cost[scene_motion_flag][0];
410 reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt01 = pre_intra_b16_cost[scene_motion_flag][1];
411
412 rc_regs->md_sad_thd.md_sad_thd0 = 7;
413 rc_regs->md_sad_thd.md_sad_thd1 = 15;
414 rc_regs->md_sad_thd.md_sad_thd2 = 25;
415 rc_regs->madi_thd.madi_thd0 = 4;
416 rc_regs->madi_thd.madi_thd1 = 9;
417 rc_regs->madi_thd.madi_thd2 = 15;
418
419 reg_wgt->cime_sqi_cfg.cime_pmv_set_zero = !tune->curr_scene_motion_flag;
420 reg_wgt->cime_sqi_multi0.cime_multi0 = cime_multi[scene_motion_flag][0];
421 reg_wgt->cime_sqi_multi0.cime_multi1 = cime_multi[scene_motion_flag][1];
422 reg_wgt->cime_sqi_multi1.cime_multi2 = cime_multi[scene_motion_flag][2];
423 reg_wgt->cime_sqi_multi1.cime_multi3 = cime_multi[scene_motion_flag][3];
424
425 reg_wgt->rime_sqi_multi.rime_multi0 = rime_multi[scene_motion_flag][0];
426 reg_wgt->rime_sqi_multi.rime_multi1 = rime_multi[scene_motion_flag][1];
427 reg_wgt->rime_sqi_multi.rime_multi2 = rime_multi[scene_motion_flag][2];
428
429 if (tune->curr_scene_motion_flag) {
430 reg_wgt->fme_sqi_thd1.move_lambda = 8;
431 }
432
433 reg_rdo->rdo_sqi_cfg.rdo_segment_en = vmaf_opt ? 0 : !tune->curr_scene_motion_flag;
434 reg_rdo->rdo_sqi_cfg.rdo_smear_en = vmaf_opt ? 0 : !tune->curr_scene_motion_flag;
435
436 reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp0 = intra_lvl16_sobel_a[scene_motion_flag][0];
437 reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp1 = intra_lvl16_sobel_a[scene_motion_flag][1];
438 reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp2 = intra_lvl16_sobel_a[scene_motion_flag][2];
439 reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp3 = intra_lvl16_sobel_a[scene_motion_flag][3];
440 reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp4 = intra_lvl16_sobel_a[scene_motion_flag][4];
441 reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp5 = intra_lvl16_sobel_a[scene_motion_flag][5];
442 reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp6 = intra_lvl16_sobel_a[scene_motion_flag][6];
443 reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp7 = intra_lvl16_sobel_a[scene_motion_flag][7];
444 reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp8 = intra_lvl16_sobel_a[scene_motion_flag][8];
445 reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp0 = intra_lvl16_sobel_c[scene_motion_flag][0];
446 reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp1 = intra_lvl16_sobel_c[scene_motion_flag][1];
447 reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp2 = intra_lvl16_sobel_c[scene_motion_flag][2];
448 reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp3 = intra_lvl16_sobel_c[scene_motion_flag][3];
449 reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp4 = intra_lvl16_sobel_c[scene_motion_flag][4];
450 reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp5 = intra_lvl16_sobel_c[scene_motion_flag][5];
451 reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp6 = intra_lvl16_sobel_c[scene_motion_flag][6];
452 reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp7 = intra_lvl16_sobel_c[scene_motion_flag][7];
453 reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp8 = intra_lvl16_sobel_c[scene_motion_flag][8];
454 reg_wgt->i16_sobel_d_00.intra_l16_sobel_d0_qp0 = intra_lvl16_sobel_d[scene_motion_flag][0];
455 reg_wgt->i16_sobel_d_00.intra_l16_sobel_d0_qp1 = intra_lvl16_sobel_d[scene_motion_flag][1];
456 reg_wgt->i16_sobel_d_01.intra_l16_sobel_d0_qp2 = intra_lvl16_sobel_d[scene_motion_flag][2];
457 reg_wgt->i16_sobel_d_01.intra_l16_sobel_d0_qp3 = intra_lvl16_sobel_d[scene_motion_flag][3];
458 reg_wgt->i16_sobel_d_02.intra_l16_sobel_d0_qp4 = intra_lvl16_sobel_d[scene_motion_flag][4];
459 reg_wgt->i16_sobel_d_02.intra_l16_sobel_d0_qp5 = intra_lvl16_sobel_d[scene_motion_flag][5];
460 reg_wgt->i16_sobel_d_03.intra_l16_sobel_d0_qp6 = intra_lvl16_sobel_d[scene_motion_flag][6];
461 reg_wgt->i16_sobel_d_03.intra_l16_sobel_d0_qp7 = intra_lvl16_sobel_d[scene_motion_flag][7];
462 reg_wgt->i16_sobel_d_04.intra_l16_sobel_d0_qp8 = intra_lvl16_sobel_d[scene_motion_flag][8];
463 reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp0 = intra_lvl32_sobel_a[scene_motion_flag][0];
464 reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp1 = intra_lvl32_sobel_a[scene_motion_flag][1];
465 reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp2 = intra_lvl32_sobel_a[scene_motion_flag][2];
466 reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp3 = intra_lvl32_sobel_a[scene_motion_flag][3];
467 reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp4 = intra_lvl32_sobel_a[scene_motion_flag][4];
468 reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp0 = intra_lvl32_sobel_c[scene_motion_flag][0];
469 reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp1 = intra_lvl32_sobel_c[scene_motion_flag][1];
470 reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp2 = intra_lvl32_sobel_c[scene_motion_flag][2];
471 reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp3 = intra_lvl32_sobel_c[scene_motion_flag][3];
472 reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp4 = intra_lvl32_sobel_c[scene_motion_flag][4];
473
474 if (hw->qbias_en) {
475 reg_wgt->reg1484_qnt_bias_comb.qnt_bias_i = hw->qbias_i ? hw->qbias_i : 171;
476 reg_wgt->reg1484_qnt_bias_comb.qnt_bias_p = hw->qbias_p ? hw->qbias_p : 85;
477 } else {
478 reg_wgt->reg1484_qnt_bias_comb.qnt_bias_i = qnt_bias_i[scene_motion_flag];
479 reg_wgt->reg1484_qnt_bias_comb.qnt_bias_p = qnt_bias_p[scene_motion_flag];
480 }
481
482 reg_wgt->rime_sqi_thd.cime_sad_th0 = rime_sqi_cime_sad_th[scene_motion_flag];
483 reg_wgt->fme_sqi_thd0.cime_sad_pu16_th = fme_sqi_cime_sad_pu16_th[scene_motion_flag];
484 reg_wgt->fme_sqi_thd0.cime_sad_pu32_th = fme_sqi_cime_sad_pu32_th[scene_motion_flag];
485 reg_wgt->fme_sqi_thd1.cime_sad_pu64_th = fme_sqi_cime_sad_pu64_th[scene_motion_flag];
486 rc_regs->klut_ofst.chrm_klut_ofst = chrm_klut_ofst[scene_motion_flag];
487 }
488
/*
 * Fold the hardware statistics of the finished frame into the tuning state
 * and report motion / complexity levels to rate control.
 *
 * p:       HalH265eVepu580Tune pointer (passed as void *); NULL is a no-op.
 * rc_info: receives motion_level (0 / 100 / 200) and complex_level (0 / 1 / 2).
 *
 * Side effects: accumulates the per-tile md_sad / madi counters into
 * ctx->frm->feedback, refreshes ap_motion_flag, pre_madp / pre_madi,
 * curr_scene_motion_flag and shifts the three history windows.
 */
static void vepu580_h265e_tune_stat_update(void *p, EncRcTaskInfo *rc_info)
{
    HalH265eVepu580Tune *tune = (HalH265eVepu580Tune *)p;

    if (!tune)
        return;

    H265eV580HalContext *ctx = tune->ctx;
    Vepu580H265Fbk *fb = &ctx->frm->feedback;
    RK_S32 k;

    tune->ap_motion_flag = (ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) ? 0 : 1;

    /* update statistic info here: add every tile's counters to the frame feedback */
    for (k = 0; k < (RK_S32)ctx->tile_num; k++) {
        H265eV580StatusElem *tile = ctx->frm->regs_ret[k];

        fb->st_md_sad_b16num0 += tile->st.md_sad_b16num0;
        fb->st_md_sad_b16num1 += tile->st.md_sad_b16num1;
        fb->st_md_sad_b16num2 += tile->st.md_sad_b16num2;
        fb->st_md_sad_b16num3 += tile->st.md_sad_b16num3;
        fb->st_madi_b16num0 += tile->st.madi_b16num0;
        fb->st_madi_b16num1 += tile->st.madi_b16num1;
        fb->st_madi_b16num2 += tile->st.madi_b16num2;
        fb->st_madi_b16num3 += tile->st.madi_b16num3;
    }

    /* avoid a zero block count in the threshold scaling below */
    RK_S32 mb_num = fb->st_mb_num ? fb->st_mb_num : 1;
    /* thresholds are scaled down by 2^10 in IPC mode and 2^15 otherwise */
    RK_S32 shift = tune->ap_motion_flag ? 15 : 10;
    RK_S32 score_thd = ((MD_WIN_LEN - 2) * 6 + 2 * 8 + 2 * 11 + 2 * 13) / 2;
    RK_S32 cnt[5];
    RK_U32 md_cnt = (24 * fb->st_md_sad_b16num3 + 22 * fb->st_md_sad_b16num2 + 17 *
                     fb->st_md_sad_b16num1) >> 2;
    RK_U32 madi_cnt = (6 * fb->st_madi_b16num3 + 5 * fb->st_madi_b16num2 + 4 *
                       fb->st_madi_b16num1) >> 2;
    RK_U32 mbs = ((ctx->cfg->prep.width + 15) / 16) * ((ctx->cfg->prep.height + 15) / 16);

    /* five weighted sums of the four md_sad counters, weights from madp_num_map */
    for (k = 0; k < 5; k++) {
        cnt[k] = fb->st_md_sad_b16num0 * madp_num_map[k][0] +
                 fb->st_md_sad_b16num1 * madp_num_map[k][1] +
                 fb->st_md_sad_b16num2 * madp_num_map[k][2] +
                 fb->st_md_sad_b16num3 * madp_num_map[k][3];
    }

    tune->pre_madi[0] = fb->st_madi;
    tune->pre_madp[0] = fb->st_madp;

    /* classify this frame's motion level */
    RK_S32 level = MOTION_LEVEL_STILL;

    if (0 == cnt[4]) {
        level = MOTION_LEVEL_UNKNOW_SCENE;
    } else {
        for (k = 0; k < 3; k++) {
            /* threshold row: three rows per scene mode */
            RK_S32 row = tune->ap_motion_flag * 3 + k;

            /*
             * NOTE(review): the madp threshold is indexed by k, not row;
             * both halves of ctu_avg_madp_thd hold the same values.
             */
            if (tune->pre_madp[0] < ctu_avg_madp_thd[k])
                continue;

            if (cnt[0] > (mb_num * ctu_madp_cnt_thd[row][0] >> shift) ||
                cnt[1] > (mb_num * ctu_madp_cnt_thd[row][1] >> shift) ||
                cnt[2] > (mb_num * ctu_madp_cnt_thd[row][2] >> shift) ||
                ((cnt[0] > (mb_num * ctu_madp_cnt_thd[row][3] >> shift) ||
                  cnt[1] > (mb_num * ctu_madp_cnt_thd[row][4] >> shift)) &&
                 cnt[3] < (mb_num * ctu_madp_cnt_thd[row][5] >> shift)) ||
                cnt[3] < (mb_num * ctu_madp_cnt_thd[row][6] >> shift)) {
                level = MOTION_LEVEL_BIG_MOTION;
            } else if (cnt[3] < (mb_num * ctu_madp_cnt_thd[row][7] >> shift)) {
                level = MOTION_LEVEL_MOTION;
            }

            /* only the first madp threshold that is reached is evaluated */
            break;
        }
    }

    /* weighted vote over the current level and the stored history */
    RK_S32 score = tune->md_madp[0] * 11 + tune->md_madp[1] * 8;

    if (MOTION_LEVEL_UNKNOW_SCENE != level) {
        score += level * 13;
    } else {
        /* the current frame does not vote, so lower the bar accordingly */
        score_thd -= 13;
    }

    for (k = 2; k < MD_WIN_LEN; k++) {
        score += tune->md_madp[k] * 6;
    }

    RK_S32 md_flag = (score >= score_thd) ? 1 : 0;

    /* scene decision from the md_flag history (before this frame is pushed) */
    const RK_S32 *hist = tune->md_flag_matrix;
    RK_S32 scene_motion = 0;

    if (hist[0] && hist[1] && hist[2]) {
        scene_motion = 1;
    } else if ((hist[0] && hist[1]) || (hist[1] && hist[2] && hist[3])) {
        scene_motion = md_flag;
    }
    tune->curr_scene_motion_flag = scene_motion;

    /* push the new values into the history windows, newest at index 0 */
    if (MOTION_LEVEL_UNKNOW_SCENE != level) {
        for (k = MD_WIN_LEN - 1; k > 0; k--) {
            tune->md_madp[k] = tune->md_madp[k - 1];
        }
        tune->md_madp[0] = level;
    }

    for (k = MD_SHOW_LEN - 1; k > 0; k--) {
        tune->md_flag_matrix[k] = tune->md_flag_matrix[k - 1];
    }
    tune->md_flag_matrix[0] = md_flag;

    for (k = FRAME_MOTION_ANALYSIS_NUM - 1; k > 0; k--) {
        tune->scene_motion_flag_matrix[k] = tune->scene_motion_flag_matrix[k - 1];
    }
    tune->scene_motion_flag_matrix[0] = tune->curr_scene_motion_flag;

    tune->pre_madi[1] = tune->pre_madi[0];
    tune->pre_madp[1] = tune->pre_madp[0];

    /* report to rate control: md_cnt / madi_cnt compared as a percentage of 16x16 blocks */
    if (md_cnt * 100 > 15 * mbs) {
        rc_info->motion_level = 200;
    } else if (md_cnt * 100 > 5 * mbs) {
        rc_info->motion_level = 100;
    } else {
        rc_info->motion_level = 0;
    }

    if (madi_cnt * 100 > 30 * mbs) {
        rc_info->complex_level = 2;
    } else if (madi_cnt * 100 > 13 * mbs) {
        rc_info->complex_level = 1;
    } else {
        rc_info->complex_level = 0;
    }

    hal_h265e_dbg_detail("motion_level = %u, complex_level = %u\n", rc_info->motion_level,
                         rc_info->complex_level);
}
630
vepu580_setup_qpmap_buf(H265eV580HalContext * ctx)631 static MPP_RET vepu580_setup_qpmap_buf(H265eV580HalContext *ctx)
632 {
633 MPP_RET ret = MPP_OK;
634 RK_S32 w = ctx->cfg->prep.width;
635 RK_S32 h = ctx->cfg->prep.height;
636 RK_S32 ctu_w = MPP_ALIGN(w, 64) / 64;
637 RK_S32 ctu_h = MPP_ALIGN(h, 64) / 64;
638 RK_S32 qpmap_base_cfg_size = ctx->qpmap_base_cfg_size
639 = ctu_w * ctu_h * 64;
640 RK_S32 qpmap_qp_cfg_size = ctx->qpmap_qp_cfg_size
641 = ctu_w * ctu_h * 192;
642 RK_S32 md_flag_size = ctx->md_flag_size
643 = ctu_w * ctu_h * 16;
644
645 if (!ctx->cfg->tune.deblur_en) {
646 mpp_log("deblurring is closed!\n");
647 goto __RET;
648 }
649
650 if (NULL == ctx->qpmap_base_cfg_buf) {
651 mpp_buffer_get(NULL, &ctx->qpmap_base_cfg_buf, qpmap_base_cfg_size);
652 if (!ctx->qpmap_base_cfg_buf) {
653 mpp_err("qpmap_base_cfg_buf malloc fail, qpmap invalid\n");
654 ret = MPP_ERR_VALUE;
655 goto __RET;
656 }
657 }
658
659 if (NULL == ctx->qpmap_qp_cfg_buf) {
660 mpp_buffer_get(NULL, &ctx->qpmap_qp_cfg_buf, qpmap_qp_cfg_size);
661 if (!ctx->qpmap_qp_cfg_buf) {
662 mpp_err("qpmap_qp_cfg_buf malloc fail, qpmap invalid\n");
663 ret = MPP_ERR_VALUE;
664 goto __RET;
665 }
666 }
667
668 if (NULL == ctx->md_flag_buf) {
669 ctx->md_flag_buf = mpp_malloc(RK_U8, md_flag_size);
670 if (!ctx->md_flag_buf) {
671 mpp_err("md_flag_buf malloc fail, qpmap invalid\n");
672 ret = MPP_ERR_VALUE;
673 goto __RET;
674 }
675 }
676
677 __RET:
678 hal_h265e_dbg_func("leave, ret %d\n", ret);
679 return ret;
680 }