xref: /rockchip-linux_mpp/mpp/hal/rkenc/h265e/hal_h265e_vepu580_tune.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2021 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "vepu580_tune.h"
18 #include "hal_h265e_vepu580_reg.h"
19 
/* Debug flag bit for content-analysis traces emitted by this tuner. */
#define HAL_H265E_DBG_CONTENT           (0x00200000)

/*
 * Content-analysis trace helper.
 *
 * The previous definition expanded to hal_h264e_dbg_f(HAL_H264E_DBG_CONTENT, ...),
 * i.e. H.264 identifiers, and left the flag defined above unused. It is now
 * routed through the H.265 flag.
 * NOTE(review): assumes the generic hal_h265e_dbg(flag, fmt, ...) macro from
 * the H.265 HAL debug header is in scope - confirm before first use.
 */
#define hal_h265e_dbg_content(fmt, ...) hal_h265e_dbg(HAL_H265E_DBG_CONTENT, fmt, ## __VA_ARGS__)
/* Old (mis-prefixed) name kept as an alias so existing users keep building. */
#define hal_h264e_dbg_content(fmt, ...) hal_h265e_dbg_content(fmt, ## __VA_ARGS__)

/*
 * Same values as the per-CTU multipliers (64 and 192) used when the qpmap
 * base-cfg and qp-cfg buffers are sized in vepu580_setup_qpmap_buf().
 */
#define CTU_BASE_CFG_BYTE 64
#define CTU_QP_CFG_BYTE 192
25 
26 /*
27  * Please follow the configuration below:
28  *
29  * FRAME_CONTENT_ANALYSIS_NUM >= 5
30  * MD_WIN_LEN >= 3
31  * MD_SHOW_LEN == 4
32  */
33 
34 typedef struct HalH265eVepu580Tune_t {
35     H265eV580HalContext  *ctx;
36 
37     /* motion and texture statistic of previous frames */
38     RK_S32  curr_scene_motion_flag;
39     /* motion and texture statistic of previous frames */
40     RK_S32  ap_motion_flag;
41     // level: 0~2: 0 <--> static, 1 <-->medium motion, 2 <--> large motion
42     RK_S32  md_madp[MD_WIN_LEN];
43     // level: 0~2: 0 <--> simple texture, 1 <--> medium texture, 2 <--> complex texture
44     RK_S32  txtr_madi[FRAME_CONTENT_ANALYSIS_NUM];
45     RK_S32  scene_motion_flag_matrix[FRAME_MOTION_ANALYSIS_NUM];
46     RK_S32  md_flag_matrix[MD_SHOW_LEN];
47 
48     RK_S32  pre_madp[2];
49     RK_S32  pre_madi[2];
50 } HalH265eVepu580Tune;
51 
52 static RK_S32 ctu_avg_madp_thd[6] = {896, 640, 384, 896, 640, 384};
53 
54 static RK_U8 lvl32_preintra_cst_wgt[4][8] = {
55     {21, 20, 19, 18, 20, 22, 24, 16},
56     {19, 18, 17, 16, 18, 20, 21, 16},
57     {20, 19, 18, 17, 19, 21, 22, 16},
58     {16, 16, 16, 16, 16, 16, 16, 16},
59 };
60 
61 static RK_U8 lvl16_preintra_cst_wgt[4][8] = {
62     {21, 20, 19, 18, 20, 22, 24, 16},
63     {19, 18, 17, 16, 18, 20, 21, 16},
64     {20, 19, 18, 17, 19, 21, 22, 16},
65     {16, 16, 16, 16, 16, 16, 16, 16},
66 };
67 
68 static RK_S32 skip_b64_atf_wgt[4][13] = {
69     {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
70     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
71     {16, 14, 15, 16, 14, 14, 15, 15, 15, 16, 16, 16, 16},
72     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
73 };
74 
75 static RK_S32 intra_b32_atf_wgt[4][12] = {
76     {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
77     {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
78     {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
79     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
80 };
81 
82 static RK_S32 skip_b32_atf_wgt[4][13] = {
83     {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
84     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
85     {18, 14, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
86     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
87 };
88 
89 static RK_S32 intra_b16_atf_wgt[4][12] = {
90     {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
91     {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
92     {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
93     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
94 };
95 
96 static RK_S32 skip_b16_atf_wgt[4][13] = {
97     {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
98     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
99     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
100     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
101 };
102 
103 static RK_S32 intra_b8_atf_wgt[4][12] = {
104     {24, 23, 23, 23, 22, 21, 20, 19, 18, 16, 16, 16},
105     {21, 20, 19, 20, 19, 18, 19, 18, 18, 18, 18, 17},
106     {20, 19, 18, 19, 18, 17, 18, 17, 17, 16, 16, 16},
107     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
108 };
109 
110 static RK_S32 skip_b8_atf_wgt[4][13] = {
111     {16, 13, 14, 15, 14, 14, 15, 15, 15, 16, 16, 16, 16},
112     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
113     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
114     {16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16},
115 };
116 
117 static RK_U32 intra_lvl16_sobel_a[4][9] = {
118     {32, 32, 32, 32, 32, 32, 32, 32, 32},
119     {16, 16, 16, 16, 16, 16, 16, 16, 16},
120     {16, 16, 16, 16, 16, 16, 16, 16, 16},
121     {16, 16, 16, 16, 16, 16, 16, 16, 16},
122 };
123 
124 static RK_U32 intra_lvl16_sobel_c[4][9] = {
125     {13, 13, 13, 13, 13, 13, 13, 13, 13},
126     {16, 16, 16, 16, 16, 16, 16, 16, 16},
127     {16, 16, 16, 16, 16, 16, 16, 16, 16},
128     {16, 16, 16, 16, 16, 16, 16, 16, 16},
129 };
130 
131 static RK_U32 intra_lvl16_sobel_d[4][9] = {
132     {23750, 23750, 23750, 23750, 23750, 23750, 23750, 23750, 23750},
133     {0, 0, 0, 0, 0, 0, 0, 0, 0},
134     {0, 0, 0, 0, 0, 0, 0, 0, 0},
135     {0, 0, 0, 0, 0, 0, 0, 0, 0},
136 };
137 
138 static RK_U32 intra_lvl32_sobel_a[4][5] = {
139     {18, 18, 18, 18, 18},
140     {16, 16, 16, 16, 16},
141     {16, 16, 16, 16, 16},
142     {16, 16, 16, 16, 16},
143 };
144 
145 static RK_U32 intra_lvl32_sobel_c[4][5] = {
146     {16, 16, 16, 16, 16},
147     {16, 16, 16, 16, 16},
148     {16, 16, 16, 16, 16},
149     {16, 16, 16, 16, 16},
150 };
151 
152 static RK_U32 qnt_bias_i[4] = {
153     128, 128, 128, 128
154 };
155 
156 static RK_U32 qnt_bias_p[4] = {
157     60, 60, 85, 85
158 };
159 
160 static RK_U32 rime_sqi_cime_sad_th[4] = {
161     48, 0, 0, 0
162 };
163 
164 static RK_U32 fme_sqi_cime_sad_pu16_th[4] = {
165     16, 0, 0, 0
166 };
167 
168 static RK_U32 fme_sqi_cime_sad_pu32_th[4] = {
169     16, 0, 0, 0
170 };
171 
172 static RK_U32 fme_sqi_cime_sad_pu64_th[4] = {
173     16, 0, 0, 0
174 };
175 
176 static RK_U32 chrm_klut_ofst[4] = {
177     3, 0, 0, 0
178 };
179 
180 static RK_S32 pre_intra_b32_cost[4][2] = {
181     {31, 30},
182     {23, 20},
183     {31, 30},
184     {23, 20},
185 };
186 
187 static RK_S32 pre_intra_b16_cost[4][2] = {
188     {31, 30},
189     {23, 20},
190     {31, 30},
191     {23, 20},
192 };
193 
194 static RK_S32 cime_multi[4][4] = {
195     {4, 8, 24, 24},
196     {4, 7, 20, 20},
197     {4, 8, 24, 24},
198     {4, 4, 4, 4},
199 };
200 
201 static RK_S32 rime_multi[4][3] = {
202     {4, 32, 128},
203     {4, 16, 64},
204     {4, 32, 128},
205     {4, 4, 4},
206 };
207 
vepu580_h265e_tune_init(H265eV580HalContext * ctx)208 static HalH265eVepu580Tune *vepu580_h265e_tune_init(H265eV580HalContext *ctx)
209 {
210     HalH265eVepu580Tune *tune = mpp_malloc(HalH265eVepu580Tune, 1);
211     RK_S32 scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
212 
213     if (NULL == tune)
214         return tune;
215 
216     tune->ctx = ctx;
217     tune->curr_scene_motion_flag = 0;
218     tune->ap_motion_flag = scene_mode;
219     memset(tune->md_madp, 0, sizeof(tune->md_madp));
220     memset(tune->txtr_madi, 0, sizeof(tune->txtr_madi));
221     memset(tune->md_flag_matrix, 0, sizeof(tune->md_flag_matrix));
222     memset(tune->scene_motion_flag_matrix, 0, sizeof(tune->scene_motion_flag_matrix));
223     tune->pre_madi[0] = tune->pre_madi[1] = -1;
224     tune->pre_madp[0] = tune->pre_madp[1] = -1;
225 
226     return tune;
227 }
228 
/*
 * Release a tuning state obtained from vepu580_h265e_tune_init().
 * The pointer is handed to MPP_FREE unchanged.
 */
static void vepu580_h265e_tune_deinit(void *tune)
{
    MPP_FREE(tune);
}
233 
vepu580_h265e_tune_atf(H265eV580HalContext * ctx,RK_U32 sm_flag)234 static void vepu580_h265e_tune_atf(H265eV580HalContext *ctx, RK_U32 sm_flag)
235 {
236     H265eV580RegSet *regs = ctx->frm->regs_set[0];
237     vepu580_rdo_cfg  *reg_rdo = &regs->reg_rdo;
238     RdoAtfSkipCfg *s;
239     RdoAtfCfg* p;
240     RK_U32 atf_idx = ctx->cfg->tune.vmaf_opt ? 3 : sm_flag;
241 
242     s = &reg_rdo->rdo_b64_skip_atf;
243     s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
244     s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
245     s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
246     s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
247 
248     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = skip_b64_atf_wgt[atf_idx][0];
249     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10  = skip_b64_atf_wgt[atf_idx][1];
250     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11  = skip_b64_atf_wgt[atf_idx][2];
251     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12  = skip_b64_atf_wgt[atf_idx][3];
252     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20  = skip_b64_atf_wgt[atf_idx][4];
253     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21  = skip_b64_atf_wgt[atf_idx][5];
254     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22  = skip_b64_atf_wgt[atf_idx][6];
255     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30  = skip_b64_atf_wgt[atf_idx][7];
256     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31  = skip_b64_atf_wgt[atf_idx][8];
257     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32  = skip_b64_atf_wgt[atf_idx][9];
258     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40  = skip_b64_atf_wgt[atf_idx][10];
259     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41  = skip_b64_atf_wgt[atf_idx][11];
260     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42  = skip_b64_atf_wgt[atf_idx][12];
261 
262     p = &reg_rdo->rdo_b32_intra_atf;
263     p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
264     p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
265     p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
266     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = intra_b32_atf_wgt[atf_idx][0];
267     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01  = intra_b32_atf_wgt[atf_idx][1];
268     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02  = intra_b32_atf_wgt[atf_idx][2];
269     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10  = intra_b32_atf_wgt[atf_idx][3];
270     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11  = intra_b32_atf_wgt[atf_idx][4];
271     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12  = intra_b32_atf_wgt[atf_idx][5];
272     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20  = intra_b32_atf_wgt[atf_idx][6];
273     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21  = intra_b32_atf_wgt[atf_idx][7];
274     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22  = intra_b32_atf_wgt[atf_idx][8];
275     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30  = intra_b32_atf_wgt[atf_idx][9];
276     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31  = intra_b32_atf_wgt[atf_idx][10];
277     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32  = intra_b32_atf_wgt[atf_idx][11];
278 
279     s = &reg_rdo->rdo_b32_skip_atf;
280     s->rdo_b_cime_thd0.cu_rdo_cime_thd0 =  1;
281     s->rdo_b_cime_thd0.cu_rdo_cime_thd1 =  2;
282     s->rdo_b_cime_thd1.cu_rdo_cime_thd2 =  4;
283     s->rdo_b_cime_thd1.cu_rdo_cime_thd3 =  6;
284     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  =  skip_b32_atf_wgt[atf_idx][0];
285     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10  =  skip_b32_atf_wgt[atf_idx][1];
286     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11  =  skip_b32_atf_wgt[atf_idx][2];
287     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12  =  skip_b32_atf_wgt[atf_idx][3];
288     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20  =  skip_b32_atf_wgt[atf_idx][4];
289     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21  =  skip_b32_atf_wgt[atf_idx][5];
290     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22  =  skip_b32_atf_wgt[atf_idx][6];
291     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30  =  skip_b32_atf_wgt[atf_idx][7];
292     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31  =  skip_b32_atf_wgt[atf_idx][8];
293     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32  =  skip_b32_atf_wgt[atf_idx][9];
294     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40  =  skip_b32_atf_wgt[atf_idx][10];
295     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41  =  skip_b32_atf_wgt[atf_idx][11];
296     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42  =  skip_b32_atf_wgt[atf_idx][12];
297 
298     p = &reg_rdo->rdo_b16_intra_atf;
299     p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
300     p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
301     p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
302     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = intra_b16_atf_wgt[atf_idx][0];
303     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01  = intra_b16_atf_wgt[atf_idx][1];
304     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02  = intra_b16_atf_wgt[atf_idx][2];
305     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10  = intra_b16_atf_wgt[atf_idx][3];
306     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11  = intra_b16_atf_wgt[atf_idx][4];
307     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12  = intra_b16_atf_wgt[atf_idx][5];
308     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20  = intra_b16_atf_wgt[atf_idx][6];
309     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21  = intra_b16_atf_wgt[atf_idx][7];
310     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22  = intra_b16_atf_wgt[atf_idx][8];
311     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30  = intra_b16_atf_wgt[atf_idx][9];
312     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31  = intra_b16_atf_wgt[atf_idx][10];
313     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32  = intra_b16_atf_wgt[atf_idx][11];
314 
315     s = &reg_rdo->rdo_b16_skip_atf;
316     s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
317     s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
318     s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
319     s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
320     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = skip_b16_atf_wgt[atf_idx][0];
321     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10  = skip_b16_atf_wgt[atf_idx][1];
322     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11  = skip_b16_atf_wgt[atf_idx][2];
323     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12  = skip_b16_atf_wgt[atf_idx][3];
324     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20  = skip_b16_atf_wgt[atf_idx][4];
325     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21  = skip_b16_atf_wgt[atf_idx][5];
326     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22  = skip_b16_atf_wgt[atf_idx][6];
327     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30  = skip_b16_atf_wgt[atf_idx][7];
328     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31  = skip_b16_atf_wgt[atf_idx][8];
329     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32  = skip_b16_atf_wgt[atf_idx][9];
330     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40  = skip_b16_atf_wgt[atf_idx][10];
331     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41  = skip_b16_atf_wgt[atf_idx][11];
332     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42  = skip_b16_atf_wgt[atf_idx][12];
333 
334     p = &reg_rdo->rdo_b8_intra_atf;
335     p->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 24;
336     p->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 48;
337     p->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 64;
338     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = intra_b8_atf_wgt[atf_idx][0];
339     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt01  = intra_b8_atf_wgt[atf_idx][1];
340     p->rdo_b_atf_wgt0.cu_rdo_atf_wgt02  = intra_b8_atf_wgt[atf_idx][2];
341     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt10  = intra_b8_atf_wgt[atf_idx][3];
342     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt11  = intra_b8_atf_wgt[atf_idx][4];
343     p->rdo_b_atf_wgt1.cu_rdo_atf_wgt12  = intra_b8_atf_wgt[atf_idx][5];
344     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt20  = intra_b8_atf_wgt[atf_idx][6];
345     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt21  = intra_b8_atf_wgt[atf_idx][7];
346     p->rdo_b_atf_wgt2.cu_rdo_atf_wgt22  = intra_b8_atf_wgt[atf_idx][8];
347     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt30  = intra_b8_atf_wgt[atf_idx][9];
348     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt31  = intra_b8_atf_wgt[atf_idx][10];
349     p->rdo_b_atf_wgt3.cu_rdo_atf_wgt32  = intra_b8_atf_wgt[atf_idx][11];
350 
351     s = &reg_rdo->rdo_b8_skip_atf;
352     s->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
353     s->rdo_b_cime_thd0.cu_rdo_cime_thd1 = 2;
354     s->rdo_b_cime_thd1.cu_rdo_cime_thd2 = 4;
355     s->rdo_b_cime_thd1.cu_rdo_cime_thd3 = 6;
356     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt00  = skip_b8_atf_wgt[atf_idx][0];
357     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt10  = skip_b8_atf_wgt[atf_idx][1];
358     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt11  = skip_b8_atf_wgt[atf_idx][2];
359     s->rdo_b_atf_wgt0.cu_rdo_atf_wgt12  = skip_b8_atf_wgt[atf_idx][3];
360     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt20  = skip_b8_atf_wgt[atf_idx][4];
361     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt21  = skip_b8_atf_wgt[atf_idx][5];
362     s->rdo_b_atf_wgt1.cu_rdo_atf_wgt22  = skip_b8_atf_wgt[atf_idx][6];
363     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt30  = skip_b8_atf_wgt[atf_idx][7];
364     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt31  = skip_b8_atf_wgt[atf_idx][8];
365     s->rdo_b_atf_wgt2.cu_rdo_atf_wgt32  = skip_b8_atf_wgt[atf_idx][9];
366     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt40  = skip_b8_atf_wgt[atf_idx][10];
367     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt41  = skip_b8_atf_wgt[atf_idx][11];
368     s->rdo_b_atf_wgt3.cu_rdo_atf_wgt42  = skip_b8_atf_wgt[atf_idx][12];
369 }
370 
vepu580_h265e_tune_reg_patch(void * p)371 static void vepu580_h265e_tune_reg_patch(void *p)
372 {
373     HalH265eVepu580Tune *tune = (HalH265eVepu580Tune *)p;
374     H265eV580HalContext *ctx = NULL;
375     RK_S32 scene_mode = 0;
376 
377     if (NULL == tune)
378         return;
379 
380     ctx = tune->ctx;
381     scene_mode = ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC ? 0 : 1;
382     tune->ap_motion_flag = scene_mode;
383     /* modify register here */
384     H265eV580RegSet *regs = ctx->frm->regs_set[0];
385     hevc_vepu580_rc_klut *rc_regs =  &regs->reg_rc_klut;
386     hevc_vepu580_wgt *reg_wgt = &regs->reg_wgt;
387     vepu580_rdo_cfg  *reg_rdo = &regs->reg_rdo;
388     RK_U32 scene_motion_flag = tune->ap_motion_flag * 2 + tune->curr_scene_motion_flag;
389     MppEncHwCfg *hw = &ctx->cfg->hw;
390     RK_S32 vmaf_opt = ctx->cfg->tune.vmaf_opt;
391     RK_U32 pre_intra_idx = vmaf_opt ? 3 : scene_motion_flag;
392 
393     if (scene_motion_flag > 3) {
394         mpp_err_f("scene_motion_flag is a wrong value %d\n", scene_motion_flag);
395         return;
396     }
397 
398     memcpy(&reg_wgt->lvl32_intra_CST_WGT0, lvl32_preintra_cst_wgt[pre_intra_idx],
399            sizeof(lvl32_preintra_cst_wgt[pre_intra_idx]));
400     memcpy(&reg_wgt->lvl16_intra_CST_WGT0, lvl16_preintra_cst_wgt[pre_intra_idx],
401            sizeof(lvl16_preintra_cst_wgt[pre_intra_idx]));
402 
403     /* Do not adjust the ATF weight when skip bias is enabled */
404     if (!hw->skip_bias_en)
405         vepu580_h265e_tune_atf(ctx, scene_motion_flag);
406 
407     reg_rdo->preintra_b32_cst_wgt.pre_intra32_cst_wgt00 = pre_intra_b32_cost[scene_motion_flag][0];
408     reg_rdo->preintra_b32_cst_wgt.pre_intra32_cst_wgt01 = pre_intra_b32_cost[scene_motion_flag][1];
409     reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt00 = pre_intra_b16_cost[scene_motion_flag][0];
410     reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt01 = pre_intra_b16_cost[scene_motion_flag][1];
411 
412     rc_regs->md_sad_thd.md_sad_thd0 = 7;
413     rc_regs->md_sad_thd.md_sad_thd1 = 15;
414     rc_regs->md_sad_thd.md_sad_thd2 = 25;
415     rc_regs->madi_thd.madi_thd0     = 4;
416     rc_regs->madi_thd.madi_thd1     = 9;
417     rc_regs->madi_thd.madi_thd2     = 15;
418 
419     reg_wgt->cime_sqi_cfg.cime_pmv_set_zero = !tune->curr_scene_motion_flag;
420     reg_wgt->cime_sqi_multi0.cime_multi0 = cime_multi[scene_motion_flag][0];
421     reg_wgt->cime_sqi_multi0.cime_multi1 = cime_multi[scene_motion_flag][1];
422     reg_wgt->cime_sqi_multi1.cime_multi2 = cime_multi[scene_motion_flag][2];
423     reg_wgt->cime_sqi_multi1.cime_multi3 = cime_multi[scene_motion_flag][3];
424 
425     reg_wgt->rime_sqi_multi.rime_multi0 = rime_multi[scene_motion_flag][0];
426     reg_wgt->rime_sqi_multi.rime_multi1 = rime_multi[scene_motion_flag][1];
427     reg_wgt->rime_sqi_multi.rime_multi2 = rime_multi[scene_motion_flag][2];
428 
429     if (tune->curr_scene_motion_flag) {
430         reg_wgt->fme_sqi_thd1.move_lambda = 8;
431     }
432 
433     reg_rdo->rdo_sqi_cfg.rdo_segment_en = vmaf_opt ? 0 : !tune->curr_scene_motion_flag;
434     reg_rdo->rdo_sqi_cfg.rdo_smear_en = vmaf_opt ? 0 : !tune->curr_scene_motion_flag;
435 
436     reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp0 = intra_lvl16_sobel_a[scene_motion_flag][0];
437     reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp1 = intra_lvl16_sobel_a[scene_motion_flag][1];
438     reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp2 = intra_lvl16_sobel_a[scene_motion_flag][2];
439     reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp3 = intra_lvl16_sobel_a[scene_motion_flag][3];
440     reg_wgt->i16_sobel_a_00.intra_l16_sobel_a0_qp4 = intra_lvl16_sobel_a[scene_motion_flag][4];
441     reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp5 = intra_lvl16_sobel_a[scene_motion_flag][5];
442     reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp6 = intra_lvl16_sobel_a[scene_motion_flag][6];
443     reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp7 = intra_lvl16_sobel_a[scene_motion_flag][7];
444     reg_wgt->i16_sobel_a_01.intra_l16_sobel_a0_qp8 = intra_lvl16_sobel_a[scene_motion_flag][8];
445     reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp0 = intra_lvl16_sobel_c[scene_motion_flag][0];
446     reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp1 = intra_lvl16_sobel_c[scene_motion_flag][1];
447     reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp2 = intra_lvl16_sobel_c[scene_motion_flag][2];
448     reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp3 = intra_lvl16_sobel_c[scene_motion_flag][3];
449     reg_wgt->i16_sobel_c_00.intra_l16_sobel_c0_qp4 = intra_lvl16_sobel_c[scene_motion_flag][4];
450     reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp5 = intra_lvl16_sobel_c[scene_motion_flag][5];
451     reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp6 = intra_lvl16_sobel_c[scene_motion_flag][6];
452     reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp7 = intra_lvl16_sobel_c[scene_motion_flag][7];
453     reg_wgt->i16_sobel_c_01.intra_l16_sobel_c0_qp8 = intra_lvl16_sobel_c[scene_motion_flag][8];
454     reg_wgt->i16_sobel_d_00.intra_l16_sobel_d0_qp0 = intra_lvl16_sobel_d[scene_motion_flag][0];
455     reg_wgt->i16_sobel_d_00.intra_l16_sobel_d0_qp1 = intra_lvl16_sobel_d[scene_motion_flag][1];
456     reg_wgt->i16_sobel_d_01.intra_l16_sobel_d0_qp2 = intra_lvl16_sobel_d[scene_motion_flag][2];
457     reg_wgt->i16_sobel_d_01.intra_l16_sobel_d0_qp3 = intra_lvl16_sobel_d[scene_motion_flag][3];
458     reg_wgt->i16_sobel_d_02.intra_l16_sobel_d0_qp4 = intra_lvl16_sobel_d[scene_motion_flag][4];
459     reg_wgt->i16_sobel_d_02.intra_l16_sobel_d0_qp5 = intra_lvl16_sobel_d[scene_motion_flag][5];
460     reg_wgt->i16_sobel_d_03.intra_l16_sobel_d0_qp6 = intra_lvl16_sobel_d[scene_motion_flag][6];
461     reg_wgt->i16_sobel_d_03.intra_l16_sobel_d0_qp7 = intra_lvl16_sobel_d[scene_motion_flag][7];
462     reg_wgt->i16_sobel_d_04.intra_l16_sobel_d0_qp8 = intra_lvl16_sobel_d[scene_motion_flag][8];
463     reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp0 = intra_lvl32_sobel_a[scene_motion_flag][0];
464     reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp1 = intra_lvl32_sobel_a[scene_motion_flag][1];
465     reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp2 = intra_lvl32_sobel_a[scene_motion_flag][2];
466     reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp3 = intra_lvl32_sobel_a[scene_motion_flag][3];
467     reg_wgt->i32_sobel_a.intra_l32_sobel_a1_qp4 = intra_lvl32_sobel_a[scene_motion_flag][4];
468     reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp0 = intra_lvl32_sobel_c[scene_motion_flag][0];
469     reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp1 = intra_lvl32_sobel_c[scene_motion_flag][1];
470     reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp2 = intra_lvl32_sobel_c[scene_motion_flag][2];
471     reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp3 = intra_lvl32_sobel_c[scene_motion_flag][3];
472     reg_wgt->i32_sobel_c.intra_l32_sobel_c1_qp4 = intra_lvl32_sobel_c[scene_motion_flag][4];
473 
474     if (hw->qbias_en) {
475         reg_wgt->reg1484_qnt_bias_comb.qnt_bias_i = hw->qbias_i ? hw->qbias_i : 171;
476         reg_wgt->reg1484_qnt_bias_comb.qnt_bias_p = hw->qbias_p ? hw->qbias_p : 85;
477     } else {
478         reg_wgt->reg1484_qnt_bias_comb.qnt_bias_i = qnt_bias_i[scene_motion_flag];
479         reg_wgt->reg1484_qnt_bias_comb.qnt_bias_p = qnt_bias_p[scene_motion_flag];
480     }
481 
482     reg_wgt->rime_sqi_thd.cime_sad_th0 = rime_sqi_cime_sad_th[scene_motion_flag];
483     reg_wgt->fme_sqi_thd0.cime_sad_pu16_th = fme_sqi_cime_sad_pu16_th[scene_motion_flag];
484     reg_wgt->fme_sqi_thd0.cime_sad_pu32_th = fme_sqi_cime_sad_pu32_th[scene_motion_flag];
485     reg_wgt->fme_sqi_thd1.cime_sad_pu64_th = fme_sqi_cime_sad_pu64_th[scene_motion_flag];
486     rc_regs->klut_ofst.chrm_klut_ofst = chrm_klut_ofst[scene_motion_flag];
487 }
488 
/*
 * Fold the hardware statistics of the finished frame into the tuning state
 * and report motion / complexity levels to rate control.
 *
 * p        - HalH265eVepu580Tune pointer (opaque to the caller); NULL is a no-op.
 * rc_info  - receives motion_level (0 / 100 / 200) and complex_level (0 / 1 / 2).
 *
 * Steps: accumulate the per-tile counters into the frame feedback, classify
 * the frame motion level, derive md_flag from a weighted score over the
 * motion history, update curr_scene_motion_flag from the md_flag history
 * recorded BEFORE this frame, then push the new values into the histories.
 */
static void vepu580_h265e_tune_stat_update(void *p, EncRcTaskInfo *rc_info)
{
    HalH265eVepu580Tune *tune = (HalH265eVepu580Tune *)p;
    H265eV580HalContext *ctx;
    Vepu580H265Fbk *fb;
    RK_S32 *flags;
    RK_S32 cnt[5];
    RK_S32 score_thd = ((MD_WIN_LEN - 2) * 6 + 2 * 8 + 2 * 11 + 2 * 13) / 2;
    RK_S32 score;
    RK_S32 madp;
    RK_S32 md_flag;
    RK_S32 mb_num;
    RK_S32 mvbit;
    RK_S32 scene_flag;
    RK_U32 md_cnt;
    RK_U32 madi_cnt;
    RK_U32 mbs;
    RK_S32 i;
    RK_S32 j;

    if (!tune)
        return;

    ctx = tune->ctx;
    fb = &ctx->frm->feedback;
    tune->ap_motion_flag = (ctx->cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) ? 0 : 1;

    /* sum the block counters of every tile into the frame feedback */
    for (i = 0; i < (RK_S32)ctx->tile_num; i++) {
        H265eV580StatusElem *elem = ctx->frm->regs_ret[i];

        fb->st_md_sad_b16num0 += elem->st.md_sad_b16num0;
        fb->st_md_sad_b16num1 += elem->st.md_sad_b16num1;
        fb->st_md_sad_b16num2 += elem->st.md_sad_b16num2;
        fb->st_md_sad_b16num3 += elem->st.md_sad_b16num3;
        fb->st_madi_b16num0 += elem->st.madi_b16num0;
        fb->st_madi_b16num1 += elem->st.madi_b16num1;
        fb->st_madi_b16num2 += elem->st.madi_b16num2;
        fb->st_madi_b16num3 += elem->st.madi_b16num3;
    }

    mb_num = fb->st_mb_num ? fb->st_mb_num : 1;
    md_cnt = (24 * fb->st_md_sad_b16num3 + 22 * fb->st_md_sad_b16num2 + 17 *
              fb->st_md_sad_b16num1) >> 2;
    madi_cnt = (6 * fb->st_madi_b16num3 + 5 * fb->st_madi_b16num2 + 4 *
                fb->st_madi_b16num1) >> 2;
    mbs = ((ctx->cfg->prep.width + 15) / 16) * ((ctx->cfg->prep.height + 15) / 16);

    /* weighted sums of the four SAD buckets, one per madp_num_map row */
    for (i = 0; i < 5; i++) {
        cnt[i] = fb->st_md_sad_b16num0 * madp_num_map[i][0] +
                 fb->st_md_sad_b16num1 * madp_num_map[i][1] +
                 fb->st_md_sad_b16num2 * madp_num_map[i][2] +
                 fb->st_md_sad_b16num3 * madp_num_map[i][3];
    }

    tune->pre_madi[0] = fb->st_madi;
    tune->pre_madp[0] = fb->st_madp;

    mvbit = (0 != tune->ap_motion_flag) ? 15 : 10;

    /* classify the motion level of this frame */
    if (0 == cnt[4]) {
        madp = MOTION_LEVEL_UNKNOW_SCENE;
    } else {
        RK_S32 base = tune->ap_motion_flag * 3;

        madp = MOTION_LEVEL_STILL;
        for (i = 0; i < 3; i++, base++) {
            if (tune->pre_madp[0] < ctu_avg_madp_thd[i])
                continue;

            /* only the first matching threshold row is evaluated */
            if (cnt[0] > (mb_num * ctu_madp_cnt_thd[base][0]) >> mvbit ||
                cnt[1] > (mb_num * ctu_madp_cnt_thd[base][1]) >> mvbit ||
                cnt[2] > (mb_num * ctu_madp_cnt_thd[base][2]) >> mvbit ||
                ((cnt[0] > (mb_num * ctu_madp_cnt_thd[base][3]) >> mvbit ||
                  cnt[1] > (mb_num * ctu_madp_cnt_thd[base][4]) >> mvbit) &&
                 cnt[3] < (mb_num * ctu_madp_cnt_thd[base][5]) >> mvbit) ||
                cnt[3] < (mb_num * ctu_madp_cnt_thd[base][6]) >> mvbit) {
                madp = MOTION_LEVEL_BIG_MOTION;
            } else if (cnt[3] < (mb_num * ctu_madp_cnt_thd[base][7]) >> mvbit) {
                madp = MOTION_LEVEL_MOTION;
            }
            break;
        }
    }

    /* weighted score: current frame 13, previous frames 11, 8, then 6 each */
    score = tune->md_madp[0] * 11 + tune->md_madp[1] * 8;
    if (MOTION_LEVEL_UNKNOW_SCENE != madp)
        score += madp * 13;
    else
        score_thd -= 13;    /* current frame does not take part */

    for (j = 2; j < MD_WIN_LEN; j++)
        score += tune->md_madp[j] * 6;

    md_flag = (score >= score_thd) ? 1 : 0;

    /* decide on the md_flag history as it was before this frame */
    flags = tune->md_flag_matrix;
    if (flags[0] && flags[1] && flags[2])
        scene_flag = 1;
    else if ((flags[0] && flags[1]) || (flags[1] && flags[2] && flags[3]))
        scene_flag = md_flag;
    else
        scene_flag = 0;
    tune->curr_scene_motion_flag = scene_flag;

    /* push the new samples, newest entry lives at index 0 */
    if (MOTION_LEVEL_UNKNOW_SCENE != madp) {
        for (j = MD_WIN_LEN - 1; j > 0; j--)
            tune->md_madp[j] = tune->md_madp[j - 1];
        tune->md_madp[0] = madp;
    }

    for (j = MD_SHOW_LEN - 1; j > 0; j--)
        tune->md_flag_matrix[j] = tune->md_flag_matrix[j - 1];
    tune->md_flag_matrix[0] = md_flag;

    for (j = FRAME_MOTION_ANALYSIS_NUM - 1; j > 0; j--)
        tune->scene_motion_flag_matrix[j] = tune->scene_motion_flag_matrix[j - 1];
    tune->scene_motion_flag_matrix[0] = tune->curr_scene_motion_flag;

    tune->pre_madi[1] = tune->pre_madi[0];
    tune->pre_madp[1] = tune->pre_madp[0];

    /* report to rate control, thresholds are percentages of the 16x16 count */
    if (md_cnt * 100 > 15 * mbs)
        rc_info->motion_level = 200;
    else if (md_cnt * 100 > 5 * mbs)
        rc_info->motion_level = 100;
    else
        rc_info->motion_level = 0;

    if (madi_cnt * 100 > 30 * mbs)
        rc_info->complex_level = 2;
    else if (madi_cnt * 100 > 13 * mbs)
        rc_info->complex_level = 1;
    else
        rc_info->complex_level = 0;

    hal_h265e_dbg_detail("motion_level = %u, complex_level = %u\n", rc_info->motion_level,
                         rc_info->complex_level);
}
630 
vepu580_setup_qpmap_buf(H265eV580HalContext * ctx)631 static MPP_RET vepu580_setup_qpmap_buf(H265eV580HalContext *ctx)
632 {
633     MPP_RET ret = MPP_OK;
634     RK_S32 w = ctx->cfg->prep.width;
635     RK_S32 h = ctx->cfg->prep.height;
636     RK_S32 ctu_w = MPP_ALIGN(w, 64) / 64;
637     RK_S32 ctu_h = MPP_ALIGN(h, 64) / 64;
638     RK_S32 qpmap_base_cfg_size   = ctx->qpmap_base_cfg_size
639                                    = ctu_w * ctu_h * 64;
640     RK_S32 qpmap_qp_cfg_size     = ctx->qpmap_qp_cfg_size
641                                    = ctu_w * ctu_h * 192;
642     RK_S32 md_flag_size = ctx->md_flag_size
643                           = ctu_w * ctu_h * 16;
644 
645     if (!ctx->cfg->tune.deblur_en) {
646         mpp_log("deblurring is closed!\n");
647         goto __RET;
648     }
649 
650     if (NULL == ctx->qpmap_base_cfg_buf) {
651         mpp_buffer_get(NULL, &ctx->qpmap_base_cfg_buf, qpmap_base_cfg_size);
652         if (!ctx->qpmap_base_cfg_buf) {
653             mpp_err("qpmap_base_cfg_buf malloc fail, qpmap invalid\n");
654             ret = MPP_ERR_VALUE;
655             goto __RET;
656         }
657     }
658 
659     if (NULL == ctx->qpmap_qp_cfg_buf) {
660         mpp_buffer_get(NULL, &ctx->qpmap_qp_cfg_buf, qpmap_qp_cfg_size);
661         if (!ctx->qpmap_qp_cfg_buf) {
662             mpp_err("qpmap_qp_cfg_buf malloc fail, qpmap invalid\n");
663             ret = MPP_ERR_VALUE;
664             goto __RET;
665         }
666     }
667 
668     if (NULL == ctx->md_flag_buf) {
669         ctx->md_flag_buf = mpp_malloc(RK_U8, md_flag_size);
670         if (!ctx->md_flag_buf) {
671             mpp_err("md_flag_buf malloc fail, qpmap invalid\n");
672             ret = MPP_ERR_VALUE;
673             goto __RET;
674         }
675     }
676 
677 __RET:
678     hal_h265e_dbg_func("leave, ret %d\n", ret);
679     return ret;
680 }