xref: /rockchip-linux_mpp/mpp/hal/rkenc/h265e/hal_h265e_vepu510_tune.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2024 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "hal_enc_task.h"
18 #include "hal_h265e_vepu510_reg.h"
19 
/*
 * Tuning state attached to one VEPU510 H.265 HAL context.
 *
 * Created by vepu510_h265e_tune_init() and released by
 * vepu510_h265e_tune_deinit(); only qm_mv_buf and the object itself are
 * freed there.
 */
typedef struct HalH265eVepu510Tune_t {
    H265eV510HalContext *ctx; /* HAL context given to tune_init; not freed by tune_deinit */

    RK_U8 *qm_mv_buf; /* qpmap move flag buffer */
    RK_U32 qm_mv_buf_size; /* number of RK_U8 entries in qm_mv_buf (w32 * h32 / 256) */
    Vepu510NpuOut *obj_out; /* object map from npu */

    /* both pairs are set to -1 by tune_init; presumably previous-frame MADP / MADI history - TODO confirm */
    RK_S32 pre_madp[2];
    RK_S32 pre_madi[2];
} HalH265eVepu510Tune;
30 
/*
 * AQ threshold table used when smart mode is off: copied into both
 * hw->aq_thrd_i and hw->aq_thrd_p by vepu510_h265e_tune_aq_prepare().
 * Its element count (16) also bounds the loop in vepu510_h265e_tune_aq().
 */
static RK_U32 aq_thd_default[16] = {
    0,  0,  0,  0,  3,  3,  5,  5,
    8,  8,  15, 15, 20, 25, 25, 25
};
35 
/*
 * AQ step (QP delta) table used when smart mode is off: copied into both
 * hw->aq_step_i and hw->aq_step_p by vepu510_h265e_tune_aq_prepare().
 */
static RK_S32 aq_qp_delta_default[16] = {
    -8, -7, -6, -5, -4, -3, -2, -1,
    1,  2,  3,  4,  5,  6,  7,  8
};
40 
/*
 * AQ threshold table copied into hw->aq_thrd_i by
 * vepu510_h265e_tune_aq_prepare() when ctx->smart_en is set.
 */
static RK_U32 aq_thd_smt_I[16] = {
    1,  2,  3,   3,  3,  3,  5,  5,
    8,  8,  13,  15, 20, 25, 25, 25
};
45 
/*
 * AQ step (QP delta) table copied into hw->aq_step_i by
 * vepu510_h265e_tune_aq_prepare() when ctx->smart_en is set.
 */
static RK_S32 aq_qp_delta_smt_I[16] = {
    -8, -7, -6, -5, -4, -3, -2, -1,
    1,  2,  3,  5,  7,  8,  9,  9
};
50 
/*
 * AQ threshold table copied into hw->aq_thrd_p by
 * vepu510_h265e_tune_aq_prepare() when ctx->smart_en is set.
 */
static RK_U32 aq_thd_smt_P[16] = {
    0,  0,  0,   0,  3,  3,  5,  5,
    8,  8,  15, 15, 20, 25, 25, 25
};
55 
/*
 * AQ step (QP delta) table copied into hw->aq_step_p by
 * vepu510_h265e_tune_aq_prepare() when ctx->smart_en is set.
 */
static RK_S32 aq_qp_delta_smt_P[16] = {
    -8, -7, -6, -5, -4, -3, -2, -1,
    1,  2,  3,  4,  6,  7,  9,  9
};
60 
vepu510_h265e_tune_init(H265eV510HalContext * ctx)61 static HalH265eVepu510Tune *vepu510_h265e_tune_init(H265eV510HalContext *ctx)
62 {
63     HalH265eVepu510Tune *tune = mpp_calloc(HalH265eVepu510Tune, 1);
64 
65     if (NULL == tune)
66         return tune;
67 
68     tune->ctx = ctx;
69     tune->pre_madi[0] = tune->pre_madi[1] = -1;
70     tune->pre_madp[0] = tune->pre_madp[1] = -1;
71 
72     return tune;
73 }
74 
vepu510_h265e_tune_deinit(void * tune)75 static void vepu510_h265e_tune_deinit(void *tune)
76 {
77     HalH265eVepu510Tune *t = (HalH265eVepu510Tune *)tune;
78 
79     MPP_FREE(t->qm_mv_buf);
80     MPP_FREE(tune);
81 }
82 
/*
 * Load the AQ threshold / step tables into the encoder hardware config.
 *
 * tune - tuning object; NULL is ignored.
 *
 * When ctx->smart_en is set the dedicated smart-mode I and P tables are
 * used, otherwise the default tables are used for both frame kinds. The
 * number of bytes copied is the size of each destination array in
 * MppEncHwCfg.
 */
static void vepu510_h265e_tune_aq_prepare(HalH265eVepu510Tune *tune)
{
    /* start from the defaults and override below for smart mode */
    const RK_U32 *thd_i = aq_thd_default;
    const RK_U32 *thd_p = aq_thd_default;
    const RK_S32 *step_i = aq_qp_delta_default;
    const RK_S32 *step_p = aq_qp_delta_default;
    MppEncHwCfg *hw_cfg;

    if (NULL == tune)
        return;

    hw_cfg = &tune->ctx->cfg->hw;

    if (tune->ctx->smart_en) {
        thd_i = aq_thd_smt_I;
        thd_p = aq_thd_smt_P;
        step_i = aq_qp_delta_smt_I;
        step_p = aq_qp_delta_smt_P;
    }

    memcpy(hw_cfg->aq_thrd_i, thd_i, sizeof(hw_cfg->aq_thrd_i));
    memcpy(hw_cfg->aq_thrd_p, thd_p, sizeof(hw_cfg->aq_thrd_p));
    memcpy(hw_cfg->aq_step_i, step_i, sizeof(hw_cfg->aq_step_i));
    memcpy(hw_cfg->aq_step_p, step_p, sizeof(hw_cfg->aq_step_p));
}
104 
/*
 * Program the AQ section of the rc/roi register set for the current frame.
 *
 * tune - tuning object; must not be NULL (the caller in this file checks).
 *
 * The I-frame tables from the hardware config are used when
 * ctx->frame_type is INTRA_FRAME, the P-frame tables otherwise. Each step
 * value is masked to its low 5 bits before being written to a register
 * field.
 */
static void vepu510_h265e_tune_aq(HalH265eVepu510Tune *tune)
{
    H265eV510HalContext *ctx = tune->ctx;
    H265eV510RegSet *regs = ctx->frm->regs_set;
    Vepu510RcRoi *roi = &regs->reg_rc_roi;
    MppEncHwCfg *hw = &ctx->cfg->hw;
    const RK_U32 intra = (ctx->frame_type == INTRA_FRAME);
    RK_S32 stp[16];
    RK_U32 n;

    for (n = 0; n < MPP_ARRAY_ELEMS(aq_thd_default); n++) {
        roi->aq_tthd[n] = intra ? hw->aq_thrd_i[n] : hw->aq_thrd_p[n];
        stp[n] = (intra ? hw->aq_step_i[n] : hw->aq_step_p[n]) & 0x1F;
    }

    /* first register: below-threshold-0 step and the next five intervals */
    roi->aq_stp0.aq_stp_s0 = stp[0];
    roi->aq_stp0.aq_stp_0t1 = stp[1];
    roi->aq_stp0.aq_stp_1t2 = stp[2];
    roi->aq_stp0.aq_stp_2t3 = stp[3];
    roi->aq_stp0.aq_stp_3t4 = stp[4];
    roi->aq_stp0.aq_stp_4t5 = stp[5];

    /*
     * second register: the 7t8 field gets no table entry and is forced to 0,
     * so the 16 table values spread over 17 register fields
     */
    roi->aq_stp1.aq_stp_5t6 = stp[6];
    roi->aq_stp1.aq_stp_6t7 = stp[7];
    roi->aq_stp1.aq_stp_7t8 = 0;
    roi->aq_stp1.aq_stp_8t9 = stp[8];
    roi->aq_stp1.aq_stp_9t10 = stp[9];
    roi->aq_stp1.aq_stp_10t11 = stp[10];

    /* third register */
    roi->aq_stp2.aq_stp_11t12 = stp[11];
    roi->aq_stp2.aq_stp_12t13 = stp[12];
    roi->aq_stp2.aq_stp_13t14 = stp[13];
    roi->aq_stp2.aq_stp_14t15 = stp[14];
    roi->aq_stp2.aq_stp_b15 = stp[15];

    /* fixed AQ clip settings */
    roi->aq_clip.aq16_rnge = 5;
    roi->aq_clip.aq32_rnge = 5;
    roi->aq_clip.aq8_rnge = 10;
    roi->aq_clip.aq16_dif0 = 12;
    roi->aq_clip.aq16_dif1 = 12;
    roi->aq_clip.aq_rme_en = 1;
    roi->aq_clip.aq_cme_en = 1;
}
151 
vepu510_h265e_tune_qpmap_init(HalH265eVepu510Tune * tune)152 static MPP_RET vepu510_h265e_tune_qpmap_init(HalH265eVepu510Tune *tune)
153 {
154     H265eV510HalContext *ctx = tune->ctx;
155     Vepu510H265eFrmCfg *frm = ctx->frm;
156     H265eV510RegSet *regs = frm->regs_set;
157     H265eVepu510Frame *reg_frm = &regs->reg_frm;
158     RK_S32 w32 = MPP_ALIGN(ctx->cfg->prep.width, 32);
159     RK_S32 h32 = MPP_ALIGN(ctx->cfg->prep.height, 32);
160     RK_S32 roir_buf_fd = -1;
161 
162     if (frm->roi_data) {
163         //TODO: external qpmap buffer
164     } else {
165         if (NULL == frm->roir_buf) {
166             if (NULL == ctx->roi_grp)
167                 mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);
168 
169             //TODO: bmap_mdc_dpth = 1 ???
170             frm->roir_buf_size = w32 * h32 / 256 * 4;
171             mpp_buffer_get(ctx->roi_grp, &frm->roir_buf, frm->roir_buf_size);
172         }
173 
174         roir_buf_fd = mpp_buffer_get_fd(frm->roir_buf);
175     }
176 
177     if (frm->roir_buf == NULL) {
178         mpp_err("failed to get roir_buf\n");
179         return MPP_ERR_MALLOC;
180     }
181     reg_frm->common.adr_roir = roir_buf_fd;
182 
183     if (tune->qm_mv_buf == NULL) {
184         tune->qm_mv_buf_size = w32 * h32 / 256;
185         tune->qm_mv_buf = mpp_calloc(RK_U8, tune->qm_mv_buf_size);
186         if (NULL == tune->qm_mv_buf) {
187             mpp_err("failed to get qm_mv_buf\n");
188             return MPP_ERR_MALLOC;
189         }
190     }
191 
192     hal_h265e_dbg_ctl("roir_buf_fd %d, size %d qm_mv_buf %p size %d\n",
193                       roir_buf_fd, frm->roir_buf_size, tune->qm_mv_buf,
194                       tune->qm_mv_buf_size);
195     return MPP_OK;
196 }
197 
/*
 * Per-frame qpmap tuning step.
 *
 * p    - HalH265eVepu510Tune pointer passed as void *.
 * task - current encode task; unused at the moment.
 *
 * Currently this only runs vepu510_h265e_tune_qpmap_init() and reports an
 * error if it fails; on failure the "leave" trace is not emitted.
 */
static void vepu510_h265e_tune_qpmap(void *p, HalEncTask *task)
{
    (void)task;

    hal_h265e_dbg_func("enter\n");

    if (MPP_OK != vepu510_h265e_tune_qpmap_init((HalH265eVepu510Tune *)p)) {
        mpp_err("failed to init qpmap\n");
        return;
    }

    hal_h265e_dbg_func("leave\n");
}
214 
/*
 * Apply tuning to the register set of the frame about to be encoded.
 *
 * p    - HalH265eVepu510Tune pointer passed as void *; NULL is ignored.
 * task - current encode task; its md_info pointer gates the qpmap step.
 *
 * AQ registers are always written. The qpmap step runs only when
 * ctx->qpmap_en is set and the task carries md_info.
 */
static void vepu510_h265e_tune_reg_patch(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *t = (HalH265eVepu510Tune *)p;

    if (!t)
        return;

    vepu510_h265e_tune_aq(t);

    if (!t->ctx->qpmap_en)
        return;

    if (NULL == task->md_info)
        return;

    vepu510_h265e_tune_qpmap(t, task);
}
229 
/*
 * Fold the hardware statistics of a finished frame into the frame feedback
 * and the rate-control result of the task.
 *
 * p    - HalH265eVepu510Tune pointer passed as void *; NULL is ignored.
 * task - finished encode task; task->flags.reg_idx selects the frame config
 *        in ctx->frms[] and task->rc_task receives the results.
 *
 * Written outputs:
 *  - rc info: complex_level (0..2), motion_level (0, 1, 100 or 200),
 *    bit_real (accumulated), madi, madp, dsp_y_avg
 *  - feedback: st_madi, st_madp, st_mb_num (accumulated), frame_type
 *
 * The averages are reported as 0 when their divisor (block count or
 * down-scaled picture area) is zero, instead of dividing by zero.
 */
static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune = (HalH265eVepu510Tune *)p;
    EncRcTaskInfo *hal_rc_ret = (EncRcTaskInfo *)&task->rc_task->info;

    if (NULL == tune)
        return;

    hal_h265e_dbg_func("enter\n");
    H265eV510HalContext *ctx = tune->ctx;
    RK_S32 task_idx = task->flags.reg_idx;
    Vepu510H265eFrmCfg *frm = ctx->frms[task_idx];
    Vepu510H265Fbk *fb = &frm->feedback;
    H265eV510RegSet *regs_set = frm->regs_set;
    Vepu510RcRoi *rc_roi = &regs_set->reg_rc_roi;
    H265eV510StatusElem *elem = frm->regs_ret;
    MppEncCfgSet *cfg = ctx->cfg;
    RK_S32 w32 = MPP_ALIGN(cfg->prep.width, 32);
    RK_S32 h32 = MPP_ALIGN(cfg->prep.height, 32);
    /* divisor for the dsp_y_sum average below */
    RK_S32 dsp_blk_num = w32 / 4 * h32 / 4;
    RK_U32 b16_num = MPP_ALIGN(cfg->prep.width, 16) * MPP_ALIGN(cfg->prep.height, 16) / 256;
    RK_U32 madi_cnt = 0, madp_cnt = 0;

    /*
     * Each threshold bucket is reported separately for the four picture
     * quadrants (lt / rt / lb / rb); sum them per bucket.
     */
    RK_U32 madi_th_cnt0 = elem->st.st_madi_lt_num0.madi_th_lt_cnt0 +
                          elem->st.st_madi_rt_num0.madi_th_rt_cnt0 +
                          elem->st.st_madi_lb_num0.madi_th_lb_cnt0 +
                          elem->st.st_madi_rb_num0.madi_th_rb_cnt0;
    RK_U32 madi_th_cnt1 = elem->st.st_madi_lt_num0.madi_th_lt_cnt1 +
                          elem->st.st_madi_rt_num0.madi_th_rt_cnt1 +
                          elem->st.st_madi_lb_num0.madi_th_lb_cnt1 +
                          elem->st.st_madi_rb_num0.madi_th_rb_cnt1;
    RK_U32 madi_th_cnt2 = elem->st.st_madi_lt_num1.madi_th_lt_cnt2 +
                          elem->st.st_madi_rt_num1.madi_th_rt_cnt2 +
                          elem->st.st_madi_lb_num1.madi_th_lb_cnt2 +
                          elem->st.st_madi_rb_num1.madi_th_rb_cnt2;
    RK_U32 madi_th_cnt3 = elem->st.st_madi_lt_num1.madi_th_lt_cnt3 +
                          elem->st.st_madi_rt_num1.madi_th_rt_cnt3 +
                          elem->st.st_madi_lb_num1.madi_th_lb_cnt3 +
                          elem->st.st_madi_rb_num1.madi_th_rb_cnt3;
    RK_U32 madp_th_cnt0 = elem->st.st_madp_lt_num0.madp_th_lt_cnt0 +
                          elem->st.st_madp_rt_num0.madp_th_rt_cnt0 +
                          elem->st.st_madp_lb_num0.madp_th_lb_cnt0 +
                          elem->st.st_madp_rb_num0.madp_th_rb_cnt0;
    RK_U32 madp_th_cnt1 = elem->st.st_madp_lt_num0.madp_th_lt_cnt1 +
                          elem->st.st_madp_rt_num0.madp_th_rt_cnt1 +
                          elem->st.st_madp_lb_num0.madp_th_lb_cnt1 +
                          elem->st.st_madp_rb_num0.madp_th_rb_cnt1;
    RK_U32 madp_th_cnt2 = elem->st.st_madp_lt_num1.madp_th_lt_cnt2 +
                          elem->st.st_madp_rt_num1.madp_th_rt_cnt2 +
                          elem->st.st_madp_lb_num1.madp_th_lb_cnt2 +
                          elem->st.st_madp_rb_num1.madp_th_rb_cnt2;
    RK_U32 madp_th_cnt3 = elem->st.st_madp_lt_num1.madp_th_lt_cnt3 +
                          elem->st.st_madp_rt_num1.madp_th_rt_cnt3 +
                          elem->st.st_madp_lb_num1.madp_th_lb_cnt3 +
                          elem->st.st_madp_rb_num1.madp_th_rb_cnt3;

    /* weighted count of the upper MADI buckets -> complexity class vs. 13% / 30% of all 16x16 blocks */
    madi_cnt = (6 * madi_th_cnt3 + 5 * madi_th_cnt2 + 4 * madi_th_cnt1) >> 2;
    hal_rc_ret->complex_level = (madi_cnt * 100 > 30 * b16_num) ? 2 :
                                (madi_cnt * 100 > 13 * b16_num) ? 1 : 0;

    {
        RK_U32 md_cnt = 0, motion_level = 0;

        /* smart mode uses smaller bucket weights than normal mode */
        if (ctx->smart_en)
            md_cnt = (12 * madp_th_cnt3 + 11 * madp_th_cnt2 + 8 * madp_th_cnt1) >> 2;
        else
            md_cnt = (24 * madp_th_cnt3 + 22 * madp_th_cnt2 + 17 * madp_th_cnt1) >> 2;

        if (md_cnt * 100 > 15 * b16_num)
            motion_level = 200;
        else if (md_cnt * 100 > 5 * b16_num)
            motion_level = 100;
        else if (md_cnt * 100 > (b16_num >> 2))
            motion_level = 1;
        else
            motion_level = 0;
        hal_rc_ret->motion_level = motion_level;
    }
    hal_h265e_dbg_st("frame %d complex_level %d motion_level %d\n",
                     ctx->frame_num - 1, hal_rc_ret->complex_level, hal_rc_ret->motion_level);

    /*
     * Estimate the mean MADI: the outer buckets are weighted with their
     * threshold, the inner buckets with the midpoint of their two thresholds.
     */
    fb->st_madi = madi_th_cnt0 * rc_roi->madi_st_thd.madi_th0 +
                  madi_th_cnt1 * (rc_roi->madi_st_thd.madi_th0 +
                                  rc_roi->madi_st_thd.madi_th1) / 2 +
                  madi_th_cnt2 * (rc_roi->madi_st_thd.madi_th1 +
                                  rc_roi->madi_st_thd.madi_th2) / 2 +
                  madi_th_cnt3 * rc_roi->madi_st_thd.madi_th2;

    madi_cnt = madi_th_cnt0 + madi_th_cnt1 + madi_th_cnt2 + madi_th_cnt3;
    if (madi_cnt)
        fb->st_madi = fb->st_madi / madi_cnt;

    /* same estimate for MADP; note th2 lives in a second threshold register */
    fb->st_madp = madp_th_cnt0 * rc_roi->madp_st_thd0.madp_th0 +
                  madp_th_cnt1 * (rc_roi->madp_st_thd0.madp_th0 +
                                  rc_roi->madp_st_thd0.madp_th1) / 2 +
                  madp_th_cnt2 * (rc_roi->madp_st_thd0.madp_th1 +
                                  rc_roi->madp_st_thd1.madp_th2) / 2 +
                  madp_th_cnt3 * rc_roi->madp_st_thd1.madp_th2;

    madp_cnt = madp_th_cnt0 + madp_th_cnt1 + madp_th_cnt2 + madp_th_cnt3;
    if (madp_cnt)
        fb->st_madp =  fb->st_madp  / madp_cnt;

    fb->st_mb_num += elem->st.st_bnum_b16.num_b16;
    fb->frame_type = task->rc_task->frm.is_intra ? INTRA_FRAME : INTER_P_FRAME;
    hal_rc_ret->bit_real += fb->out_strm_size * 8;

    /* the hardware may report zero blocks; never divide by zero */
    if (fb->st_mb_num) {
        hal_rc_ret->madi = elem->st.madi16_sum / fb->st_mb_num;
        hal_rc_ret->madp = elem->st.madp16_sum / fb->st_mb_num;
    } else {
        hal_rc_ret->madi = 0;
        hal_rc_ret->madp = 0;
    }

    /* a zero picture size would make the divisor zero */
    if (dsp_blk_num)
        hal_rc_ret->dsp_y_avg = elem->st.dsp_y_sum / dsp_blk_num;
    else
        hal_rc_ret->dsp_y_avg = 0;

    hal_h265e_dbg_st("frame %d bit_real %d quality_real %d dsp_y_avg %3d\n", ctx->frame_num - 1,
                     hal_rc_ret->bit_real, hal_rc_ret->quality_real, hal_rc_ret->dsp_y_avg);

    hal_h265e_dbg_func("leave\n");
}
343 }