1 /*
2 * Copyright 2024 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "hal_enc_task.h"
18 #include "hal_h265e_vepu510_reg.h"
19
20 typedef struct HalH265eVepu510Tune_t {
21 H265eV510HalContext *ctx;
22
23 RK_U8 *qm_mv_buf; /* qpmap move flag buffer */
24 RK_U32 qm_mv_buf_size;
25 Vepu510NpuOut *obj_out; /* object map from npu */
26
27 RK_S32 pre_madp[2];
28 RK_S32 pre_madi[2];
29 } HalH265eVepu510Tune;
30
31 static RK_U32 aq_thd_default[16] = {
32 0, 0, 0, 0, 3, 3, 5, 5,
33 8, 8, 15, 15, 20, 25, 25, 25
34 };
35
36 static RK_S32 aq_qp_delta_default[16] = {
37 -8, -7, -6, -5, -4, -3, -2, -1,
38 1, 2, 3, 4, 5, 6, 7, 8
39 };
40
41 static RK_U32 aq_thd_smt_I[16] = {
42 1, 2, 3, 3, 3, 3, 5, 5,
43 8, 8, 13, 15, 20, 25, 25, 25
44 };
45
46 static RK_S32 aq_qp_delta_smt_I[16] = {
47 -8, -7, -6, -5, -4, -3, -2, -1,
48 1, 2, 3, 5, 7, 8, 9, 9
49 };
50
51 static RK_U32 aq_thd_smt_P[16] = {
52 0, 0, 0, 0, 3, 3, 5, 5,
53 8, 8, 15, 15, 20, 25, 25, 25
54 };
55
56 static RK_S32 aq_qp_delta_smt_P[16] = {
57 -8, -7, -6, -5, -4, -3, -2, -1,
58 1, 2, 3, 4, 6, 7, 9, 9
59 };
60
vepu510_h265e_tune_init(H265eV510HalContext * ctx)61 static HalH265eVepu510Tune *vepu510_h265e_tune_init(H265eV510HalContext *ctx)
62 {
63 HalH265eVepu510Tune *tune = mpp_calloc(HalH265eVepu510Tune, 1);
64
65 if (NULL == tune)
66 return tune;
67
68 tune->ctx = ctx;
69 tune->pre_madi[0] = tune->pre_madi[1] = -1;
70 tune->pre_madp[0] = tune->pre_madp[1] = -1;
71
72 return tune;
73 }
74
vepu510_h265e_tune_deinit(void * tune)75 static void vepu510_h265e_tune_deinit(void *tune)
76 {
77 HalH265eVepu510Tune *t = (HalH265eVepu510Tune *)tune;
78
79 MPP_FREE(t->qm_mv_buf);
80 MPP_FREE(tune);
81 }
82
/*
 * Load the AQ threshold / qp step tables into the encoder hardware config.
 *
 * When ctx->smart_en is set the *_smt_I / *_smt_P tables are used for the
 * intra / inter slots; otherwise the default tables are used for both.
 * A NULL tune is ignored.
 */
static void vepu510_h265e_tune_aq_prepare(HalH265eVepu510Tune *tune)
{
    if (NULL == tune)
        return;

    H265eV510HalContext *hal = tune->ctx;
    MppEncHwCfg *hw_cfg = &hal->cfg->hw;

    /* start from the default tables, switch to the smart ones if enabled */
    const RK_U32 *thd_i = aq_thd_default;
    const RK_U32 *thd_p = aq_thd_default;
    const RK_S32 *step_i = aq_qp_delta_default;
    const RK_S32 *step_p = aq_qp_delta_default;

    if (hal->smart_en) {
        thd_i = aq_thd_smt_I;
        thd_p = aq_thd_smt_P;
        step_i = aq_qp_delta_smt_I;
        step_p = aq_qp_delta_smt_P;
    }

    /* copy size follows the destination arrays in MppEncHwCfg */
    memcpy(hw_cfg->aq_thrd_i, thd_i, sizeof(hw_cfg->aq_thrd_i));
    memcpy(hw_cfg->aq_thrd_p, thd_p, sizeof(hw_cfg->aq_thrd_p));
    memcpy(hw_cfg->aq_step_i, step_i, sizeof(hw_cfg->aq_step_i));
    memcpy(hw_cfg->aq_step_p, step_p, sizeof(hw_cfg->aq_step_p));
}
104
/*
 * Program the AQ section of the rc/roi register set for the current frame.
 *
 * Thresholds and qp steps come from the hardware config (hw->aq_thrd_* and
 * hw->aq_step_*); the intra tables are used when ctx->frame_type is
 * INTRA_FRAME, the inter tables otherwise. tune must not be NULL.
 */
static void vepu510_h265e_tune_aq(HalH265eVepu510Tune *tune)
{
    H265eV510HalContext *hal = tune->ctx;
    H265eV510RegSet *regs = hal->frm->regs_set;
    Vepu510RcRoi *roi = &regs->reg_rc_roi;
    MppEncHwCfg *hw_cfg = &hal->cfg->hw;
    /* frame type does not change while the tables are copied */
    const RK_S32 intra = (hal->frame_type == INTRA_FRAME);
    RK_S32 step[16];
    RK_U32 n;

    for (n = 0; n < MPP_ARRAY_ELEMS(aq_thd_default); n++) {
        roi->aq_tthd[n] = intra ? hw_cfg->aq_thrd_i[n] : hw_cfg->aq_thrd_p[n];
        /* only the low 5 bits of each step are kept for the register field */
        step[n] = (intra ? hw_cfg->aq_step_i[n] : hw_cfg->aq_step_p[n]) & 0x1F;
    }

    /* lower half of the step table */
    roi->aq_stp0.aq_stp_s0    = step[0];
    roi->aq_stp0.aq_stp_0t1   = step[1];
    roi->aq_stp0.aq_stp_1t2   = step[2];
    roi->aq_stp0.aq_stp_2t3   = step[3];
    roi->aq_stp0.aq_stp_3t4   = step[4];
    roi->aq_stp0.aq_stp_4t5   = step[5];
    roi->aq_stp1.aq_stp_5t6   = step[6];
    roi->aq_stp1.aq_stp_6t7   = step[7];

    /* the 7t8 slot has no table entry and is written as 0 */
    roi->aq_stp1.aq_stp_7t8   = 0;

    /* upper half of the step table */
    roi->aq_stp1.aq_stp_8t9   = step[8];
    roi->aq_stp1.aq_stp_9t10  = step[9];
    roi->aq_stp1.aq_stp_10t11 = step[10];
    roi->aq_stp2.aq_stp_11t12 = step[11];
    roi->aq_stp2.aq_stp_12t13 = step[12];
    roi->aq_stp2.aq_stp_13t14 = step[13];
    roi->aq_stp2.aq_stp_14t15 = step[14];
    roi->aq_stp2.aq_stp_b15   = step[15];

    /* fixed AQ clip configuration */
    roi->aq_clip.aq16_rnge = 5;
    roi->aq_clip.aq32_rnge = 5;
    roi->aq_clip.aq8_rnge  = 10;
    roi->aq_clip.aq16_dif0 = 12;
    roi->aq_clip.aq16_dif1 = 12;
    roi->aq_clip.aq_rme_en = 1;
    roi->aq_clip.aq_cme_en = 1;
}
151
vepu510_h265e_tune_qpmap_init(HalH265eVepu510Tune * tune)152 static MPP_RET vepu510_h265e_tune_qpmap_init(HalH265eVepu510Tune *tune)
153 {
154 H265eV510HalContext *ctx = tune->ctx;
155 Vepu510H265eFrmCfg *frm = ctx->frm;
156 H265eV510RegSet *regs = frm->regs_set;
157 H265eVepu510Frame *reg_frm = ®s->reg_frm;
158 RK_S32 w32 = MPP_ALIGN(ctx->cfg->prep.width, 32);
159 RK_S32 h32 = MPP_ALIGN(ctx->cfg->prep.height, 32);
160 RK_S32 roir_buf_fd = -1;
161
162 if (frm->roi_data) {
163 //TODO: external qpmap buffer
164 } else {
165 if (NULL == frm->roir_buf) {
166 if (NULL == ctx->roi_grp)
167 mpp_buffer_group_get_internal(&ctx->roi_grp, MPP_BUFFER_TYPE_ION);
168
169 //TODO: bmap_mdc_dpth = 1 ???
170 frm->roir_buf_size = w32 * h32 / 256 * 4;
171 mpp_buffer_get(ctx->roi_grp, &frm->roir_buf, frm->roir_buf_size);
172 }
173
174 roir_buf_fd = mpp_buffer_get_fd(frm->roir_buf);
175 }
176
177 if (frm->roir_buf == NULL) {
178 mpp_err("failed to get roir_buf\n");
179 return MPP_ERR_MALLOC;
180 }
181 reg_frm->common.adr_roir = roir_buf_fd;
182
183 if (tune->qm_mv_buf == NULL) {
184 tune->qm_mv_buf_size = w32 * h32 / 256;
185 tune->qm_mv_buf = mpp_calloc(RK_U8, tune->qm_mv_buf_size);
186 if (NULL == tune->qm_mv_buf) {
187 mpp_err("failed to get qm_mv_buf\n");
188 return MPP_ERR_MALLOC;
189 }
190 }
191
192 hal_h265e_dbg_ctl("roir_buf_fd %d, size %d qm_mv_buf %p size %d\n",
193 roir_buf_fd, frm->roir_buf_size, tune->qm_mv_buf,
194 tune->qm_mv_buf_size);
195 return MPP_OK;
196 }
197
/*
 * Prepare qpmap tuning for one task.
 *
 * Currently this only runs vepu510_h265e_tune_qpmap_init(); task is unused.
 * On failure an error is logged and the function returns without the
 * "leave" trace. p is the HalH265eVepu510Tune object and must not be NULL.
 */
static void vepu510_h265e_tune_qpmap(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune = (HalH265eVepu510Tune *)p;
    MPP_RET status;

    (void)task;
    hal_h265e_dbg_func("enter\n");

    status = vepu510_h265e_tune_qpmap_init(tune);
    if (MPP_OK == status) {
        hal_h265e_dbg_func("leave\n");
    } else {
        mpp_err("failed to init qpmap\n");
    }
}
214
/*
 * Apply tuning to the register set of the current frame.
 *
 * Always programs the AQ registers; qpmap tuning runs only when
 * ctx->qpmap_en is set and the task carries md_info.
 * A NULL p is ignored.
 */
static void vepu510_h265e_tune_reg_patch(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune = (HalH265eVepu510Tune *)p;

    if (!tune)
        return;

    H265eV510HalContext *hal = tune->ctx;

    vepu510_h265e_tune_aq(tune);

    /* qpmap needs both the feature switch and motion detection info */
    if (!hal->qpmap_en)
        return;

    if (NULL == task->md_info)
        return;

    vepu510_h265e_tune_qpmap(tune, task);
}
229
/*
 * Convert the hardware statistics of a finished task into rate control
 * feedback.
 *
 * Reads the madi / madp threshold counters from the status registers of the
 * frame selected by task->flags.reg_idx and fills, in task->rc_task->info:
 *   - complex_level : 0 / 1 / 2 from the weighted madi counters
 *   - motion_level  : 0 / 1 / 100 / 200 from the weighted madp counters
 *   - bit_real      : accumulated output stream size in bits
 *   - madi, madp    : 16x16 sums divided by the accumulated block count
 *   - dsp_y_avg     : dsp_y_sum divided by (w32 / 4 * h32 / 4)
 * and, in the frame feedback, st_madi / st_madp / st_mb_num / frame_type.
 *
 * Averages whose divisor is zero are reported as 0 instead of dividing.
 * A NULL p is ignored; task must not be NULL.
 */
static void vepu510_h265e_tune_stat_update(void *p, HalEncTask *task)
{
    HalH265eVepu510Tune *tune = (HalH265eVepu510Tune *)p;

    if (NULL == tune)
        return;

    hal_h265e_dbg_func("enter\n");

    EncRcTaskInfo *hal_rc_ret = (EncRcTaskInfo *)&task->rc_task->info;
    H265eV510HalContext *ctx = tune->ctx;
    RK_S32 task_idx = task->flags.reg_idx;
    Vepu510H265eFrmCfg *frm = ctx->frms[task_idx];
    Vepu510H265Fbk *fb = &frm->feedback;
    H265eV510RegSet *regs_set = frm->regs_set;
    Vepu510RcRoi *roi = &regs_set->reg_rc_roi;
    H265eV510StatusElem *elem = frm->regs_ret;
    MppEncCfgSet *cfg = ctx->cfg;
    RK_S32 w32 = MPP_ALIGN(cfg->prep.width, 32);
    RK_S32 h32 = MPP_ALIGN(cfg->prep.height, 32);
    /* divisor for the dsp_y average, same expression as used for the sum */
    RK_S32 dsp_blk_num = w32 / 4 * h32 / 4;
    RK_U32 b16_num = MPP_ALIGN(cfg->prep.width, 16) * MPP_ALIGN(cfg->prep.height, 16) / 256;
    RK_U32 madi_cnt = 0, madp_cnt = 0;

    /* each counter is reported per picture quadrant (lt / rt / lb / rb) */
    RK_U32 madi_th_cnt0 = elem->st.st_madi_lt_num0.madi_th_lt_cnt0 +
                          elem->st.st_madi_rt_num0.madi_th_rt_cnt0 +
                          elem->st.st_madi_lb_num0.madi_th_lb_cnt0 +
                          elem->st.st_madi_rb_num0.madi_th_rb_cnt0;
    RK_U32 madi_th_cnt1 = elem->st.st_madi_lt_num0.madi_th_lt_cnt1 +
                          elem->st.st_madi_rt_num0.madi_th_rt_cnt1 +
                          elem->st.st_madi_lb_num0.madi_th_lb_cnt1 +
                          elem->st.st_madi_rb_num0.madi_th_rb_cnt1;
    RK_U32 madi_th_cnt2 = elem->st.st_madi_lt_num1.madi_th_lt_cnt2 +
                          elem->st.st_madi_rt_num1.madi_th_rt_cnt2 +
                          elem->st.st_madi_lb_num1.madi_th_lb_cnt2 +
                          elem->st.st_madi_rb_num1.madi_th_rb_cnt2;
    RK_U32 madi_th_cnt3 = elem->st.st_madi_lt_num1.madi_th_lt_cnt3 +
                          elem->st.st_madi_rt_num1.madi_th_rt_cnt3 +
                          elem->st.st_madi_lb_num1.madi_th_lb_cnt3 +
                          elem->st.st_madi_rb_num1.madi_th_rb_cnt3;
    RK_U32 madp_th_cnt0 = elem->st.st_madp_lt_num0.madp_th_lt_cnt0 +
                          elem->st.st_madp_rt_num0.madp_th_rt_cnt0 +
                          elem->st.st_madp_lb_num0.madp_th_lb_cnt0 +
                          elem->st.st_madp_rb_num0.madp_th_rb_cnt0;
    RK_U32 madp_th_cnt1 = elem->st.st_madp_lt_num0.madp_th_lt_cnt1 +
                          elem->st.st_madp_rt_num0.madp_th_rt_cnt1 +
                          elem->st.st_madp_lb_num0.madp_th_lb_cnt1 +
                          elem->st.st_madp_rb_num0.madp_th_rb_cnt1;
    RK_U32 madp_th_cnt2 = elem->st.st_madp_lt_num1.madp_th_lt_cnt2 +
                          elem->st.st_madp_rt_num1.madp_th_rt_cnt2 +
                          elem->st.st_madp_lb_num1.madp_th_lb_cnt2 +
                          elem->st.st_madp_rb_num1.madp_th_rb_cnt2;
    RK_U32 madp_th_cnt3 = elem->st.st_madp_lt_num1.madp_th_lt_cnt3 +
                          elem->st.st_madp_rt_num1.madp_th_rt_cnt3 +
                          elem->st.st_madp_lb_num1.madp_th_lb_cnt3 +
                          elem->st.st_madp_rb_num1.madp_th_rb_cnt3;

    /* complexity: weighted madi count compared with 13% / 30% of all 16x16 blocks */
    madi_cnt = (6 * madi_th_cnt3 + 5 * madi_th_cnt2 + 4 * madi_th_cnt1) >> 2;
    hal_rc_ret->complex_level = (madi_cnt * 100 > 30 * b16_num) ? 2 :
                                (madi_cnt * 100 > 13 * b16_num) ? 1 : 0;

    {
        RK_U32 md_cnt = 0, motion_level = 0;

        /* smart encoding uses smaller weights for the madp counters */
        if (ctx->smart_en)
            md_cnt = (12 * madp_th_cnt3 + 11 * madp_th_cnt2 + 8 * madp_th_cnt1) >> 2;
        else
            md_cnt = (24 * madp_th_cnt3 + 22 * madp_th_cnt2 + 17 * madp_th_cnt1) >> 2;

        if (md_cnt * 100 > 15 * b16_num)
            motion_level = 200;
        else if (md_cnt * 100 > 5 * b16_num)
            motion_level = 100;
        else if (md_cnt * 100 > (b16_num >> 2))
            motion_level = 1;
        else
            motion_level = 0;
        hal_rc_ret->motion_level = motion_level;
    }
    hal_h265e_dbg_st("frame %d complex_level %d motion_level %d\n",
                     ctx->frame_num - 1, hal_rc_ret->complex_level, hal_rc_ret->motion_level);

    /*
     * Estimate the mean madi from the histogram: each bin is weighted by its
     * threshold (outer bins) or by the midpoint of its two thresholds.
     */
    fb->st_madi = madi_th_cnt0 * roi->madi_st_thd.madi_th0 +
                  madi_th_cnt1 * (roi->madi_st_thd.madi_th0 +
                                  roi->madi_st_thd.madi_th1) / 2 +
                  madi_th_cnt2 * (roi->madi_st_thd.madi_th1 +
                                  roi->madi_st_thd.madi_th2) / 2 +
                  madi_th_cnt3 * roi->madi_st_thd.madi_th2;

    madi_cnt = madi_th_cnt0 + madi_th_cnt1 + madi_th_cnt2 + madi_th_cnt3;
    if (madi_cnt)
        fb->st_madi = fb->st_madi / madi_cnt;

    /* same estimate for madp */
    fb->st_madp = madp_th_cnt0 * roi->madp_st_thd0.madp_th0 +
                  madp_th_cnt1 * (roi->madp_st_thd0.madp_th0 +
                                  roi->madp_st_thd0.madp_th1) / 2 +
                  madp_th_cnt2 * (roi->madp_st_thd0.madp_th1 +
                                  roi->madp_st_thd1.madp_th2) / 2 +
                  madp_th_cnt3 * roi->madp_st_thd1.madp_th2;

    madp_cnt = madp_th_cnt0 + madp_th_cnt1 + madp_th_cnt2 + madp_th_cnt3;
    if (madp_cnt)
        fb->st_madp = fb->st_madp / madp_cnt;

    fb->st_mb_num += elem->st.st_bnum_b16.num_b16;
    fb->frame_type = task->rc_task->frm.is_intra ? INTRA_FRAME : INTER_P_FRAME;
    hal_rc_ret->bit_real += fb->out_strm_size * 8;

    /* the hardware may report no blocks at all; never divide by zero */
    if (fb->st_mb_num) {
        hal_rc_ret->madi = elem->st.madi16_sum / fb->st_mb_num;
        hal_rc_ret->madp = elem->st.madp16_sum / fb->st_mb_num;
    } else {
        hal_rc_ret->madi = 0;
        hal_rc_ret->madp = 0;
    }

    /* a zero sized picture config would make the divisor zero as well */
    if (dsp_blk_num != 0)
        hal_rc_ret->dsp_y_avg = elem->st.dsp_y_sum / dsp_blk_num;
    else
        hal_rc_ret->dsp_y_avg = 0;

    hal_h265e_dbg_st("frame %d bit_real %d quality_real %d dsp_y_avg %3d\n", ctx->frame_num - 1,
                     hal_rc_ret->bit_real, hal_rc_ret->quality_real, hal_rc_ret->dsp_y_avg);

    hal_h265e_dbg_func("leave\n");
}