1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3 * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4 */
5
6 #define MODULE_TAG "hal_avs2d_vdpu383"
7
8 #include <string.h>
9 #include <stdio.h>
10
11 #include "mpp_env.h"
12 #include "mpp_log.h"
13 #include "mpp_mem.h"
14 #include "mpp_common.h"
15 #include "mpp_debug.h"
16 #include "mpp_bitput.h"
17 #include "mpp_buffer_impl.h"
18
19 #include "avs2d_syntax.h"
20 #include "vdpu383_com.h"
21 #include "vdpu383_avs2d.h"
22 #include "hal_avs2d_global.h"
23 #include "hal_avs2d_vdpu383.h"
24 #include "mpp_dec_cb_param.h"
25
/* Number of register sets; sizes reg_buf[] and rcb_buf[] in Avs2dRkvRegCtx_t */
#define VDPU383_FAST_REG_SET_CNT (3)
/* Number of reference entries walked when programming reference addresses */
#define MAX_REF_NUM (8)
/* Size of the packed sequence/picture header staging buffer (shph_dat) */
#define AVS2_383_SHPH_SIZE (208) /* bytes */
/* Size of the scaling list staging buffer (scalist_dat) */
#define AVS2_383_SCALIST_SIZE (80) /* bytes */
/* Number of 32-bit entries in the debug register read-back array (reg_out) */
#define VDPU34x_TOTAL_REG_CNT (278)

/*
 * Layout of the shared table buffer: one "stream info set" per register set,
 * each made of a 4K-aligned header area followed by a 4K-aligned scaling
 * list area.
 */
#define AVS2_383_SHPH_ALIGNED_SIZE (MPP_ALIGN(AVS2_383_SHPH_SIZE, SZ_4K))
#define AVS2_383_SCALIST_ALIGNED_SIZE (MPP_ALIGN(AVS2_383_SCALIST_SIZE, SZ_4K))
#define AVS2_383_STREAM_INFO_SET_SIZE (AVS2_383_SHPH_ALIGNED_SIZE + \
                                       AVS2_383_SCALIST_ALIGNED_SIZE)
/* Total table buffer size for cnt stream info sets */
#define AVS2_ALL_TBL_BUF_SIZE(cnt) (AVS2_383_STREAM_INFO_SET_SIZE * (cnt))
/* Byte offset of the header area of set number pos */
#define AVS2_SHPH_OFFSET(pos) (AVS2_383_STREAM_INFO_SET_SIZE * (pos))
/* Byte offset of the scaling list area of set number pos */
#define AVS2_SCALIST_OFFSET(pos) (AVS2_SHPH_OFFSET(pos) + AVS2_383_SHPH_ALIGNED_SIZE)

/*
 * Co-located MV constants.
 * NOTE(review): no use of these three macros is visible in this file;
 * calc_mv_size() hard-codes the value 16 instead - confirm whether they
 * are still needed.
 */
#define COLMV_COMPRESS_EN (1)
#define COLMV_BLOCK_SIZE (16)
#define COLMV_BYTES (16)
43
/* Bookkeeping for one register set; one entry per slot of reg_buf[] */
typedef struct avs2d_buf_t {
    /* set to 1 when gen_regs picks this set in fast mode, cleared by wait */
    RK_U32 valid;
    /* AVS2_SHPH_OFFSET(i): offset of this set's header area in the table buffer */
    RK_U32 offset_shph;
    /* AVS2_SCALIST_OFFSET(i): offset of this set's scaling list area */
    RK_U32 offset_sclst;
    /* register set allocated in init and freed in deinit */
    Vdpu383Avs2dRegSet *regs;
} Avs2dRkvBuf_t;
50
/* Private register context of this backend, stored in Avs2dHalCtx_t::reg_ctx */
typedef struct avs2d_reg_ctx_t {
    /* register sets; all are used in fast mode, only entry 0 otherwise */
    Avs2dRkvBuf_t reg_buf[VDPU383_FAST_REG_SET_CNT];

    /* table buffer offsets of the register set currently being generated */
    RK_U32 shph_offset;
    RK_U32 sclst_offset;

    /* register set currently being generated */
    Vdpu383Avs2dRegSet *regs;

    /* CPU-side staging copies, memcpy'ed into the table buffer by gen_regs */
    RK_U8 shph_dat[AVS2_383_SHPH_SIZE];
    RK_U8 scalist_dat[AVS2_383_SCALIST_SIZE];

    /* shared table buffer (header + scaling list areas), its fd and mapping */
    MppBuffer bufs;
    RK_S32 bufs_fd;
    void *bufs_ptr;

    /* one RCB buffer per register set plus the size/layout they were built for */
    MppBuffer rcb_buf[VDPU383_FAST_REG_SET_CNT];
    RK_S32 rcb_buf_size;
    Vdpu383RcbInfo rcb_info[RCB_BUF_COUNT];
    /* destination of the full register read-back done when AVS2D_HAL_DBG_REG is set */
    RK_U32 reg_out[VDPU34x_TOTAL_REG_CNT];

} Avs2dRkvRegCtx_t;
72
/* Forward declaration: hal_avs2d_vdpu383_init() calls deinit on its failure path */
MPP_RET hal_avs2d_vdpu383_deinit(void *hal);
/* Vertical stride alignment callback for the frame slots: round val up to 16. */
static RK_U32 avs2d_ver_align(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 16);

    return aligned;
}
78
/* Buffer length alignment callback for the frame slots: twice val rounded up to 16. */
static RK_U32 avs2d_len_align(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 16);

    return 2 * aligned;
}
83
prepare_header(Avs2dHalCtx_t * p_hal,RK_U8 * data,RK_U32 len)84 static MPP_RET prepare_header(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len)
85 {
86 RK_U32 i, j;
87 BitputCtx_t bp;
88 RK_U64 *bit_buf = (RK_U64 *)data;
89 Avs2dSyntax_t *syntax = &p_hal->syntax;
90 PicParams_Avs2d *pp = &syntax->pp;
91 AlfParams_Avs2d *alfp = &syntax->alfp;
92 RefParams_Avs2d *refp = &syntax->refp;
93 WqmParams_Avs2d *wqmp = &syntax->wqmp;
94
95 memset(data, 0, len);
96
97 mpp_set_bitput_ctx(&bp, bit_buf, len);
98
99 //!< sequence header syntax
100 mpp_put_bits(&bp, pp->chroma_format_idc, 2);
101 mpp_put_bits(&bp, pp->pic_width_in_luma_samples, 16);
102 mpp_put_bits(&bp, pp->pic_height_in_luma_samples, 16);
103 mpp_put_bits(&bp, pp->bit_depth_luma_minus8, 3);
104 mpp_put_bits(&bp, pp->bit_depth_chroma_minus8, 3);
105 mpp_put_bits(&bp, pp->lcu_size, 3);
106 mpp_put_bits(&bp, pp->progressive_sequence, 1);
107 mpp_put_bits(&bp, pp->field_coded_sequence, 1);
108
109 mpp_put_bits(&bp, pp->secondary_transform_enable_flag, 1);
110 mpp_put_bits(&bp, pp->sample_adaptive_offset_enable_flag, 1);
111 mpp_put_bits(&bp, pp->adaptive_loop_filter_enable_flag, 1);
112 mpp_put_bits(&bp, pp->pmvr_enable_flag, 1);
113 mpp_put_bits(&bp, pp->cross_slice_loopfilter_enable_flag, 1);
114
115 //!< picture header syntax
116 mpp_put_bits(&bp, pp->picture_type, 3);
117 mpp_put_bits(&bp, refp->ref_pic_num, 3);
118 mpp_put_bits(&bp, pp->scene_reference_enable_flag, 1);
119 mpp_put_bits(&bp, pp->bottom_field_picture_flag, 1);
120 mpp_put_bits(&bp, pp->fixed_picture_qp, 1);
121 mpp_put_bits(&bp, pp->picture_qp, 7);
122 mpp_put_bits(&bp, pp->loop_filter_disable_flag, 1);
123 mpp_put_bits(&bp, pp->alpha_c_offset, 5);
124 mpp_put_bits(&bp, pp->beta_offset, 5);
125
126 //!< weight quant param
127 mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cb, 6);
128 mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cr, 6);
129 mpp_put_bits(&bp, wqmp->pic_weight_quant_enable_flag, 1);
130
131 //!< alf param
132 mpp_put_bits(&bp, alfp->enable_pic_alf_y, 1);
133 mpp_put_bits(&bp, alfp->enable_pic_alf_cb, 1);
134 mpp_put_bits(&bp, alfp->enable_pic_alf_cr, 1);
135
136 mpp_put_bits(&bp, alfp->alf_filter_num_minus1, 4);
137 for (i = 0; i < 16; i++)
138 mpp_put_bits(&bp, alfp->alf_coeff_idx_tab[i], 4);
139
140 for (i = 0; i < 16; i++)
141 for (j = 0; j < 9; j++)
142 mpp_put_bits(&bp, alfp->alf_coeff_y[i][j], 7);
143
144 for (j = 0; j < 9; j++)
145 mpp_put_bits(&bp, alfp->alf_coeff_cb[j], 7);
146
147 for (j = 0; j < 9; j++)
148 mpp_put_bits(&bp, alfp->alf_coeff_cr[j], 7);
149
150 /* other flags */
151 mpp_put_bits(&bp, pp->multi_hypothesis_skip_enable_flag, 1);
152 mpp_put_bits(&bp, pp->dual_hypothesis_prediction_enable_flag, 1);
153 mpp_put_bits(&bp, pp->weighted_skip_enable_flag, 1);
154 mpp_put_bits(&bp, pp->asymmetrc_motion_partitions_enable_flag, 1);
155 mpp_put_bits(&bp, pp->nonsquare_quadtree_transform_enable_flag, 1);
156 mpp_put_bits(&bp, pp->nonsquare_intra_prediction_enable_flag, 1);
157
158 //!< picture reference params
159 mpp_put_bits(&bp, pp->cur_poc, 32);
160 for (i = 0; i < 8; i++)
161 mpp_put_bits(&bp, (i < refp->ref_pic_num) ? refp->ref_poc_list[i] : 0, 32);
162 for (i = 0; i < 8; i++)
163 mpp_put_bits(&bp, (i < refp->ref_pic_num) ? pp->field_coded_sequence : 0, 1);
164 for (i = 0; i < 8; i++)
165 mpp_put_bits(&bp, (i < refp->ref_pic_num) ? pp->bottom_field_picture_flag : 0, 1);
166 for (i = 0; i < 8; i++)
167 mpp_put_bits(&bp, (i < refp->ref_pic_num), 1);
168
169 return MPP_OK;
170 }
171
prepare_scalist(Avs2dHalCtx_t * p_hal,RK_U8 * data,RK_U32 len)172 static MPP_RET prepare_scalist(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len)
173 {
174 Avs2dSyntax_t *syntax = &p_hal->syntax;
175 WqmParams_Avs2d *wqmp = &syntax->wqmp;
176 RK_U32 i = 0;
177 RK_U32 n = 0;
178
179 if (!wqmp->pic_weight_quant_enable_flag)
180 return MPP_OK;
181
182 memset(data, 0, len);
183
184 /* dump by block4x4, vectial direction */
185 for (i = 0; i < 4; i++) {
186 data[n++] = wqmp->wq_matrix[0][i + 0];
187 data[n++] = wqmp->wq_matrix[0][i + 4];
188 data[n++] = wqmp->wq_matrix[0][i + 8];
189 data[n++] = wqmp->wq_matrix[0][i + 12];
190 }
191
192 /* block8x8 */
193 {
194 RK_S32 blk4_x = 0, blk4_y = 0;
195
196 /* dump by block4x4, vectial direction */
197 for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
198 for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
199 RK_S32 pos = blk4_y * 8 + blk4_x;
200
201 for (i = 0; i < 4; i++) {
202 data[n++] = wqmp->wq_matrix[1][pos + i + 0];
203 data[n++] = wqmp->wq_matrix[1][pos + i + 8];
204 data[n++] = wqmp->wq_matrix[1][pos + i + 16];
205 data[n++] = wqmp->wq_matrix[1][pos + i + 24];
206 }
207 }
208 }
209 }
210
211 return MPP_OK;
212 }
213
/* Look up the buffer bound to frame slot idx and return its fd. */
static RK_S32 get_frame_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx)
{
    MppBuffer slot_buf = NULL;
    RK_S32 fd;

    mpp_buf_slot_get_prop(p_hal->frame_slots, idx, SLOT_BUFFER, &slot_buf);
    fd = mpp_buffer_get_fd(slot_buf);

    return fd;
}
224
/* Look up the buffer bound to packet slot idx and return its fd. */
static RK_S32 get_packet_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx)
{
    MppBuffer slot_buf = NULL;
    RK_S32 fd;

    mpp_buf_slot_get_prop(p_hal->packet_slots, idx, SLOT_BUFFER, &slot_buf);
    fd = mpp_buffer_get_fd(slot_buf);

    return fd;
}
235
/*
 * Load the control register defaults that stay constant for every picture:
 * decoder mode, clock auto-gating, timeout threshold, error handling and
 * the AXI performance counters.
 */
static void init_ctrl_regs(Vdpu383Avs2dRegSet *regs)
{
    Vdpu383CtrlReg *ctrl = &regs->ctrl_regs;

    /* decoder mode 3 selects AVS2 */
    ctrl->reg8_dec_mode = 3;
    ctrl->reg9.buf_empty_en = 1;

    /* enable auto gating for every listed sub-module */
    ctrl->reg10.strmd_auto_gating_e = 1;
    ctrl->reg10.inter_auto_gating_e = 1;
    ctrl->reg10.intra_auto_gating_e = 1;
    ctrl->reg10.transd_auto_gating_e = 1;
    ctrl->reg10.recon_auto_gating_e = 1;
    ctrl->reg10.filterd_auto_gating_e = 1;
    ctrl->reg10.bus_auto_gating_e = 1;
    ctrl->reg10.ctrl_auto_gating_e = 1;
    ctrl->reg10.rcb_auto_gating_e = 1;
    ctrl->reg10.err_prc_auto_gating_e = 1;

    ctrl->reg13_core_timeout_threshold = 0xffffff;

    /* error handling */
    ctrl->reg16.error_proc_disable = 1;
    ctrl->reg16.error_spread_disable = 0;
    ctrl->reg16.roi_error_ctu_cal_en = 0;

    ctrl->reg20_cabac_error_en_lowbits = 0xffffffff;
    ctrl->reg21_cabac_error_en_highbits = 0x3fffffff;

    /* performance counters */
    ctrl->reg28.axi_perf_work_e = 1;
    ctrl->reg28.axi_cnt_type = 1;
    ctrl->reg28.rd_latency_id = 0xb;
    ctrl->reg28.rd_latency_thr = 0;

    ctrl->reg29.addr_align_type = 2;
    ctrl->reg29.ar_cnt_id_type = 0;
    ctrl->reg29.aw_cnt_id_type = 0;
    ctrl->reg29.ar_count_id = 0xa;
    ctrl->reg29.aw_count_id = 0;
    ctrl->reg29.rd_band_width_mode = 0;
}
276
/*
 * Override the per-type RCB sizes in rcb_info for AVS2.
 *
 * The picture width is first rounded up to the CTU size taken from the
 * syntax passed in dxva; height is unused. All tile row/column entries are
 * set to zero.
 */
static void avs2d_refine_rcb_size(Vdpu383RcbInfo *rcb_info,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    Avs2dSyntax_t *syntax = dxva;
    RK_U8 ctu_size = 1 << syntax->pp.lcu_size;
    RK_U8 bit_depth = syntax->pp.bit_depth_chroma_minus8 + 8;
    RK_U32 filterd_row_append = 8192;
    RK_U32 filterd_bytes = 0;
    RK_U32 rcb_bits = 0;

    (void) height;

    width = MPP_ALIGN(width, ctu_size);

    /* stream row: only needed for pictures wider than 8192 */
    rcb_bits = 0;
    if (width > 8192)
        rcb_bits = ((width + 63) / 64) * 112;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
    rcb_info[RCB_STRMD_TILE_ROW].size = 0;

    /* inter row */
    rcb_bits = ((width + 7) / 8) * 166;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
    rcb_info[RCB_INTER_TILE_ROW].size = 0;

    /* intra row */
    rcb_bits = MPP_ALIGN(width, 512) * (bit_depth + 2);
    rcb_bits = rcb_bits * 3; //TODO:
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
    rcb_info[RCB_INTRA_TILE_ROW].size = 0;

    /* filter row and its protect row share one size; wide pictures get a larger fixed part */
    if (width > 4096)
        filterd_row_append = 27648;
    rcb_bits = MPP_ALIGN(width, 64) * (30 * bit_depth + 9);
    filterd_bytes = filterd_row_append + MPP_RCB_BYTES(rcb_bits / 2);
    rcb_info[RCB_FILTERD_ROW].size = filterd_bytes;
    rcb_info[RCB_FILTERD_PROTECT_ROW].size = filterd_bytes;
    rcb_info[RCB_FILTERD_TILE_ROW].size = 0;

    /* filter tile column */
    rcb_info[RCB_FILTERD_TILE_COL].size = 0;
}
319
/*
 * Recompute the RCB layout for the current picture size and make sure every
 * register set in use owns an RCB buffer of the required size.
 *
 * Buffers are only released and reallocated when the required size changed
 * (or when a slot has no buffer yet). Reallocating on every frame would, in
 * fast mode, also drop the buffers of register sets that may still be in
 * flight on the hardware.
 *
 * hal   HAL context (Avs2dHalCtx_t *)
 * regs  unused
 *
 * An allocation failure is logged and leaves the slot NULL.
 */
static void hal_avs2d_rcb_info_update(void *hal, Vdpu383Avs2dRegSet *regs)
{
    Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
    Avs2dRkvRegCtx_t *reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
    RK_S32 width = p_hal->syntax.pp.pic_width_in_luma_samples;
    RK_S32 height = p_hal->syntax.pp.pic_height_in_luma_samples;
    RK_S32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
    RK_S32 new_size = 0;
    RK_S32 size_changed = 0;
    RK_S32 i = 0;

    (void) regs;

    new_size = vdpu383_get_rcb_buf_size(reg_ctx->rcb_info, width, height);
    size_changed = (new_size != reg_ctx->rcb_buf_size);
    reg_ctx->rcb_buf_size = new_size;
    /* the refinement only rewrites rcb_info sizes, not the total buffer size */
    avs2d_refine_rcb_size(reg_ctx->rcb_info, width, height, (void *)&p_hal->syntax);

    for (i = 0; i < loop; i++) {
        MppBuffer rcb_buf = NULL;
        MPP_RET ret = MPP_OK;

        if (reg_ctx->rcb_buf[i]) {
            /* existing buffer already has the right size: keep it */
            if (!size_changed)
                continue;

            mpp_buffer_put(reg_ctx->rcb_buf[i]);
            reg_ctx->rcb_buf[i] = NULL;
        }

        ret = mpp_buffer_get(p_hal->buf_group, &rcb_buf, reg_ctx->rcb_buf_size);
        if (ret)
            mpp_err_f("AVS2D mpp_buffer_group_get failed\n");

        reg_ctx->rcb_buf[i] = rcb_buf;
    }
}
350
fill_registers(Avs2dHalCtx_t * p_hal,Vdpu383Avs2dRegSet * regs,HalTaskInfo * task)351 static MPP_RET fill_registers(Avs2dHalCtx_t *p_hal, Vdpu383Avs2dRegSet *regs, HalTaskInfo *task)
352 {
353 MPP_RET ret = MPP_OK;
354 RK_U32 i;
355 MppFrame mframe = NULL;
356 Avs2dSyntax_t *syntax = &p_hal->syntax;
357 RefParams_Avs2d *refp = &syntax->refp;
358 HalDecTask *task_dec = &task->dec;
359
360 RK_U32 is_fbc = 0;
361 RK_U32 is_tile = 0;
362 HalBuf *mv_buf = NULL;
363
364 mpp_buf_slot_get_prop(p_hal->frame_slots, task_dec->output, SLOT_FRAME_PTR, &mframe);
365 is_fbc = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
366 is_tile = MPP_FRAME_FMT_IS_TILE(mpp_frame_get_fmt(mframe));
367
368 //!< caculate the yuv_frame_size
369 {
370 RK_U32 hor_virstride = 0;
371 RK_U32 ver_virstride = 0;
372 RK_U32 y_virstride = 0;
373 RK_U32 uv_virstride = 0;
374
375 hor_virstride = mpp_frame_get_hor_stride(mframe);
376 ver_virstride = mpp_frame_get_ver_stride(mframe);
377 y_virstride = hor_virstride * ver_virstride;
378 uv_virstride = hor_virstride * ver_virstride / 2;
379 AVS2D_HAL_TRACE("is_fbc %d y_virstride %d, hor_virstride %d, ver_virstride %d\n",
380 is_fbc, y_virstride, hor_virstride, ver_virstride);
381
382 if (is_fbc) {
383 RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
384 RK_U32 fbd_offset;
385
386 regs->ctrl_regs.reg9.fbc_e = 1;
387 regs->avs2d_paras.reg68_hor_virstride = fbc_hdr_stride / 64;
388 fbd_offset = regs->avs2d_paras.reg68_hor_virstride * MPP_ALIGN(ver_virstride, 64) * 4;
389 regs->avs2d_addrs.reg193_fbc_payload_offset = fbd_offset;
390 } else if (is_tile) {
391 regs->ctrl_regs.reg9.tile_e = 1;
392 regs->avs2d_paras.reg68_hor_virstride = hor_virstride * 6 / 16;
393 regs->avs2d_paras.reg70_y_virstride = (y_virstride + uv_virstride) / 16;
394 } else {
395 regs->ctrl_regs.reg9.fbc_e = 0;
396 regs->ctrl_regs.reg9.tile_e = 0;
397 regs->avs2d_paras.reg68_hor_virstride = hor_virstride / 16;
398 regs->avs2d_paras.reg69_raster_uv_hor_virstride = hor_virstride / 16;
399 regs->avs2d_paras.reg70_y_virstride = y_virstride / 16;
400 }
401 }
402
403 // set current
404 {
405 RK_S32 fd = get_frame_fd(p_hal, task_dec->output);
406
407 mpp_assert(fd >= 0);
408
409 regs->avs2d_addrs.reg168_decout_base = fd;
410 regs->avs2d_addrs.reg192_payload_st_cur_base = fd;
411 mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, task_dec->output);
412 regs->avs2d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
413 AVS2D_HAL_TRACE("cur frame index %d, fd %d, colmv fd %d", task_dec->output, fd, regs->avs2d_addrs.reg216_colmv_cur_base);
414
415 // TODO: set up error_ref_base
416 // regs->avs2d_addr.reg169_err_ref_base.base = regs->avs2d_addr.reg216_colmv_cur_base.base;
417 }
418
419 // set reference
420 {
421 RK_S32 valid_slot = -1;
422
423 AVS2D_HAL_TRACE("num of ref %d", refp->ref_pic_num);
424
425 for (i = 0; i < refp->ref_pic_num; i++) {
426 if (task_dec->refer[i] < 0)
427 continue;
428
429 valid_slot = i;
430 break;
431 }
432
433 for (i = 0; i < MAX_REF_NUM; i++) {
434 if (i < refp->ref_pic_num) {
435 MppFrame frame_ref = NULL;
436
437 RK_S32 slot_idx = task_dec->refer[i] < 0 ? task_dec->refer[valid_slot] : task_dec->refer[i];
438
439 if (slot_idx < 0) {
440 AVS2D_HAL_TRACE("missing ref, could not found valid ref");
441 task->dec.flags.ref_err = 1;
442 return ret = MPP_ERR_UNKNOW;
443 }
444
445 mpp_buf_slot_get_prop(p_hal->frame_slots, slot_idx, SLOT_FRAME_PTR, &frame_ref);
446
447 if (frame_ref) {
448 regs->avs2d_addrs.reg170_185_ref_base[i] = get_frame_fd(p_hal, slot_idx);
449 regs->avs2d_addrs.reg195_210_payload_st_ref_base[i] = get_frame_fd(p_hal, slot_idx);
450 mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, slot_idx);
451 regs->avs2d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
452 }
453 }
454 }
455
456 if (p_hal->syntax.refp.scene_ref_enable && p_hal->syntax.refp.scene_ref_slot_idx >= 0) {
457 MppFrame scene_ref = NULL;
458 RK_S32 slot_idx = p_hal->syntax.refp.scene_ref_slot_idx;
459 RK_S32 replace_idx = p_hal->syntax.refp.scene_ref_replace_pos;
460
461 mpp_buf_slot_get_prop(p_hal->frame_slots, slot_idx, SLOT_FRAME_PTR, &scene_ref);
462
463 if (scene_ref) {
464 regs->avs2d_addrs.reg170_185_ref_base[replace_idx] = get_frame_fd(p_hal, slot_idx);
465 regs->avs2d_addrs.reg195_210_payload_st_ref_base[replace_idx] = regs->avs2d_addrs.reg170_185_ref_base[replace_idx];
466 mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, slot_idx);
467 regs->avs2d_addrs.reg217_232_colmv_ref_base[replace_idx] = mpp_buffer_get_fd(mv_buf->buf[0]);
468 }
469 }
470
471 regs->avs2d_addrs.reg169_error_ref_base = regs->avs2d_addrs.reg170_185_ref_base[0];
472 regs->avs2d_addrs.reg194_payload_st_error_ref_base = regs->avs2d_addrs.reg195_210_payload_st_ref_base[0];
473 }
474
475 // set rlc
476 regs->common_addr.reg128_strm_base = get_packet_fd(p_hal, task_dec->input);
477 AVS2D_HAL_TRACE("packet fd %d from slot %d", regs->common_addr.reg128_strm_base, task_dec->input);
478
479 regs->avs2d_paras.reg66_stream_len = MPP_ALIGN(mpp_packet_get_length(task_dec->input_packet), 16) + 64;
480
481 {
482 //scale down config
483 mpp_buf_slot_get_prop(p_hal->frame_slots, task_dec->output,
484 SLOT_FRAME_PTR, &mframe);
485 if (mpp_frame_get_thumbnail_en(mframe)) {
486 regs->common_addr.reg133_scale_down_base = regs->avs2d_addrs.reg168_decout_base;
487 vdpu383_setup_down_scale(mframe, p_hal->dev, ®s->ctrl_regs,
488 (void *)®s->avs2d_paras);
489 } else {
490 regs->ctrl_regs.reg9.scale_down_en = 0;
491 }
492 }
493
494 return ret;
495 }
496
hal_avs2d_vdpu383_deinit(void * hal)497 MPP_RET hal_avs2d_vdpu383_deinit(void *hal)
498 {
499 MPP_RET ret = MPP_OK;
500 RK_U32 i, loop;
501 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
502 Avs2dRkvRegCtx_t *reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
503
504 AVS2D_HAL_TRACE("In.");
505
506 INP_CHECK(ret, NULL == reg_ctx);
507
508 //!< malloc buffers
509 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
510 for (i = 0; i < loop; i++) {
511 if (reg_ctx->rcb_buf[i]) {
512 mpp_buffer_put(reg_ctx->rcb_buf[i]);
513 reg_ctx->rcb_buf[i] = NULL;
514 }
515
516 MPP_FREE(reg_ctx->reg_buf[i].regs);
517 }
518
519 if (reg_ctx->bufs) {
520 mpp_buffer_put(reg_ctx->bufs);
521 reg_ctx->bufs = NULL;
522 }
523
524 if (p_hal->cmv_bufs) {
525 hal_bufs_deinit(p_hal->cmv_bufs);
526 p_hal->cmv_bufs = NULL;
527 }
528
529 MPP_FREE(p_hal->reg_ctx);
530
531 __RETURN:
532 AVS2D_HAL_TRACE("Out. ret %d", ret);
533 return ret;
534 }
535
hal_avs2d_vdpu383_init(void * hal,MppHalCfg * cfg)536 MPP_RET hal_avs2d_vdpu383_init(void *hal, MppHalCfg *cfg)
537 {
538 MPP_RET ret = MPP_OK;
539 RK_U32 i, loop;
540 Avs2dRkvRegCtx_t *reg_ctx;
541 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
542
543 AVS2D_HAL_TRACE("In.");
544
545 INP_CHECK(ret, NULL == p_hal);
546
547 MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(Avs2dRkvRegCtx_t)));
548 reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
549
550 //!< malloc buffers
551 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
552 FUN_CHECK(ret = mpp_buffer_get(p_hal->buf_group, ®_ctx->bufs, AVS2_ALL_TBL_BUF_SIZE(loop)));
553 reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
554 reg_ctx->bufs_ptr = mpp_buffer_get_ptr(reg_ctx->bufs);
555 mpp_buffer_attach_dev(reg_ctx->bufs, p_hal->dev);
556
557 for (i = 0; i < loop; i++) {
558 reg_ctx->reg_buf[i].regs = mpp_calloc(Vdpu383Avs2dRegSet, 1);
559 init_ctrl_regs(reg_ctx->reg_buf[i].regs);
560 reg_ctx->reg_buf[i].offset_shph = AVS2_SHPH_OFFSET(i);
561 reg_ctx->reg_buf[i].offset_sclst = AVS2_SCALIST_OFFSET(i);
562 }
563
564 if (!p_hal->fast_mode) {
565 reg_ctx->regs = reg_ctx->reg_buf[0].regs;
566 reg_ctx->shph_offset = reg_ctx->reg_buf[0].offset_shph;
567 reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst;
568 }
569
570 mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
571 mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align);
572 mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align);
573
574 __RETURN:
575 AVS2D_HAL_TRACE("Out. ret %d", ret);
576 (void)cfg;
577 return ret;
578 __FAILED:
579 hal_avs2d_vdpu383_deinit(p_hal);
580 AVS2D_HAL_TRACE("Out. ret %d", ret);
581 return ret;
582 }
583
/*
 * Compute the co-located MV buffer size in bytes for a picture of
 * pic_w x pic_h with CTU width ctu_w. The picture is covered by segments
 * of 64 * 16 bytes each; a segment spans 64 * 16 * 16 / ctu_w pixels
 * horizontally and one CTU row vertically.
 */
static RK_S32 calc_mv_size(RK_S32 pic_w, RK_S32 pic_h, RK_S32 ctu_w)
{
    /* colmv_block_size = 16, colmv_per_bytes = 16 */
    const RK_S32 seg_bytes = 64 * 16;
    RK_S32 seg_w = seg_bytes * 16 / ctu_w;
    RK_S32 seg_cols = MPP_ALIGN(pic_w, seg_w) / seg_w;
    RK_S32 seg_rows = MPP_ALIGN(pic_h, ctu_w) / ctu_w;

    return seg_cols * seg_rows * seg_bytes;
}
593
set_up_colmv_buf(void * hal)594 static MPP_RET set_up_colmv_buf(void *hal)
595 {
596 MPP_RET ret = MPP_OK;
597 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
598 Avs2dSyntax_t *syntax = &p_hal->syntax;
599 PicParams_Avs2d *pp = &syntax->pp;
600 RK_U32 ctu_size = 1 << (p_hal->syntax.pp.lcu_size);
601 RK_S32 mv_size = calc_mv_size(pp->pic_width_in_luma_samples,
602 pp->pic_height_in_luma_samples * (1 + pp->field_coded_sequence),
603 ctu_size);
604
605 AVS2D_HAL_TRACE("mv_size %d", mv_size);
606
607 if (p_hal->cmv_bufs == NULL || p_hal->mv_size < (RK_U32)mv_size) {
608 size_t size = mv_size;
609
610 if (p_hal->cmv_bufs) {
611 hal_bufs_deinit(p_hal->cmv_bufs);
612 p_hal->cmv_bufs = NULL;
613 }
614
615 hal_bufs_init(&p_hal->cmv_bufs);
616 if (p_hal->cmv_bufs == NULL) {
617 mpp_err_f("colmv bufs init fail");
618 ret = MPP_ERR_INIT;
619 goto __RETURN;
620 }
621
622 p_hal->mv_size = mv_size;
623 p_hal->mv_count = mpp_buf_slot_get_count(p_hal->frame_slots);
624 hal_bufs_setup(p_hal->cmv_bufs, p_hal->mv_count, 1, &size);
625 }
626
627 __RETURN:
628 return ret;
629 }
630
hal_avs2d_vdpu383_gen_regs(void * hal,HalTaskInfo * task)631 MPP_RET hal_avs2d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
632 {
633 MPP_RET ret = MPP_OK;
634 Avs2dRkvRegCtx_t *reg_ctx;
635 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
636 Vdpu383Avs2dRegSet *regs = NULL;
637
638 AVS2D_HAL_TRACE("In.");
639
640 INP_CHECK(ret, NULL == p_hal);
641 if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
642 !p_hal->cfg->base.disable_error) {
643 ret = MPP_NOK;
644 goto __RETURN;
645 }
646
647 ret = set_up_colmv_buf(p_hal);
648 if (ret)
649 goto __RETURN;
650
651 reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
652
653 if (p_hal->fast_mode) {
654 RK_U32 i = 0;
655
656 for (i = 0; i < MPP_ARRAY_ELEMS(reg_ctx->reg_buf); i++) {
657 if (!reg_ctx->reg_buf[i].valid) {
658 task->dec.reg_index = i;
659 regs = reg_ctx->reg_buf[i].regs;
660 reg_ctx->shph_offset = reg_ctx->reg_buf[i].offset_shph;
661 reg_ctx->sclst_offset = reg_ctx->reg_buf[i].offset_sclst;
662 reg_ctx->regs = reg_ctx->reg_buf[i].regs;
663 reg_ctx->reg_buf[i].valid = 1;
664 break;
665 }
666 }
667
668 mpp_assert(regs);
669 }
670
671 regs = reg_ctx->regs;
672
673 prepare_header(p_hal, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat) / 8);
674 prepare_scalist(p_hal, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat));
675
676 ret = fill_registers(p_hal, regs, task);
677
678 if (ret)
679 goto __RETURN;
680
681 {
682 memcpy(reg_ctx->bufs_ptr + reg_ctx->shph_offset, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat));
683 memcpy(reg_ctx->bufs_ptr + reg_ctx->sclst_offset, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat));
684
685 regs->common_addr.reg131_gbl_base = reg_ctx->bufs_fd;
686 mpp_dev_set_reg_offset(p_hal->dev, 131, reg_ctx->shph_offset);
687 regs->avs2d_paras.reg67_global_len = AVS2_383_SHPH_SIZE;
688
689 regs->common_addr.reg132_scanlist_addr = reg_ctx->bufs_fd;
690 mpp_dev_set_reg_offset(p_hal->dev, 132, reg_ctx->sclst_offset);
691 }
692
693 // set rcb
694 {
695 hal_avs2d_rcb_info_update(p_hal, regs);
696 vdpu383_setup_rcb(®s->common_addr, p_hal->dev, p_hal->fast_mode ?
697 reg_ctx->rcb_buf[task->dec.reg_index] : reg_ctx->rcb_buf[0],
698 reg_ctx->rcb_info);
699
700 }
701
702 vdpu383_setup_statistic(®s->ctrl_regs);
703 mpp_buffer_sync_end(reg_ctx->bufs);
704
705 __RETURN:
706 AVS2D_HAL_TRACE("Out. ret %d", ret);
707 return ret;
708 }
709
hal_avs2d_vdpu383_start(void * hal,HalTaskInfo * task)710 MPP_RET hal_avs2d_vdpu383_start(void *hal, HalTaskInfo *task)
711 {
712 MPP_RET ret = MPP_OK;
713 Vdpu383Avs2dRegSet *regs = NULL;
714 Avs2dRkvRegCtx_t *reg_ctx;
715 MppDev dev = NULL;
716 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
717
718 AVS2D_HAL_TRACE("In.");
719 INP_CHECK(ret, NULL == p_hal);
720
721 if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
722 !p_hal->cfg->base.disable_error) {
723 goto __RETURN;
724 }
725
726 reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
727 regs = p_hal->fast_mode ? reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs;
728 dev = p_hal->dev;
729
730 p_hal->frame_no++;
731
732 do {
733 MppDevRegWrCfg wr_cfg;
734 MppDevRegRdCfg rd_cfg;
735
736 wr_cfg.reg = ®s->ctrl_regs;
737 wr_cfg.size = sizeof(regs->ctrl_regs);
738 wr_cfg.offset = OFFSET_CTRL_REGS;
739 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
740 if (ret) {
741 mpp_err_f("set register write failed %d\n", ret);
742 break;
743 }
744
745 wr_cfg.reg = ®s->common_addr;
746 wr_cfg.size = sizeof(regs->common_addr);
747 wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
748 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
749 if (ret) {
750 mpp_err_f("set register write failed %d\n", ret);
751 break;
752 }
753
754 wr_cfg.reg = ®s->avs2d_paras;
755 wr_cfg.size = sizeof(regs->avs2d_paras);
756 wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
757 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
758 if (ret) {
759 mpp_err_f("set register write failed %d\n", ret);
760 break;
761 }
762
763 wr_cfg.reg = ®s->avs2d_addrs;
764 wr_cfg.size = sizeof(regs->avs2d_addrs);
765 wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
766 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
767 if (ret) {
768 mpp_err_f("set register write failed %d\n", ret);
769 break;
770 }
771
772 rd_cfg.reg = ®s->ctrl_regs.reg15;
773 rd_cfg.size = sizeof(regs->ctrl_regs.reg15);
774 rd_cfg.offset = OFFSET_INTERRUPT_REGS;
775 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
776 if (ret) {
777 mpp_err_f("set register read failed %d\n", ret);
778 break;
779 }
780
781 if (avs2d_hal_debug & AVS2D_HAL_DBG_REG) {
782 memset(reg_ctx->reg_out, 0, sizeof(reg_ctx->reg_out));
783 rd_cfg.reg = reg_ctx->reg_out;
784 rd_cfg.size = sizeof(reg_ctx->reg_out);
785 rd_cfg.offset = 0;
786 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
787 }
788
789 /* rcb info for sram */
790 vdpu383_set_rcbinfo(dev, (Vdpu383RcbInfo*)reg_ctx->rcb_info);
791
792 // send request to hardware
793 ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
794 if (ret) {
795 mpp_err_f("send cmd failed %d\n", ret);
796 break;
797 }
798
799 } while (0);
800
801 __RETURN:
802 AVS2D_HAL_TRACE("Out.");
803 return ret;
804 }
805
/*
 * Return sample number num of line as an 8-bit value.
 * For MPP_FMT_YUV420SP_10BIT the sample is extracted from the packed 10-bit
 * data and reduced to its upper 8 bits; for MPP_FMT_YUV420SP the byte is
 * returned as is. Any other format yields 0.
 */
static RK_U8 fetch_data(RK_U32 fmt, RK_U8 *line, RK_U32 num)
{
    RK_U32 sample = 0;

    switch (fmt) {
    case MPP_FMT_YUV420SP_10BIT: {
        RK_U32 byte_pos = num * 10 / 8;
        RK_U32 bit_pos = (num * 2) & 7;

        /* a 10-bit sample straddles two consecutive bytes */
        sample = (line[byte_pos] >> bit_pos) |
                 (line[byte_pos + 1] << (8 - bit_pos));
        sample = (sample & 0x3ff) >> 2;
    } break;
    case MPP_FMT_YUV420SP: {
        sample = line[num];
    } break;
    default: {
    } break;
    }

    return sample;
}
823
hal_avs2d_vdpu383_dump_yuv(void * hal,HalTaskInfo * task)824 static MPP_RET hal_avs2d_vdpu383_dump_yuv(void *hal, HalTaskInfo *task)
825 {
826 MPP_RET ret = MPP_OK;
827 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
828
829 MppFrameFormat fmt = MPP_FMT_YUV420SP;
830 RK_U32 vir_w = 0;
831 RK_U32 vir_h = 0;
832 RK_U32 i = 0;
833 RK_U32 j = 0;
834 FILE *fp_stream = NULL;
835 char name[50];
836 MppBuffer buffer = NULL;
837 MppFrame frame;
838 void *base = NULL;
839
840 ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_FRAME_PTR, &frame);
841
842 if (ret != MPP_OK || frame == NULL)
843 mpp_log_f("failed to get frame slot %d", task->dec.output);
844
845 ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_BUFFER, &buffer);
846
847 if (ret != MPP_OK || buffer == NULL)
848 mpp_log_f("failed to get frame buffer slot %d", task->dec.output);
849
850 AVS2D_HAL_TRACE("frame slot %d, fd %d\n", task->dec.output, mpp_buffer_get_fd(buffer));
851 base = mpp_buffer_get_ptr(buffer);
852 vir_w = mpp_frame_get_hor_stride(frame);
853 vir_h = mpp_frame_get_ver_stride(frame);
854 fmt = mpp_frame_get_fmt(frame);
855 snprintf(name, sizeof(name), "/data/tmp/rkv_out_%dx%d_nv12_%03d.yuv", vir_w, vir_h,
856 p_hal->frame_no);
857 fp_stream = fopen(name, "wb");
858
859 if (fmt != MPP_FMT_YUV420SP_10BIT) {
860 fwrite(base, 1, vir_w * vir_h * 3 / 2, fp_stream);
861 } else {
862 RK_U8 tmp = 0;
863 for (i = 0; i < vir_h; i++) {
864 for (j = 0; j < vir_w; j++) {
865 tmp = fetch_data(fmt, base, j);
866 fwrite(&tmp, 1, 1, fp_stream);
867 }
868 base += vir_w;
869 }
870
871 for (i = 0; i < vir_h / 2; i++) {
872 for (j = 0; j < vir_w; j++) {
873 tmp = fetch_data(fmt, base, j);
874 fwrite(&tmp, 1, 1, fp_stream);
875 }
876 base += vir_w;
877 }
878 }
879 fclose(fp_stream);
880
881 return ret;
882 }
883
hal_avs2d_vdpu383_wait(void * hal,HalTaskInfo * task)884 MPP_RET hal_avs2d_vdpu383_wait(void *hal, HalTaskInfo *task)
885 {
886 MPP_RET ret = MPP_OK;
887 Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
888 Avs2dRkvRegCtx_t *reg_ctx;
889 Vdpu383Avs2dRegSet *regs;
890
891 INP_CHECK(ret, NULL == p_hal);
892 reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
893 regs = p_hal->fast_mode ? reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs;
894
895 if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
896 !p_hal->cfg->base.disable_error) {
897 AVS2D_HAL_DBG(AVS2D_HAL_DBG_ERROR, "found task error.\n");
898 ret = MPP_NOK;
899 goto __RETURN;
900 } else {
901 ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
902 if (ret)
903 mpp_err_f("poll cmd failed %d\n", ret);
904 }
905
906 if (avs2d_hal_debug & AVS2D_HAL_DBG_OUT)
907 hal_avs2d_vdpu383_dump_yuv(hal, task);
908
909 AVS2D_HAL_TRACE("read irq_status 0x%08x\n", regs->ctrl_regs.reg19);
910
911 if (p_hal->dec_cb) {
912 DecCbHalDone param;
913
914 param.task = (void *)&task->dec;
915 param.regs = (RK_U32 *)regs;
916
917 if ((!regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
918 regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
919 regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
920 regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
921 regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
922 regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
923 regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta)
924 param.hard_err = 1;
925 else
926 param.hard_err = 0;
927
928 task->dec.flags.ref_info_valid = 0;
929
930 AVS2D_HAL_TRACE("hal frame %d hard_err= %d", p_hal->frame_no, param.hard_err);
931
932 mpp_callback(p_hal->dec_cb, ¶m);
933 }
934
935 memset(®s->ctrl_regs.reg19, 0, sizeof(RK_U32));
936 if (p_hal->fast_mode)
937 reg_ctx->reg_buf[task->dec.reg_index].valid = 0;
938
939 __RETURN:
940 AVS2D_HAL_TRACE("Out. ret %d", ret);
941 return ret;
942 }
943
/*
 * HAL entry table of the VDPU383 AVS2 decoder backend. reset, flush and
 * control are NULL, i.e. this backend supplies no handler for them.
 * NOTE(review): ctx_size is the size of the private register context, while
 * the entry points cast hal to Avs2dHalCtx_t - confirm which size the
 * caller of this table expects.
 */
const MppHalApi hal_avs2d_vdpu383 = {
    .name = "avs2d_vdpu383",
    .type = MPP_CTX_DEC,
    .coding = MPP_VIDEO_CodingAVS2,
    .ctx_size = sizeof(Avs2dRkvRegCtx_t),
    .flag = 0,
    .init = hal_avs2d_vdpu383_init,
    .deinit = hal_avs2d_vdpu383_deinit,
    .reg_gen = hal_avs2d_vdpu383_gen_regs,
    .start = hal_avs2d_vdpu383_start,
    .wait = hal_avs2d_vdpu383_wait,
    .reset = NULL,
    .flush = NULL,
    .control = NULL,
};
959