xref: /rockchip-linux_mpp/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu383.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3  * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4  */
5 
6 #define MODULE_TAG "hal_avs2d_vdpu383"
7 
8 #include <string.h>
9 #include <stdio.h>
10 
11 #include "mpp_env.h"
12 #include "mpp_log.h"
13 #include "mpp_mem.h"
14 #include "mpp_common.h"
15 #include "mpp_debug.h"
16 #include "mpp_bitput.h"
17 #include "mpp_buffer_impl.h"
18 
19 #include "avs2d_syntax.h"
20 #include "vdpu383_com.h"
21 #include "vdpu383_avs2d.h"
22 #include "hal_avs2d_global.h"
23 #include "hal_avs2d_vdpu383.h"
24 #include "mpp_dec_cb_param.h"
25 
/* Number of entries in Avs2dRkvRegCtx_t::reg_buf / rcb_buf (all used in fast mode, one otherwise). */
#define VDPU383_FAST_REG_SET_CNT    (3)
/* Upper bound of the reference-picture loop in fill_registers(). */
#define MAX_REF_NUM                 (8)
/* Size of the packed sequence/picture header table written by prepare_header(). */
#define AVS2_383_SHPH_SIZE          (208)            /* bytes */
/* Size of the scaling-list table written by prepare_scalist(). */
#define AVS2_383_SCALIST_SIZE       (80)             /* bytes */
/* Number of 32-bit words in the debug register read-back array (reg_out). */
#define VDPU34x_TOTAL_REG_CNT       (278)

/*
 * Layout of the shared table buffer: one "stream info set" per register
 * set, each made of a 4K-aligned header table followed by a 4K-aligned
 * scaling-list table.
 */
#define AVS2_383_SHPH_ALIGNED_SIZE          (MPP_ALIGN(AVS2_383_SHPH_SIZE, SZ_4K))
#define AVS2_383_SCALIST_ALIGNED_SIZE       (MPP_ALIGN(AVS2_383_SCALIST_SIZE, SZ_4K))
#define AVS2_383_STREAM_INFO_SET_SIZE       (AVS2_383_SHPH_ALIGNED_SIZE + \
                                            AVS2_383_SCALIST_ALIGNED_SIZE)
/* Total size of the table buffer for cnt register sets. */
#define AVS2_ALL_TBL_BUF_SIZE(cnt)          (AVS2_383_STREAM_INFO_SET_SIZE * (cnt))
/* Byte offset of the header table of set number pos. */
#define AVS2_SHPH_OFFSET(pos)               (AVS2_383_STREAM_INFO_SET_SIZE * (pos))
/* Byte offset of the scaling-list table of set number pos. */
#define AVS2_SCALIST_OFFSET(pos)            (AVS2_SHPH_OFFSET(pos) + AVS2_383_SHPH_ALIGNED_SIZE)

/* Co-located MV parameters; NOTE(review): not referenced in the part of the file reviewed here. */
#define COLMV_COMPRESS_EN       (1)
#define COLMV_BLOCK_SIZE        (16)
#define COLMV_BYTES             (16)
43 
44 typedef struct avs2d_buf_t {
45     RK_U32                  valid;
46     RK_U32                  offset_shph;
47     RK_U32                  offset_sclst;
48     Vdpu383Avs2dRegSet      *regs;
49 } Avs2dRkvBuf_t;
50 
51 typedef struct avs2d_reg_ctx_t {
52     Avs2dRkvBuf_t           reg_buf[VDPU383_FAST_REG_SET_CNT];
53 
54     RK_U32                  shph_offset;
55     RK_U32                  sclst_offset;
56 
57     Vdpu383Avs2dRegSet      *regs;
58 
59     RK_U8                   shph_dat[AVS2_383_SHPH_SIZE];
60     RK_U8                   scalist_dat[AVS2_383_SCALIST_SIZE];
61 
62     MppBuffer               bufs;
63     RK_S32                  bufs_fd;
64     void                    *bufs_ptr;
65 
66     MppBuffer               rcb_buf[VDPU383_FAST_REG_SET_CNT];
67     RK_S32                  rcb_buf_size;
68     Vdpu383RcbInfo          rcb_info[RCB_BUF_COUNT];
69     RK_U32                  reg_out[VDPU34x_TOTAL_REG_CNT];
70 
71 } Avs2dRkvRegCtx_t;
72 
73 MPP_RET hal_avs2d_vdpu383_deinit(void *hal);
/* Vertical-stride alignment callback registered on the frame slots: rounds val up to a multiple of 16. */
static RK_U32 avs2d_ver_align(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 16);

    return aligned;
}
78 
/* Buffer-length alignment callback registered on the frame slots: twice val rounded up to a multiple of 16. */
static RK_U32 avs2d_len_align(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 16);

    return aligned + aligned;
}
83 
prepare_header(Avs2dHalCtx_t * p_hal,RK_U8 * data,RK_U32 len)84 static MPP_RET prepare_header(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len)
85 {
86     RK_U32 i, j;
87     BitputCtx_t bp;
88     RK_U64 *bit_buf = (RK_U64 *)data;
89     Avs2dSyntax_t *syntax = &p_hal->syntax;
90     PicParams_Avs2d *pp   = &syntax->pp;
91     AlfParams_Avs2d *alfp = &syntax->alfp;
92     RefParams_Avs2d *refp = &syntax->refp;
93     WqmParams_Avs2d *wqmp = &syntax->wqmp;
94 
95     memset(data, 0, len);
96 
97     mpp_set_bitput_ctx(&bp, bit_buf, len);
98 
99     //!< sequence header syntax
100     mpp_put_bits(&bp, pp->chroma_format_idc, 2);
101     mpp_put_bits(&bp, pp->pic_width_in_luma_samples, 16);
102     mpp_put_bits(&bp, pp->pic_height_in_luma_samples, 16);
103     mpp_put_bits(&bp, pp->bit_depth_luma_minus8, 3);
104     mpp_put_bits(&bp, pp->bit_depth_chroma_minus8, 3);
105     mpp_put_bits(&bp, pp->lcu_size, 3);
106     mpp_put_bits(&bp, pp->progressive_sequence, 1);
107     mpp_put_bits(&bp, pp->field_coded_sequence, 1);
108 
109     mpp_put_bits(&bp, pp->secondary_transform_enable_flag, 1);
110     mpp_put_bits(&bp, pp->sample_adaptive_offset_enable_flag, 1);
111     mpp_put_bits(&bp, pp->adaptive_loop_filter_enable_flag, 1);
112     mpp_put_bits(&bp, pp->pmvr_enable_flag, 1);
113     mpp_put_bits(&bp, pp->cross_slice_loopfilter_enable_flag, 1);
114 
115     //!< picture header syntax
116     mpp_put_bits(&bp, pp->picture_type, 3);
117     mpp_put_bits(&bp, refp->ref_pic_num, 3);
118     mpp_put_bits(&bp, pp->scene_reference_enable_flag, 1);
119     mpp_put_bits(&bp, pp->bottom_field_picture_flag, 1);
120     mpp_put_bits(&bp, pp->fixed_picture_qp, 1);
121     mpp_put_bits(&bp, pp->picture_qp, 7);
122     mpp_put_bits(&bp, pp->loop_filter_disable_flag, 1);
123     mpp_put_bits(&bp, pp->alpha_c_offset, 5);
124     mpp_put_bits(&bp, pp->beta_offset, 5);
125 
126     //!< weight quant param
127     mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cb, 6);
128     mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cr, 6);
129     mpp_put_bits(&bp, wqmp->pic_weight_quant_enable_flag, 1);
130 
131     //!< alf param
132     mpp_put_bits(&bp, alfp->enable_pic_alf_y, 1);
133     mpp_put_bits(&bp, alfp->enable_pic_alf_cb, 1);
134     mpp_put_bits(&bp, alfp->enable_pic_alf_cr, 1);
135 
136     mpp_put_bits(&bp, alfp->alf_filter_num_minus1, 4);
137     for (i = 0; i < 16; i++)
138         mpp_put_bits(&bp, alfp->alf_coeff_idx_tab[i], 4);
139 
140     for (i = 0; i < 16; i++)
141         for (j = 0; j < 9; j++)
142             mpp_put_bits(&bp, alfp->alf_coeff_y[i][j], 7);
143 
144     for (j = 0; j < 9; j++)
145         mpp_put_bits(&bp, alfp->alf_coeff_cb[j], 7);
146 
147     for (j = 0; j < 9; j++)
148         mpp_put_bits(&bp, alfp->alf_coeff_cr[j], 7);
149 
150     /* other flags */
151     mpp_put_bits(&bp, pp->multi_hypothesis_skip_enable_flag, 1);
152     mpp_put_bits(&bp, pp->dual_hypothesis_prediction_enable_flag, 1);
153     mpp_put_bits(&bp, pp->weighted_skip_enable_flag, 1);
154     mpp_put_bits(&bp, pp->asymmetrc_motion_partitions_enable_flag, 1);
155     mpp_put_bits(&bp, pp->nonsquare_quadtree_transform_enable_flag, 1);
156     mpp_put_bits(&bp, pp->nonsquare_intra_prediction_enable_flag, 1);
157 
158     //!< picture reference params
159     mpp_put_bits(&bp, pp->cur_poc, 32);
160     for (i = 0; i < 8; i++)
161         mpp_put_bits(&bp, (i < refp->ref_pic_num) ? refp->ref_poc_list[i] : 0, 32);
162     for (i = 0; i < 8; i++)
163         mpp_put_bits(&bp, (i < refp->ref_pic_num) ? pp->field_coded_sequence : 0, 1);
164     for (i = 0; i < 8; i++)
165         mpp_put_bits(&bp, (i < refp->ref_pic_num) ? pp->bottom_field_picture_flag : 0, 1);
166     for (i = 0; i < 8; i++)
167         mpp_put_bits(&bp, (i < refp->ref_pic_num), 1);
168 
169     return MPP_OK;
170 }
171 
prepare_scalist(Avs2dHalCtx_t * p_hal,RK_U8 * data,RK_U32 len)172 static MPP_RET prepare_scalist(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len)
173 {
174     Avs2dSyntax_t *syntax = &p_hal->syntax;
175     WqmParams_Avs2d *wqmp = &syntax->wqmp;
176     RK_U32 i = 0;
177     RK_U32 n = 0;
178 
179     if (!wqmp->pic_weight_quant_enable_flag)
180         return MPP_OK;
181 
182     memset(data, 0, len);
183 
184     /* dump by block4x4, vectial direction */
185     for (i = 0; i < 4; i++) {
186         data[n++] = wqmp->wq_matrix[0][i + 0];
187         data[n++] = wqmp->wq_matrix[0][i + 4];
188         data[n++] = wqmp->wq_matrix[0][i + 8];
189         data[n++] = wqmp->wq_matrix[0][i + 12];
190     }
191 
192     /* block8x8 */
193     {
194         RK_S32 blk4_x = 0, blk4_y = 0;
195 
196         /* dump by block4x4, vectial direction */
197         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
198             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
199                 RK_S32 pos = blk4_y * 8 + blk4_x;
200 
201                 for (i = 0; i < 4; i++) {
202                     data[n++] = wqmp->wq_matrix[1][pos + i + 0];
203                     data[n++] = wqmp->wq_matrix[1][pos + i + 8];
204                     data[n++] = wqmp->wq_matrix[1][pos + i + 16];
205                     data[n++] = wqmp->wq_matrix[1][pos + i + 24];
206                 }
207             }
208         }
209     }
210 
211     return MPP_OK;
212 }
213 
/* Return the fd of the buffer attached to frame slot idx. */
static RK_S32 get_frame_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx)
{
    MppBuffer slot_buf = NULL;

    mpp_buf_slot_get_prop(p_hal->frame_slots, idx, SLOT_BUFFER, &slot_buf);

    return mpp_buffer_get_fd(slot_buf);
}
224 
/* Return the fd of the buffer attached to packet slot idx. */
static RK_S32 get_packet_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx)
{
    MppBuffer slot_buf = NULL;

    mpp_buf_slot_get_prop(p_hal->packet_slots, idx, SLOT_BUFFER, &slot_buf);

    return mpp_buffer_get_fd(slot_buf);
}
235 
/*
 * Program the frame-independent control registers of a register set:
 * decoder mode, clock auto-gating, timeout, error handling and the AXI
 * performance counters. Called once per register set at init time.
 */
static void init_ctrl_regs(Vdpu383Avs2dRegSet *regs)
{
    Vdpu383CtrlReg *ctrl = &regs->ctrl_regs;

    /* decoder mode */
    ctrl->reg8_dec_mode = 3;  // AVS2
    ctrl->reg9.buf_empty_en = 1;

    /* timeout and error handling */
    ctrl->reg13_core_timeout_threshold = 0xffffff;

    ctrl->reg16.error_proc_disable = 1;
    ctrl->reg16.error_spread_disable = 0;
    ctrl->reg16.roi_error_ctu_cal_en = 0;

    ctrl->reg20_cabac_error_en_lowbits = 0xffffffff;
    ctrl->reg21_cabac_error_en_highbits = 0x3fffffff;

    /* enable auto gating on every listed module */
    ctrl->reg10.strmd_auto_gating_e      = 1;
    ctrl->reg10.inter_auto_gating_e      = 1;
    ctrl->reg10.intra_auto_gating_e      = 1;
    ctrl->reg10.transd_auto_gating_e     = 1;
    ctrl->reg10.recon_auto_gating_e      = 1;
    ctrl->reg10.filterd_auto_gating_e    = 1;
    ctrl->reg10.bus_auto_gating_e        = 1;
    ctrl->reg10.ctrl_auto_gating_e       = 1;
    ctrl->reg10.rcb_auto_gating_e        = 1;
    ctrl->reg10.err_prc_auto_gating_e    = 1;

    /* performance counters */
    ctrl->reg28.axi_perf_work_e = 1;
    ctrl->reg28.axi_cnt_type = 1;
    ctrl->reg28.rd_latency_id = 0xb;
    ctrl->reg28.rd_latency_thr = 0;

    ctrl->reg29.addr_align_type = 2;
    ctrl->reg29.ar_cnt_id_type = 0;
    ctrl->reg29.aw_cnt_id_type = 0;
    ctrl->reg29.ar_count_id = 0xa;
    ctrl->reg29.aw_count_id = 0;
    ctrl->reg29.rd_band_width_mode = 0;
}
276 
/*
 * Override the per-entry RCB sizes with AVS2 specific values.
 *
 * rcb_info : RCB table to update; only the .size members are written.
 * width    : picture width in luma samples; aligned up to the LCU size here.
 * height   : unused.
 * dxva     : pointer to the Avs2dSyntax_t of the current picture.
 *
 * All *_TILE_ROW / *_TILE_COL entries are set to zero.
 */
static void avs2d_refine_rcb_size(Vdpu383RcbInfo *rcb_info,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    Avs2dSyntax_t *syn = dxva;
    RK_U8 lcu = 1 << syn->pp.lcu_size;
    /* NOTE(review): the chroma bit depth is the one used for every size below */
    RK_U8 depth = syn->pp.bit_depth_chroma_minus8 + 8;
    RK_U32 bits = 0;
    RK_U32 filterd_extra = 0;
    RK_U32 filterd_size = 0;

    (void) height;

    width = MPP_ALIGN(width, lcu);

    /* tile entries are never used by this codec */
    rcb_info[RCB_STRMD_TILE_ROW].size = 0;
    rcb_info[RCB_INTER_TILE_ROW].size = 0;
    rcb_info[RCB_INTRA_TILE_ROW].size = 0;
    rcb_info[RCB_FILTERD_TILE_ROW].size = 0;
    rcb_info[RCB_FILTERD_TILE_COL].size = 0;

    /* RCB_STRMD_ROW: only needed for pictures wider than 8192 */
    bits = (width > 8192) ? ((width + 63) / 64) * 112 : 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_INTER_ROW */
    bits = ((width + 7) / 8) * 166;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_INTRA_ROW */
    bits = MPP_ALIGN(width, 512) * (depth + 2);
    bits = bits * 3; //TODO:
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_FILTERD_ROW and its protect row share one size */
    filterd_extra = (width > 4096) ? 27648 : 8192;
    bits = MPP_ALIGN(width, 64) * (30 * depth + 9);
    filterd_size = filterd_extra + MPP_RCB_BYTES(bits / 2);
    rcb_info[RCB_FILTERD_ROW].size = filterd_size;
    rcb_info[RCB_FILTERD_PROTECT_ROW].size = filterd_size;
}
319 
/*
 * Recompute the RCB layout for the current picture size and (re)allocate
 * the RCB buffer of every active register set.
 *
 * hal  : Avs2dHalCtx_t pointer.
 * regs : unused.
 *
 * NOTE(review): the buffers are released and allocated again on every call,
 * even when the picture size did not change.
 * An allocation failure is only logged; the corresponding rcb_buf entry is
 * then left NULL.
 */
static void hal_avs2d_rcb_info_update(void *hal, Vdpu383Avs2dRegSet *regs)
{
    Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
    Avs2dRkvRegCtx_t *reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
    RK_S32 pic_w = p_hal->syntax.pp.pic_width_in_luma_samples;
    RK_S32 pic_h = p_hal->syntax.pp.pic_height_in_luma_samples;
    RK_S32 set_cnt = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
    RK_S32 idx = 0;
    MPP_RET ret = MPP_OK;

    (void) regs;

    /* generic layout first, then the AVS2 specific size overrides */
    reg_ctx->rcb_buf_size = vdpu383_get_rcb_buf_size(reg_ctx->rcb_info, pic_w, pic_h);
    avs2d_refine_rcb_size(reg_ctx->rcb_info, pic_w, pic_h, (void *)&p_hal->syntax);

    for (idx = 0; idx < set_cnt; idx++) {
        MppBuffer fresh = NULL;

        /* drop the previous buffer before requesting the new one */
        if (reg_ctx->rcb_buf[idx]) {
            mpp_buffer_put(reg_ctx->rcb_buf[idx]);
            reg_ctx->rcb_buf[idx] = NULL;
        }

        ret = mpp_buffer_get(p_hal->buf_group, &fresh, reg_ctx->rcb_buf_size);
        if (ret)
            mpp_err_f("AVS2D mpp_buffer_group_get failed\n");

        reg_ctx->rcb_buf[idx] = fresh;
    }
}
350 
fill_registers(Avs2dHalCtx_t * p_hal,Vdpu383Avs2dRegSet * regs,HalTaskInfo * task)351 static MPP_RET fill_registers(Avs2dHalCtx_t *p_hal, Vdpu383Avs2dRegSet *regs, HalTaskInfo *task)
352 {
353     MPP_RET ret = MPP_OK;
354     RK_U32 i;
355     MppFrame mframe = NULL;
356     Avs2dSyntax_t *syntax = &p_hal->syntax;
357     RefParams_Avs2d *refp = &syntax->refp;
358     HalDecTask *task_dec  = &task->dec;
359 
360     RK_U32 is_fbc = 0;
361     RK_U32 is_tile = 0;
362     HalBuf *mv_buf = NULL;
363 
364     mpp_buf_slot_get_prop(p_hal->frame_slots, task_dec->output, SLOT_FRAME_PTR, &mframe);
365     is_fbc = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
366     is_tile = MPP_FRAME_FMT_IS_TILE(mpp_frame_get_fmt(mframe));
367 
368     //!< caculate the yuv_frame_size
369     {
370         RK_U32 hor_virstride = 0;
371         RK_U32 ver_virstride = 0;
372         RK_U32 y_virstride = 0;
373         RK_U32 uv_virstride = 0;
374 
375         hor_virstride = mpp_frame_get_hor_stride(mframe);
376         ver_virstride = mpp_frame_get_ver_stride(mframe);
377         y_virstride = hor_virstride * ver_virstride;
378         uv_virstride = hor_virstride * ver_virstride / 2;
379         AVS2D_HAL_TRACE("is_fbc %d y_virstride %d, hor_virstride %d, ver_virstride %d\n",
380                         is_fbc, y_virstride, hor_virstride, ver_virstride);
381 
382         if (is_fbc) {
383             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
384             RK_U32 fbd_offset;
385 
386             regs->ctrl_regs.reg9.fbc_e = 1;
387             regs->avs2d_paras.reg68_hor_virstride = fbc_hdr_stride / 64;
388             fbd_offset = regs->avs2d_paras.reg68_hor_virstride * MPP_ALIGN(ver_virstride, 64) * 4;
389             regs->avs2d_addrs.reg193_fbc_payload_offset = fbd_offset;
390         } else if (is_tile) {
391             regs->ctrl_regs.reg9.tile_e = 1;
392             regs->avs2d_paras.reg68_hor_virstride = hor_virstride * 6 / 16;
393             regs->avs2d_paras.reg70_y_virstride = (y_virstride + uv_virstride) / 16;
394         } else {
395             regs->ctrl_regs.reg9.fbc_e = 0;
396             regs->ctrl_regs.reg9.tile_e = 0;
397             regs->avs2d_paras.reg68_hor_virstride = hor_virstride / 16;
398             regs->avs2d_paras.reg69_raster_uv_hor_virstride = hor_virstride / 16;
399             regs->avs2d_paras.reg70_y_virstride = y_virstride / 16;
400         }
401     }
402 
403     // set current
404     {
405         RK_S32 fd = get_frame_fd(p_hal, task_dec->output);
406 
407         mpp_assert(fd >= 0);
408 
409         regs->avs2d_addrs.reg168_decout_base = fd;
410         regs->avs2d_addrs.reg192_payload_st_cur_base = fd;
411         mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, task_dec->output);
412         regs->avs2d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
413         AVS2D_HAL_TRACE("cur frame index %d, fd %d, colmv fd %d", task_dec->output, fd, regs->avs2d_addrs.reg216_colmv_cur_base);
414 
415         // TODO: set up error_ref_base
416         // regs->avs2d_addr.reg169_err_ref_base.base = regs->avs2d_addr.reg216_colmv_cur_base.base;
417     }
418 
419     // set reference
420     {
421         RK_S32 valid_slot = -1;
422 
423         AVS2D_HAL_TRACE("num of ref %d", refp->ref_pic_num);
424 
425         for (i = 0; i < refp->ref_pic_num; i++) {
426             if (task_dec->refer[i] < 0)
427                 continue;
428 
429             valid_slot = i;
430             break;
431         }
432 
433         for (i = 0; i < MAX_REF_NUM; i++) {
434             if (i < refp->ref_pic_num) {
435                 MppFrame frame_ref = NULL;
436 
437                 RK_S32 slot_idx = task_dec->refer[i] < 0 ? task_dec->refer[valid_slot] : task_dec->refer[i];
438 
439                 if (slot_idx < 0) {
440                     AVS2D_HAL_TRACE("missing ref, could not found valid ref");
441                     task->dec.flags.ref_err = 1;
442                     return ret = MPP_ERR_UNKNOW;
443                 }
444 
445                 mpp_buf_slot_get_prop(p_hal->frame_slots, slot_idx, SLOT_FRAME_PTR, &frame_ref);
446 
447                 if (frame_ref) {
448                     regs->avs2d_addrs.reg170_185_ref_base[i] = get_frame_fd(p_hal, slot_idx);
449                     regs->avs2d_addrs.reg195_210_payload_st_ref_base[i] = get_frame_fd(p_hal, slot_idx);
450                     mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, slot_idx);
451                     regs->avs2d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
452                 }
453             }
454         }
455 
456         if (p_hal->syntax.refp.scene_ref_enable && p_hal->syntax.refp.scene_ref_slot_idx >= 0) {
457             MppFrame scene_ref = NULL;
458             RK_S32 slot_idx = p_hal->syntax.refp.scene_ref_slot_idx;
459             RK_S32 replace_idx = p_hal->syntax.refp.scene_ref_replace_pos;
460 
461             mpp_buf_slot_get_prop(p_hal->frame_slots, slot_idx, SLOT_FRAME_PTR, &scene_ref);
462 
463             if (scene_ref) {
464                 regs->avs2d_addrs.reg170_185_ref_base[replace_idx] = get_frame_fd(p_hal, slot_idx);
465                 regs->avs2d_addrs.reg195_210_payload_st_ref_base[replace_idx] = regs->avs2d_addrs.reg170_185_ref_base[replace_idx];
466                 mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, slot_idx);
467                 regs->avs2d_addrs.reg217_232_colmv_ref_base[replace_idx] = mpp_buffer_get_fd(mv_buf->buf[0]);
468             }
469         }
470 
471         regs->avs2d_addrs.reg169_error_ref_base = regs->avs2d_addrs.reg170_185_ref_base[0];
472         regs->avs2d_addrs.reg194_payload_st_error_ref_base = regs->avs2d_addrs.reg195_210_payload_st_ref_base[0];
473     }
474 
475     // set rlc
476     regs->common_addr.reg128_strm_base = get_packet_fd(p_hal, task_dec->input);
477     AVS2D_HAL_TRACE("packet fd %d from slot %d", regs->common_addr.reg128_strm_base, task_dec->input);
478 
479     regs->avs2d_paras.reg66_stream_len = MPP_ALIGN(mpp_packet_get_length(task_dec->input_packet), 16) + 64;
480 
481     {
482         //scale down config
483         mpp_buf_slot_get_prop(p_hal->frame_slots, task_dec->output,
484                               SLOT_FRAME_PTR, &mframe);
485         if (mpp_frame_get_thumbnail_en(mframe)) {
486             regs->common_addr.reg133_scale_down_base = regs->avs2d_addrs.reg168_decout_base;
487             vdpu383_setup_down_scale(mframe, p_hal->dev, &regs->ctrl_regs,
488                                      (void *)&regs->avs2d_paras);
489         } else {
490             regs->ctrl_regs.reg9.scale_down_en = 0;
491         }
492     }
493 
494     return ret;
495 }
496 
hal_avs2d_vdpu383_deinit(void * hal)497 MPP_RET hal_avs2d_vdpu383_deinit(void *hal)
498 {
499     MPP_RET ret = MPP_OK;
500     RK_U32 i, loop;
501     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
502     Avs2dRkvRegCtx_t *reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
503 
504     AVS2D_HAL_TRACE("In.");
505 
506     INP_CHECK(ret, NULL == reg_ctx);
507 
508     //!< malloc buffers
509     loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
510     for (i = 0; i < loop; i++) {
511         if (reg_ctx->rcb_buf[i]) {
512             mpp_buffer_put(reg_ctx->rcb_buf[i]);
513             reg_ctx->rcb_buf[i] = NULL;
514         }
515 
516         MPP_FREE(reg_ctx->reg_buf[i].regs);
517     }
518 
519     if (reg_ctx->bufs) {
520         mpp_buffer_put(reg_ctx->bufs);
521         reg_ctx->bufs = NULL;
522     }
523 
524     if (p_hal->cmv_bufs) {
525         hal_bufs_deinit(p_hal->cmv_bufs);
526         p_hal->cmv_bufs = NULL;
527     }
528 
529     MPP_FREE(p_hal->reg_ctx);
530 
531 __RETURN:
532     AVS2D_HAL_TRACE("Out. ret %d", ret);
533     return ret;
534 }
535 
hal_avs2d_vdpu383_init(void * hal,MppHalCfg * cfg)536 MPP_RET hal_avs2d_vdpu383_init(void *hal, MppHalCfg *cfg)
537 {
538     MPP_RET ret = MPP_OK;
539     RK_U32 i, loop;
540     Avs2dRkvRegCtx_t *reg_ctx;
541     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
542 
543     AVS2D_HAL_TRACE("In.");
544 
545     INP_CHECK(ret, NULL == p_hal);
546 
547     MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(Avs2dRkvRegCtx_t)));
548     reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
549 
550     //!< malloc buffers
551     loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
552     FUN_CHECK(ret = mpp_buffer_get(p_hal->buf_group, &reg_ctx->bufs, AVS2_ALL_TBL_BUF_SIZE(loop)));
553     reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
554     reg_ctx->bufs_ptr = mpp_buffer_get_ptr(reg_ctx->bufs);
555     mpp_buffer_attach_dev(reg_ctx->bufs, p_hal->dev);
556 
557     for (i = 0; i < loop; i++) {
558         reg_ctx->reg_buf[i].regs = mpp_calloc(Vdpu383Avs2dRegSet, 1);
559         init_ctrl_regs(reg_ctx->reg_buf[i].regs);
560         reg_ctx->reg_buf[i].offset_shph = AVS2_SHPH_OFFSET(i);
561         reg_ctx->reg_buf[i].offset_sclst = AVS2_SCALIST_OFFSET(i);
562     }
563 
564     if (!p_hal->fast_mode) {
565         reg_ctx->regs = reg_ctx->reg_buf[0].regs;
566         reg_ctx->shph_offset = reg_ctx->reg_buf[0].offset_shph;
567         reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst;
568     }
569 
570     mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
571     mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align);
572     mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align);
573 
574 __RETURN:
575     AVS2D_HAL_TRACE("Out. ret %d", ret);
576     (void)cfg;
577     return ret;
578 __FAILED:
579     hal_avs2d_vdpu383_deinit(p_hal);
580     AVS2D_HAL_TRACE("Out. ret %d", ret);
581     return ret;
582 }
583 
/*
 * Size in bytes of the co-located MV buffer for a picture.
 *
 * The picture is covered by segments of 64 * 16 bytes. A segment spans
 * (64 * 16 * 16 / ctu_w) luma samples horizontally (colmv block size 16,
 * 16 bytes per colmv entry) and one CTU row vertically.
 *
 * pic_w, pic_h : picture size in luma samples.
 * ctu_w        : CTU size in luma samples; must not be zero.
 */
static RK_S32 calc_mv_size(RK_S32 pic_w, RK_S32 pic_h, RK_S32 ctu_w)
{
    RK_S32 seg_bytes = 64 * 16;
    RK_S32 seg_w = 64 * 16 * 16 / ctu_w;
    RK_S32 segs_per_row = MPP_ALIGN(pic_w, seg_w) / seg_w;
    RK_S32 seg_rows = MPP_ALIGN(pic_h, ctu_w) / ctu_w;

    return segs_per_row * seg_rows * seg_bytes;
}
593 
set_up_colmv_buf(void * hal)594 static MPP_RET set_up_colmv_buf(void *hal)
595 {
596     MPP_RET ret = MPP_OK;
597     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
598     Avs2dSyntax_t *syntax = &p_hal->syntax;
599     PicParams_Avs2d *pp   = &syntax->pp;
600     RK_U32 ctu_size = 1 << (p_hal->syntax.pp.lcu_size);
601     RK_S32 mv_size = calc_mv_size(pp->pic_width_in_luma_samples,
602                                   pp->pic_height_in_luma_samples * (1 + pp->field_coded_sequence),
603                                   ctu_size);
604 
605     AVS2D_HAL_TRACE("mv_size %d", mv_size);
606 
607     if (p_hal->cmv_bufs == NULL || p_hal->mv_size < (RK_U32)mv_size) {
608         size_t size = mv_size;
609 
610         if (p_hal->cmv_bufs) {
611             hal_bufs_deinit(p_hal->cmv_bufs);
612             p_hal->cmv_bufs = NULL;
613         }
614 
615         hal_bufs_init(&p_hal->cmv_bufs);
616         if (p_hal->cmv_bufs == NULL) {
617             mpp_err_f("colmv bufs init fail");
618             ret = MPP_ERR_INIT;
619             goto __RETURN;
620         }
621 
622         p_hal->mv_size = mv_size;
623         p_hal->mv_count = mpp_buf_slot_get_count(p_hal->frame_slots);
624         hal_bufs_setup(p_hal->cmv_bufs, p_hal->mv_count, 1, &size);
625     }
626 
627 __RETURN:
628     return ret;
629 }
630 
hal_avs2d_vdpu383_gen_regs(void * hal,HalTaskInfo * task)631 MPP_RET hal_avs2d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
632 {
633     MPP_RET ret = MPP_OK;
634     Avs2dRkvRegCtx_t *reg_ctx;
635     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
636     Vdpu383Avs2dRegSet *regs = NULL;
637 
638     AVS2D_HAL_TRACE("In.");
639 
640     INP_CHECK(ret, NULL == p_hal);
641     if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
642         !p_hal->cfg->base.disable_error) {
643         ret = MPP_NOK;
644         goto __RETURN;
645     }
646 
647     ret = set_up_colmv_buf(p_hal);
648     if (ret)
649         goto __RETURN;
650 
651     reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
652 
653     if (p_hal->fast_mode) {
654         RK_U32 i = 0;
655 
656         for (i = 0; i <  MPP_ARRAY_ELEMS(reg_ctx->reg_buf); i++) {
657             if (!reg_ctx->reg_buf[i].valid) {
658                 task->dec.reg_index = i;
659                 regs = reg_ctx->reg_buf[i].regs;
660                 reg_ctx->shph_offset = reg_ctx->reg_buf[i].offset_shph;
661                 reg_ctx->sclst_offset = reg_ctx->reg_buf[i].offset_sclst;
662                 reg_ctx->regs = reg_ctx->reg_buf[i].regs;
663                 reg_ctx->reg_buf[i].valid = 1;
664                 break;
665             }
666         }
667 
668         mpp_assert(regs);
669     }
670 
671     regs = reg_ctx->regs;
672 
673     prepare_header(p_hal, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat) / 8);
674     prepare_scalist(p_hal, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat));
675 
676     ret = fill_registers(p_hal, regs, task);
677 
678     if (ret)
679         goto __RETURN;
680 
681     {
682         memcpy(reg_ctx->bufs_ptr + reg_ctx->shph_offset, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat));
683         memcpy(reg_ctx->bufs_ptr + reg_ctx->sclst_offset, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat));
684 
685         regs->common_addr.reg131_gbl_base = reg_ctx->bufs_fd;
686         mpp_dev_set_reg_offset(p_hal->dev, 131, reg_ctx->shph_offset);
687         regs->avs2d_paras.reg67_global_len = AVS2_383_SHPH_SIZE;
688 
689         regs->common_addr.reg132_scanlist_addr = reg_ctx->bufs_fd;
690         mpp_dev_set_reg_offset(p_hal->dev, 132, reg_ctx->sclst_offset);
691     }
692 
693     // set rcb
694     {
695         hal_avs2d_rcb_info_update(p_hal, regs);
696         vdpu383_setup_rcb(&regs->common_addr, p_hal->dev, p_hal->fast_mode ?
697                           reg_ctx->rcb_buf[task->dec.reg_index] : reg_ctx->rcb_buf[0],
698                           reg_ctx->rcb_info);
699 
700     }
701 
702     vdpu383_setup_statistic(&regs->ctrl_regs);
703     mpp_buffer_sync_end(reg_ctx->bufs);
704 
705 __RETURN:
706     AVS2D_HAL_TRACE("Out. ret %d", ret);
707     return ret;
708 }
709 
hal_avs2d_vdpu383_start(void * hal,HalTaskInfo * task)710 MPP_RET hal_avs2d_vdpu383_start(void *hal, HalTaskInfo *task)
711 {
712     MPP_RET ret = MPP_OK;
713     Vdpu383Avs2dRegSet *regs = NULL;
714     Avs2dRkvRegCtx_t *reg_ctx;
715     MppDev dev = NULL;
716     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
717 
718     AVS2D_HAL_TRACE("In.");
719     INP_CHECK(ret, NULL == p_hal);
720 
721     if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
722         !p_hal->cfg->base.disable_error) {
723         goto __RETURN;
724     }
725 
726     reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
727     regs = p_hal->fast_mode ? reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs;
728     dev = p_hal->dev;
729 
730     p_hal->frame_no++;
731 
732     do {
733         MppDevRegWrCfg wr_cfg;
734         MppDevRegRdCfg rd_cfg;
735 
736         wr_cfg.reg = &regs->ctrl_regs;
737         wr_cfg.size = sizeof(regs->ctrl_regs);
738         wr_cfg.offset = OFFSET_CTRL_REGS;
739         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
740         if (ret) {
741             mpp_err_f("set register write failed %d\n", ret);
742             break;
743         }
744 
745         wr_cfg.reg = &regs->common_addr;
746         wr_cfg.size = sizeof(regs->common_addr);
747         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
748         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
749         if (ret) {
750             mpp_err_f("set register write failed %d\n", ret);
751             break;
752         }
753 
754         wr_cfg.reg = &regs->avs2d_paras;
755         wr_cfg.size = sizeof(regs->avs2d_paras);
756         wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
757         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
758         if (ret) {
759             mpp_err_f("set register write failed %d\n", ret);
760             break;
761         }
762 
763         wr_cfg.reg = &regs->avs2d_addrs;
764         wr_cfg.size = sizeof(regs->avs2d_addrs);
765         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
766         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
767         if (ret) {
768             mpp_err_f("set register write failed %d\n", ret);
769             break;
770         }
771 
772         rd_cfg.reg = &regs->ctrl_regs.reg15;
773         rd_cfg.size = sizeof(regs->ctrl_regs.reg15);
774         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
775         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
776         if (ret) {
777             mpp_err_f("set register read failed %d\n", ret);
778             break;
779         }
780 
781         if (avs2d_hal_debug & AVS2D_HAL_DBG_REG) {
782             memset(reg_ctx->reg_out, 0, sizeof(reg_ctx->reg_out));
783             rd_cfg.reg = reg_ctx->reg_out;
784             rd_cfg.size = sizeof(reg_ctx->reg_out);
785             rd_cfg.offset = 0;
786             ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
787         }
788 
789         /* rcb info for sram */
790         vdpu383_set_rcbinfo(dev, (Vdpu383RcbInfo*)reg_ctx->rcb_info);
791 
792         // send request to hardware
793         ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
794         if (ret) {
795             mpp_err_f("send cmd failed %d\n", ret);
796             break;
797         }
798 
799     } while (0);
800 
801 __RETURN:
802     AVS2D_HAL_TRACE("Out.");
803     return ret;
804 }
805 
/*
 * Fetch sample number num of a line as an 8-bit value.
 *
 * fmt  : MPP_FMT_YUV420SP (one byte per sample) or MPP_FMT_YUV420SP_10BIT
 *        (samples packed at 10 bits each, least significant bits first).
 * line : start of the line.
 * num  : sample index within the line.
 *
 * 10-bit samples are reduced to 8 bits by dropping the two low bits.
 * Any other format yields 0.
 */
static RK_U8 fetch_data(RK_U32 fmt, RK_U8 *line, RK_U32 num)
{
    switch (fmt) {
    case MPP_FMT_YUV420SP_10BIT: {
        /* the sample starts at bit (num * 10): byte num * 10 / 8, bit (num * 2) & 7 */
        RK_U32 first_byte = num * 10 / 8;
        RK_U32 shift = (num * 2) & 7;
        RK_U32 sample = (line[first_byte] >> shift) |
                        (line[first_byte + 1] << (8 - shift));

        return (sample & 0x3ff) >> 2;
    }
    case MPP_FMT_YUV420SP:
        return line[num];
    default:
        return 0;
    }
}
823 
hal_avs2d_vdpu383_dump_yuv(void * hal,HalTaskInfo * task)824 static MPP_RET hal_avs2d_vdpu383_dump_yuv(void *hal, HalTaskInfo *task)
825 {
826     MPP_RET ret = MPP_OK;
827     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
828 
829     MppFrameFormat fmt = MPP_FMT_YUV420SP;
830     RK_U32 vir_w = 0;
831     RK_U32 vir_h = 0;
832     RK_U32 i = 0;
833     RK_U32 j = 0;
834     FILE *fp_stream = NULL;
835     char name[50];
836     MppBuffer buffer = NULL;
837     MppFrame frame;
838     void *base = NULL;
839 
840     ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_FRAME_PTR, &frame);
841 
842     if (ret != MPP_OK || frame == NULL)
843         mpp_log_f("failed to get frame slot %d", task->dec.output);
844 
845     ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_BUFFER, &buffer);
846 
847     if (ret != MPP_OK || buffer == NULL)
848         mpp_log_f("failed to get frame buffer slot %d", task->dec.output);
849 
850     AVS2D_HAL_TRACE("frame slot %d, fd %d\n", task->dec.output, mpp_buffer_get_fd(buffer));
851     base = mpp_buffer_get_ptr(buffer);
852     vir_w = mpp_frame_get_hor_stride(frame);
853     vir_h = mpp_frame_get_ver_stride(frame);
854     fmt = mpp_frame_get_fmt(frame);
855     snprintf(name, sizeof(name), "/data/tmp/rkv_out_%dx%d_nv12_%03d.yuv", vir_w, vir_h,
856              p_hal->frame_no);
857     fp_stream = fopen(name, "wb");
858 
859     if (fmt != MPP_FMT_YUV420SP_10BIT) {
860         fwrite(base, 1, vir_w * vir_h * 3 / 2, fp_stream);
861     } else {
862         RK_U8 tmp = 0;
863         for (i = 0; i < vir_h; i++) {
864             for (j = 0; j < vir_w; j++) {
865                 tmp = fetch_data(fmt, base, j);
866                 fwrite(&tmp, 1, 1, fp_stream);
867             }
868             base += vir_w;
869         }
870 
871         for (i = 0; i < vir_h / 2; i++) {
872             for (j = 0; j < vir_w; j++) {
873                 tmp = fetch_data(fmt, base, j);
874                 fwrite(&tmp, 1, 1, fp_stream);
875             }
876             base += vir_w;
877         }
878     }
879     fclose(fp_stream);
880 
881     return ret;
882 }
883 
hal_avs2d_vdpu383_wait(void * hal,HalTaskInfo * task)884 MPP_RET hal_avs2d_vdpu383_wait(void *hal, HalTaskInfo *task)
885 {
886     MPP_RET ret = MPP_OK;
887     Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal;
888     Avs2dRkvRegCtx_t *reg_ctx;
889     Vdpu383Avs2dRegSet *regs;
890 
891     INP_CHECK(ret, NULL == p_hal);
892     reg_ctx = (Avs2dRkvRegCtx_t *)p_hal->reg_ctx;
893     regs = p_hal->fast_mode ? reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs;
894 
895     if ((task->dec.flags.parse_err || task->dec.flags.ref_err) &&
896         !p_hal->cfg->base.disable_error) {
897         AVS2D_HAL_DBG(AVS2D_HAL_DBG_ERROR, "found task error.\n");
898         ret = MPP_NOK;
899         goto __RETURN;
900     } else {
901         ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
902         if (ret)
903             mpp_err_f("poll cmd failed %d\n", ret);
904     }
905 
906     if (avs2d_hal_debug & AVS2D_HAL_DBG_OUT)
907         hal_avs2d_vdpu383_dump_yuv(hal, task);
908 
909     AVS2D_HAL_TRACE("read irq_status 0x%08x\n", regs->ctrl_regs.reg19);
910 
911     if (p_hal->dec_cb) {
912         DecCbHalDone param;
913 
914         param.task = (void *)&task->dec;
915         param.regs = (RK_U32 *)regs;
916 
917         if ((!regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
918             regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
919             regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
920             regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
921             regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
922             regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
923             regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta)
924             param.hard_err = 1;
925         else
926             param.hard_err = 0;
927 
928         task->dec.flags.ref_info_valid = 0;
929 
930         AVS2D_HAL_TRACE("hal frame %d hard_err= %d", p_hal->frame_no, param.hard_err);
931 
932         mpp_callback(p_hal->dec_cb, &param);
933     }
934 
935     memset(&regs->ctrl_regs.reg19, 0, sizeof(RK_U32));
936     if (p_hal->fast_mode)
937         reg_ctx->reg_buf[task->dec.reg_index].valid = 0;
938 
939 __RETURN:
940     AVS2D_HAL_TRACE("Out. ret %d", ret);
941     return ret;
942 }
943 
944 const MppHalApi hal_avs2d_vdpu383 = {
945     .name     = "avs2d_vdpu383",
946     .type     = MPP_CTX_DEC,
947     .coding   = MPP_VIDEO_CodingAVS2,
948     .ctx_size = sizeof(Avs2dRkvRegCtx_t),
949     .flag     = 0,
950     .init     = hal_avs2d_vdpu383_init,
951     .deinit   = hal_avs2d_vdpu383_deinit,
952     .reg_gen  = hal_avs2d_vdpu383_gen_regs,
953     .start    = hal_avs2d_vdpu383_start,
954     .wait     = hal_avs2d_vdpu383_wait,
955     .reset    = NULL,
956     .flush    = NULL,
957     .control  = NULL,
958 };
959