xref: /rockchip-linux_mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3  * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4  */
5 
6 #define MODULE_TAG "hal_h265d_vdpu384a"
7 
8 #include <stdio.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <stdarg.h>
12 #include <inttypes.h>
13 #include <ctype.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 
17 #include "mpp_env.h"
18 #include "mpp_mem.h"
19 #include "mpp_bitread.h"
20 #include "mpp_bitput.h"
21 #include "mpp_buffer_impl.h"
22 
23 #include "h265d_syntax.h"
24 #include "hal_h265d_debug.h"
25 #include "hal_h265d_ctx.h"
26 #include "hal_h265d_com.h"
27 #include "hal_h265d_vdpu384a.h"
28 #include "vdpu384a_h265d.h"
29 #include "vdpu384a_com.h"
30 
31 #define PPS_SIZE                (112 * 64)//(96x64)
32 
33 #define FMT 4
34 #define CTU 3
35 
36 typedef struct {
37     RK_U32 a;
38     RK_U32 b;
39 } FilterdColBufRatio;
40 
41 #define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(2181 + 64, 128) / 8) // byte, 2181 bit + Reserve 64
42 #define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
43 #define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + SCALIST_ALIGNED_SIZE)
44 #define ALL_BUFFER_SIZE(cnt)            (INFO_BUFFER_SIZE *cnt)
45 
46 #define SPSPPS_OFFSET(pos)              (INFO_BUFFER_SIZE * pos)
47 #define SCALIST_OFFSET(pos)             (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
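/*
 * Per-task info buffer layout (derived from the macros above):
 *   SPS/PPS packet : MPP_ALIGN(2181 + 64, 128) / 8 = 288 bytes
 *   scaling lists  : MPP_ALIGN(81 * 1360, SZ_4K)   = 110592 bytes
 * Each register set owns one INFO_BUFFER_SIZE slice; ALL_BUFFER_SIZE(cnt)
 * simply multiplies that slice by the number of register sets.
 */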
48 
49 #define pocdistance(a, b)               (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
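/* pocdistance() is the absolute POC difference; gen_regs below uses it to pick
 * the decoded reference closest to the current picture as the error
 * concealment reference. */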
50 
51 static RK_U32 rkv_len_align_422(RK_U32 val)
52 {
53     return (2 * MPP_ALIGN(val, 16));
54 }
55 
56 static RK_U32 rkv_len_align_444(RK_U32 val)
57 {
58     return (3 * MPP_ALIGN(val, 16));
59 }
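/* Length helpers for non-4:2:0 chroma: align the value to 16, then scale by
 * 2x for 4:2:2 and 3x for 4:4:4. */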
60 
61 static MPP_RET vdpu384a_setup_scale_origin_bufs(HalH265dCtx *ctx, MppFrame mframe)
62 {
63     /* for 8K FrameBuf scale mode */
64     size_t origin_buf_size = 0;
65 
66     origin_buf_size = mpp_frame_get_buf_size(mframe);
67 
68     if (!origin_buf_size) {
69         mpp_err_f("origin_bufs get buf size failed\n");
70         return MPP_NOK;
71     }
72 
73     if (ctx->origin_bufs) {
74         hal_bufs_deinit(ctx->origin_bufs);
75         ctx->origin_bufs = NULL;
76     }
77     hal_bufs_init(&ctx->origin_bufs);
78     if (!ctx->origin_bufs) {
79         mpp_err_f("origin_bufs init fail\n");
80         return MPP_ERR_NOMEM;
81     }
82 
83     hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size);
84 
85     return MPP_OK;
86 }
87 
88 static MPP_RET hal_h265d_vdpu384a_init(void *hal, MppHalCfg *cfg)
89 {
90     RK_S32 ret = 0;
91     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
92 
93     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
94     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
95 
96     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
97     if (reg_ctx->scaling_qm == NULL) {
98         mpp_err("scaling_qm alloc fail");
99         return MPP_ERR_MALLOC;
100     }
101 
102     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
103     reg_ctx->pps_buf = mpp_calloc(RK_U8, SPSPPS_ALIGNED_SIZE);
104 
105     if (reg_ctx->scaling_rk == NULL) {
106         mpp_err("scaling_rk alloc fail");
107         return MPP_ERR_MALLOC;
108     }
109 
110     if (reg_ctx->group == NULL) {
111         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
112         if (ret) {
113             mpp_err("h265d mpp_buffer_group_get failed\n");
114             return ret;
115         }
116     }
117 
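    /* One INFO_BUFFER slice (SPS/PPS packet + scaling list) is reserved per
     * register set; fast_mode keeps MAX_GEN_REG sets in flight, otherwise a
     * single set is used. */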
118     {
119         RK_U32 i = 0;
120         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
121 
122         //!< malloc buffers
123         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
124         if (ret) {
125             mpp_err("h265d mpp_buffer_get failed\n");
126             return ret;
127         }
128 
129         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
130         for (i = 0; i < max_cnt; i++) {
131             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu384aH265dRegSet));
132             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
133             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
134         }
135 
136         mpp_buffer_attach_dev(reg_ctx->bufs, reg_ctx->dev);
137     }
138 
139     if (!reg_ctx->fast_mode) {
140         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
141         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
142         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
143     }
144 
145     if (cfg->hal_fbc_adj_cfg) {
146         cfg->hal_fbc_adj_cfg->func = vdpu384a_afbc_align_calc;
147         cfg->hal_fbc_adj_cfg->expand = 16;
148     }
149 
150     (void) cfg;
151     return MPP_OK;
152 }
153 
154 static MPP_RET hal_h265d_vdpu384a_deinit(void *hal)
155 {
156     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
157     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
158     RK_U32 i;
159 
160     if (reg_ctx->bufs) {
161         mpp_buffer_put(reg_ctx->bufs);
162         reg_ctx->bufs = NULL;
163     }
164 
165     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
166     for (i = 0; i < loop; i++) {
167         if (reg_ctx->rcb_buf[i]) {
168             mpp_buffer_put(reg_ctx->rcb_buf[i]);
169             reg_ctx->rcb_buf[i] = NULL;
170         }
171     }
172 
173     if (reg_ctx->group) {
174         mpp_buffer_group_put(reg_ctx->group);
175         reg_ctx->group = NULL;
176     }
177 
178     for (i = 0; i < loop; i++)
179         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
180 
181     MPP_FREE(reg_ctx->scaling_qm);
182     MPP_FREE(reg_ctx->scaling_rk);
183     MPP_FREE(reg_ctx->pps_buf);
184 
185     if (reg_ctx->cmv_bufs) {
186         hal_bufs_deinit(reg_ctx->cmv_bufs);
187         reg_ctx->cmv_bufs = NULL;
188     }
189 
190     if (reg_ctx->origin_bufs) {
191         hal_bufs_deinit(reg_ctx->origin_bufs);
192         reg_ctx->origin_bufs = NULL;
193     }
194 
195     return MPP_OK;
196 }
197 
198 #define SCALING_LIST_NUM 6
199 
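/*
 * Pack the scaling factors into the 1360-byte (170 x 64-bit) layout the
 * hardware expects: the 4x4 lists dumped column-wise, the 8x8/16x16/32x32
 * lists dumped as 4x4 sub-blocks, the 4x4 lists again row-wise, and finally
 * the 16x16/32x32 DC values, padded up to a 128-bit boundary.
 */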
200 void hal_vdpu384a_record_scaling_list(scalingFactor_t *pScalingFactor_out, scalingList_t *pScalingList)
201 {
202     RK_S32 i;
203     RK_U32 listId;
204     BitputCtx_t bp;
205 
206     mpp_set_bitput_ctx(&bp, (RK_U64 *)pScalingFactor_out, 170); // 170*64bits
207 
208     //-------- pack the lists in the order the hardware needs --------
209     //sizeId == 0, block4x4
210     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
211         RK_U8 *p_data = pScalingList->sl[0][listId];
212         /* dump by block4x4, vertical direction */
213         for (i = 0; i < 4; i++) {
214             mpp_put_bits(&bp, p_data[i + 0], 8);
215             mpp_put_bits(&bp, p_data[i + 4], 8);
216             mpp_put_bits(&bp, p_data[i + 8], 8);
217             mpp_put_bits(&bp, p_data[i + 12], 8);
218         }
219     }
220     //sizeId == 1, block8x8
221     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
222         RK_S32 blk4_x = 0, blk4_y = 0;
223         RK_U8 *p_data = pScalingList->sl[1][listId];
224 
225         /* dump by block4x4, vertical direction */
226         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
227             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
228                 RK_S32 pos = blk4_y * 8 + blk4_x;
229 
230                 for (i = 0; i < 4; i++) {
231                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
232                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
233                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
234                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
235                 }
236             }
237         }
238     }
239     //sizeId == 2, block16x16
240     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
241         RK_S32 blk4_x = 0, blk4_y = 0;
242         RK_U8 *p_data = pScalingList->sl[2][listId];
243 
244         /* dump by block4x4, vertical direction */
245         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
246             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
247                 RK_S32 pos = blk4_y * 8 + blk4_x;
248 
249                 for (i = 0; i < 4; i++) {
250                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
251                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
252                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
253                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
254                 }
255             }
256         }
257     }
258     //sizeId == 3, block32x32
259     for (listId = 0; listId < 6; listId++) {
260         RK_S32 blk4_x = 0, blk4_y = 0;
261         RK_U8 *p_data = pScalingList->sl[3][listId];
262 
263         /* dump by block4x4, vertical direction */
264         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
265             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
266                 RK_S32 pos = blk4_y * 8 + blk4_x;
267 
268                 for (i = 0; i < 4; i++) {
269                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
270                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
271                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
272                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
273                 }
274             }
275         }
276     }
277     //sizeId == 0, block4x4, horizontal direction
278     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
279         RK_U8 *p_data = pScalingList->sl[0][listId];
280 
281         for (i = 0; i < 16; i++)
282             mpp_put_bits(&bp, p_data[i], 8);
283     }
284 
285     // dump dc value
286     for (i = 0; i < SCALING_LIST_NUM; i++)//sizeId = 2, 16x16
287         mpp_put_bits(&bp, pScalingList->sl_dc[0][i], 8);
288     for (i = 0; i < SCALING_LIST_NUM; i++) //sizeId = 3, 32x32
289         mpp_put_bits(&bp, pScalingList->sl_dc[1][i], 8);
290 
291     mpp_put_align(&bp, 128, 0);
292 }
293 
294 static MPP_RET hal_h265d_vdpu384a_scalinglist_packet(void *hal, void *ptr, void *dxva)
295 {
296     scalingList_t sl;
297     RK_U32 i, j, pos;
298     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
299     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
300 
301     if (!dxva_ctx->pp.scaling_list_enabled_flag) {
302         return MPP_OK;
303     }
304 
305     if (memcmp((void*)&dxva_ctx->qm, reg_ctx->scaling_qm, sizeof(DXVA_Qmatrix_HEVC))) {
306         memset(&sl, 0, sizeof(scalingList_t));
307 
308         for (i = 0; i < 6; i++) {
309             for (j = 0; j < 16; j++) {
310                 pos = 4 * hal_hevc_diag_scan4x4_y[j] + hal_hevc_diag_scan4x4_x[j];
311                 sl.sl[0][i][pos] = dxva_ctx->qm.ucScalingLists0[i][j];
312             }
313 
314             for (j = 0; j < 64; j++) {
315                 pos = 8 * hal_hevc_diag_scan8x8_y[j] + hal_hevc_diag_scan8x8_x[j];
316                 sl.sl[1][i][pos] =  dxva_ctx->qm.ucScalingLists1[i][j];
317                 sl.sl[2][i][pos] =  dxva_ctx->qm.ucScalingLists2[i][j];
318 
319                 if (i == 0)
320                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists3[0][j];
321                 else if (i == 3)
322                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists3[1][j];
323                 else
324                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists2[i][j];
325             }
326 
327             sl.sl_dc[0][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID2[i];
328             if (i == 0)
329                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID3[0];
330             else if (i == 3)
331                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID3[1];
332             else
333                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID2[i];
334         }
335         hal_vdpu384a_record_scaling_list((scalingFactor_t *)reg_ctx->scaling_rk, &sl);
336     }
337 
338     memcpy(ptr, reg_ctx->scaling_rk, sizeof(scalingFactor_t));
339 
340     return MPP_OK;
341 }
342 
343 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
344 {
345     RK_S32 i;
346     RK_U32 log2_min_cb_size;
347     RK_S32 width, height;
348     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
349     Vdpu384aH265dRegSet *hw_reg = (Vdpu384aH265dRegSet*)(reg_ctx->hw_regs);
350     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
351     BitputCtx_t bp;
352 
353     if (NULL == reg_ctx || dxva_ctx == NULL) {
354         mpp_err("%s:%s:%d reg_ctx or dxva_ctx is NULL",
355                 __FILE__, __FUNCTION__, __LINE__);
356         return MPP_ERR_NULL_PTR;
357     }
358 
359     // SPS
360     {
361         void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
362         RK_U64 *pps_packet = reg_ctx->pps_buf;
363 
364         if (NULL == pps_ptr) {
365             mpp_err("pps_data get ptr error");
366             return MPP_ERR_NOMEM;
367         }
368 
369         log2_min_cb_size = dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3;
370         width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size);
371         height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size);
372 
373         mpp_set_bitput_ctx(&bp, pps_packet, SPSPPS_ALIGNED_SIZE / 8);
374 
375         if (dxva_ctx->pp.ps_update_flag) {
376             mpp_put_bits(&bp, dxva_ctx->pp.vps_id, 4);
377             mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4);
378             mpp_put_bits(&bp, dxva_ctx->pp.chroma_format_idc, 2);
379 
380             mpp_put_bits(&bp, width, 16);
381             mpp_put_bits(&bp, height, 16);
382             mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_luma_minus8, 3);
383             mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_chroma_minus8, 3);
384             mpp_put_bits(&bp, dxva_ctx->pp.log2_max_pic_order_cnt_lsb_minus4 + 4, 5);
385             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size, 2);
386             mpp_put_bits(&bp, dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3, 3);
387             mpp_put_bits(&bp, dxva_ctx->pp.log2_min_transform_block_size_minus2 + 2, 3);
388 
389             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_transform_block_size, 2);
390             mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_inter, 3);
391             mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_intra, 3);
392             mpp_put_bits(&bp, dxva_ctx->pp.scaling_list_enabled_flag, 1);
393             mpp_put_bits(&bp, dxva_ctx->pp.amp_enabled_flag, 1);
394             mpp_put_bits(&bp, dxva_ctx->pp.sample_adaptive_offset_enabled_flag, 1);
395             ///<-zrh comment ^  68 bit above
396             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag, 1);
397             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0, 4);
398             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0, 4);
399             mpp_put_bits(&bp, dxva_ctx->pp.pcm_loop_filter_disabled_flag, 1);
400             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_pcm_luma_coding_block_size, 3);
401             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
402 
403             mpp_put_bits(&bp, dxva_ctx->pp.num_short_term_ref_pic_sets, 7);
404             mpp_put_bits(&bp, dxva_ctx->pp.long_term_ref_pics_present_flag, 1);
405             mpp_put_bits(&bp, dxva_ctx->pp.num_long_term_ref_pics_sps, 6);
406             mpp_put_bits(&bp, dxva_ctx->pp.sps_temporal_mvp_enabled_flag, 1);
407             mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1);
408             // SPS extension
409             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_rotation_enabled_flag, 1);
410             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_context_enabled_flag, 1);
411             mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1);
412             mpp_put_bits(&bp, dxva_ctx->pp.implicit_rdpcm_enabled_flag, 1);
413             mpp_put_bits(&bp, dxva_ctx->pp.explicit_rdpcm_enabled_flag, 1);
414             mpp_put_bits(&bp, dxva_ctx->pp.extended_precision_processing_flag, 1);
415             mpp_put_bits(&bp, dxva_ctx->pp.intra_smoothing_disabled_flag, 1);
416             mpp_put_bits(&bp, dxva_ctx->pp.sps_max_dec_pic_buffering_minus1, 4);
417             mpp_put_bits(&bp, dxva_ctx->pp.separate_colour_plane_flag, 1);
418             mpp_put_bits(&bp, dxva_ctx->pp.high_precision_offsets_enabled_flag, 1);
419             mpp_put_bits(&bp, dxva_ctx->pp.persistent_rice_adaptation_enabled_flag, 1);
420 
421             /* PPS */
422             mpp_put_bits(&bp, dxva_ctx->pp.pps_id, 6);
423             mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4);
424             mpp_put_bits(&bp, dxva_ctx->pp.dependent_slice_segments_enabled_flag, 1);
425             mpp_put_bits(&bp, dxva_ctx->pp.output_flag_present_flag, 1);
426             mpp_put_bits(&bp, dxva_ctx->pp.num_extra_slice_header_bits, 13);
427 
428             mpp_put_bits(&bp, dxva_ctx->pp.sign_data_hiding_enabled_flag, 1);
429             mpp_put_bits(&bp, dxva_ctx->pp.cabac_init_present_flag, 1);
430             mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l0_default_active_minus1 + 1, 4);
431             mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l1_default_active_minus1 + 1, 4);
432             mpp_put_bits(&bp, dxva_ctx->pp.init_qp_minus26, 7);
433             mpp_put_bits(&bp, dxva_ctx->pp.constrained_intra_pred_flag, 1);
434             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_enabled_flag, 1);
435             mpp_put_bits(&bp, dxva_ctx->pp.cu_qp_delta_enabled_flag, 1);
436             mpp_put_bits(&bp, log2_min_cb_size + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size - dxva_ctx->pp.diff_cu_qp_delta_depth, 3);
437 
438             mpp_put_bits(&bp, dxva_ctx->pp.pps_cb_qp_offset, 5);
439             mpp_put_bits(&bp, dxva_ctx->pp.pps_cr_qp_offset, 5);
440             mpp_put_bits(&bp, dxva_ctx->pp.pps_slice_chroma_qp_offsets_present_flag, 1);
441             mpp_put_bits(&bp, dxva_ctx->pp.weighted_pred_flag, 1);
442             mpp_put_bits(&bp, dxva_ctx->pp.weighted_bipred_flag, 1);
443             mpp_put_bits(&bp, dxva_ctx->pp.transquant_bypass_enabled_flag, 1);
444             mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag, 1);
445             mpp_put_bits(&bp, dxva_ctx->pp.entropy_coding_sync_enabled_flag, 1);
446             mpp_put_bits(&bp, dxva_ctx->pp.pps_loop_filter_across_slices_enabled_flag, 1);
447 
448             mpp_put_bits(&bp, dxva_ctx->pp.loop_filter_across_tiles_enabled_flag, 1);
449             mpp_put_bits(&bp, dxva_ctx->pp.deblocking_filter_override_enabled_flag, 1);
450             mpp_put_bits(&bp, dxva_ctx->pp.pps_deblocking_filter_disabled_flag, 1);
451             mpp_put_bits(&bp, dxva_ctx->pp.pps_beta_offset_div2, 4);
452             mpp_put_bits(&bp, dxva_ctx->pp.pps_tc_offset_div2, 4);
453             mpp_put_bits(&bp, dxva_ctx->pp.lists_modification_present_flag, 1);
454             mpp_put_bits(&bp, dxva_ctx->pp.log2_parallel_merge_level_minus2 + 2, 3);
455             mpp_put_bits(&bp, dxva_ctx->pp.slice_segment_header_extension_present_flag, 1);
456             mpp_put_bits(&bp, 0, 3);
457 
458             // PPS extension
459             if (dxva_ctx->pp.log2_max_transform_skip_block_size > 2) {
460                 mpp_put_bits(&bp, dxva_ctx->pp.log2_max_transform_skip_block_size - 2, 2);
461             } else {
462                 mpp_put_bits(&bp, 0, 2);
463             }
464             mpp_put_bits(&bp, dxva_ctx->pp.cross_component_prediction_enabled_flag, 1);
465             mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_enabled_flag, 1);
466 
467             RK_S32 log2_min_cu_chroma_qp_delta_size = log2_min_cb_size +
468                                                       dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size -
469                                                       dxva_ctx->pp.diff_cu_chroma_qp_offset_depth;
470             mpp_put_bits(&bp, log2_min_cu_chroma_qp_delta_size, 3);
471             for (i = 0; i < 6; i++)
472                 mpp_put_bits(&bp, dxva_ctx->pp.cb_qp_offset_list[i], 5);
473             for (i = 0; i < 6; i++)
474                 mpp_put_bits(&bp, dxva_ctx->pp.cr_qp_offset_list[i], 5);
475             mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_len_minus1, 3);
476 
477             /* mvc0 && mvc1 */
478             mpp_put_bits(&bp, 0xffff, 16);
479             mpp_put_bits(&bp, 0, 1);
480             mpp_put_bits(&bp, 0, 6);
481             mpp_put_bits(&bp, 0, 1);
482             mpp_put_bits(&bp, 0, 1);
483         } else {
484             bp.index = 4;
485             bp.bitpos = 41;
486             bp.bvalue = bp.pbuf[bp.index] & MPP_GENMASK(bp.bitpos - 1, 0);
487         }
488         /* poc info */
489         {
490             RK_S32 dpb_valid[15] = {0}, refpic_poc[15] = {0};
491 
492             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
493                 if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
494                     dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
495                     dpb_valid[i] = 1;
496                     refpic_poc[i] = dxva_ctx->pp.PicOrderCntValList[i];
497                 }
498             }
499 
500             mpp_put_bits(&bp, 0, 1);
501             mpp_put_bits(&bp, 0, 1);
502             mpp_put_bits(&bp, 0, 1);
503             mpp_put_bits(&bp, dxva_ctx->pp.current_poc, 32);
504 
505             for (i = 0; i < 15; i++)
506                 mpp_put_bits(&bp, refpic_poc[i], 32);
507             mpp_put_bits(&bp, 0, 32);
508             for (i = 0; i < 15; i++)
509                 mpp_put_bits(&bp, dpb_valid[i], 1);
510             mpp_put_bits(&bp, 0, 1);
511         }
512 
513         /* tile info */
514         mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? (dxva_ctx->pp.num_tile_columns_minus1 + 1) : 1, 5);
515         mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? (dxva_ctx->pp.num_tile_rows_minus1 + 1) : 1, 5);
516         {
517             /// tiles info begin
518             RK_U16 column_width[20];
519             RK_U16 row_height[22];
520 
521             memset(column_width, 0, sizeof(column_width));
522             memset(row_height, 0, sizeof(row_height));
523 
524             if (dxva_ctx->pp.tiles_enabled_flag) {
525                 if (dxva_ctx->pp.uniform_spacing_flag == 0) {
526                     RK_S32 maxcuwidth = dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
527                     RK_S32 ctu_width_in_pic = (width +
528                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
529                     RK_S32 ctu_height_in_pic = (height +
530                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
531                     RK_S32 sum = 0;
532                     for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1; i++) {
533                         column_width[i] = dxva_ctx->pp.column_width_minus1[i] + 1;
534                         sum += column_width[i]  ;
535                     }
536                     column_width[i] = ctu_width_in_pic - sum;
537 
538                     sum = 0;
539                     for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1; i++) {
540                         row_height[i] = dxva_ctx->pp.row_height_minus1[i] + 1;
541                         sum += row_height[i];
542                     }
543                     row_height[i] = ctu_height_in_pic - sum;
544                 }  else {
545                     RK_S32    pic_in_cts_width = (width +
546                                                   (1 << (log2_min_cb_size +
547                                                          dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
548                                                  / (1 << (log2_min_cb_size +
549                                                           dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size));
550                     RK_S32 pic_in_cts_height = (height +
551                                                 (1 << (log2_min_cb_size +
552                                                        dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
553                                                / (1 << (log2_min_cb_size +
554                                                         dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size));
555 
556                     for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1 + 1; i++)
557                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1) -
558                                           (i * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1);
559 
560                     for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1 + 1; i++)
561                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1) -
562                                         (i * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1);
563                 }
564             } else {
565                 RK_S32 MaxCUWidth = (1 << (dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
566                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
567                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
568             }
569 
570             for (i = 0; i < 20; i++)
571                 mpp_put_bits(&bp, column_width[i], 12);
572 
573             for (i = 0; i < 22; i++)
574                 mpp_put_bits(&bp, row_height[i], 12);
575         }
576 
577         /* update rps */
578         if (dxva_ctx->pp.rps_update_flag) {
579             Short_SPS_RPS_HEVC *cur_st_rps_ptr = &dxva_ctx->pp.cur_st_rps;
580 
581             for (i = 0; i < 32; i ++) {
582                 mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].lt_ref_pic_poc_lsb, 16);
583                 mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].used_by_curr_pic_lt_flag, 1);
584             }
585 
586             mpp_put_bits(&bp, cur_st_rps_ptr->num_negative_pics, 4);
587             mpp_put_bits(&bp, cur_st_rps_ptr->num_positive_pics, 4);
588 
589             for (i = 0; i <  cur_st_rps_ptr->num_negative_pics; i++) {
590                 mpp_put_bits(&bp, cur_st_rps_ptr->delta_poc_s0[i], 16);
591                 mpp_put_bits(&bp, cur_st_rps_ptr->s0_used_flag[i], 1);
592             }
593 
594             for (i = 0; i <  cur_st_rps_ptr->num_positive_pics; i++) {
595                 mpp_put_bits(&bp, cur_st_rps_ptr->delta_poc_s1[i], 16);
596                 mpp_put_bits(&bp, cur_st_rps_ptr->s1_used_flag[i], 1);
597             }
598 
599             for ( i = cur_st_rps_ptr->num_negative_pics + cur_st_rps_ptr->num_positive_pics; i < 15; i++) {
600                 mpp_put_bits(&bp, 0, 16);
601                 mpp_put_bits(&bp, 0, 1);
602             }
603             mpp_put_align(&bp, 64, 0);//128
604         }
605         memcpy(pps_ptr, reg_ctx->pps_buf, SPSPPS_ALIGNED_SIZE);
606     } /* --- end spspps data ------*/
607 
608     if (dxva_ctx->pp.scaling_list_enabled_flag) {
609         RK_U32 addr;
610         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
611 
612         if (dxva_ctx->pp.scaling_list_data_present_flag) {
613             addr = (dxva_ctx->pp.pps_id + 16) * 1360;
614         } else if (dxva_ctx->pp.scaling_list_enabled_flag) {
615             addr = dxva_ctx->pp.sps_id * 1360;
616         } else {
617             addr = 80 * 1360;
618         }
619 
620         hal_h265d_vdpu384a_scalinglist_packet(hal, ptr_scaling + addr, dxva);
621 
622         hw_reg->common_addr.reg132_scanlist_addr = reg_ctx->bufs_fd;
623         mpp_dev_set_reg_offset(reg_ctx->dev, 132, addr + reg_ctx->sclst_offset);
624     }
625 
626 #ifdef dump
627     fwrite(pps_ptr, 1, 80 * 64, fp);
628     RK_U32 *tmp = (RK_U32 *)pps_ptr;
629     for (i = 0; i < 112 / 4; i++) {
630         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
631     }
632 #endif
633 #ifdef DUMP_VDPU384A_DATAS
634     {
635         char *cur_fname = "global_cfg.dat";
636         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
637         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
638         dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 18 * 128, 128, 0);
639     }
640 #endif
641 
642     return 0;
643 }
644 
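/*
 * Refine the per-component RCB (row/column cache buffer) sizes for this
 * sequence: sizes scale with the CTU-aligned width/height, bit depth and
 * chroma format, and the tile variants are only allocated when the picture
 * is actually cut into tile rows/columns.
 */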
645 static void h265d_refine_rcb_size(Vdpu384aRcbInfo *rcb_info,
646                                   RK_S32 width, RK_S32 height, void *dxva)
647 {
648     RK_U32 rcb_bits = 0;
649     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
650     DXVA_PicParams_HEVC *pp = &dxva_ctx->pp;
651     RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 4:0:0, 1: 4:2:0, 2: 4:2:2, 3: 4:4:4
652     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
653     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
654     RK_U32 tile_row_cut_num = pp->num_tile_rows_minus1;
655     RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
656     RK_U32 ext_row_align_size = tile_row_cut_num * 64 * 8;
657     RK_U32 ext_col_align_size = tile_col_cut_num * 64 * 8;
658     RK_U32 filterd_row_append = 8192;
659     RK_U32 row_uv_para = 0;
660     RK_U32 col_uv_para = 0;
661 
662     if (chroma_fmt_idc == 1) {
663         row_uv_para = 1;
664         col_uv_para = 1;
665     } else if (chroma_fmt_idc == 2) {
666         row_uv_para = 1;
667         col_uv_para = 3;
668     } else if (chroma_fmt_idc == 3) {
669         row_uv_para = 3;
670         col_uv_para = 3;
671     }
672 
673     width = MPP_ALIGN(width, ctu_size);
674     height = MPP_ALIGN(height, ctu_size);
675     /* RCB_STRMD_ROW && RCB_STRMD_TILE_ROW*/
676     rcb_info[RCB_STRMD_ROW].size = 0;
677     rcb_info[RCB_STRMD_TILE_ROW].size = 0;
678 
679     /* RCB_INTER_ROW && RCB_INTER_TILE_ROW*/
680     rcb_bits = ((width + 7) / 8) * 174;
681     rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
682     rcb_bits += ext_row_align_size;
683     if (tile_row_cut_num)
684         rcb_info[RCB_INTER_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
685     else
686         rcb_info[RCB_INTER_TILE_ROW].size = 0;
687 
688     /* RCB_INTRA_ROW && RCB_INTRA_TILE_ROW*/
689     rcb_bits = MPP_ALIGN(width, 512) * (bit_depth + 2);
690     rcb_bits = rcb_bits * 4; //TODO:
691     rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
692     rcb_bits += ext_row_align_size;
693     if (tile_row_cut_num)
694         rcb_info[RCB_INTRA_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
695     else
696         rcb_info[RCB_INTRA_TILE_ROW].size = 0;
697 
698     /* RCB_FILTERD_ROW && RCB_FILTERD_TILE_ROW*/
699     rcb_bits = (MPP_ALIGN(width, 64) * (1.2 * bit_depth + 0.5) * (8 + 5 * row_uv_para));
700     // save space mode : half for RCB_FILTERD_ROW, half for RCB_FILTERD_PROTECT_ROW
701     if (width > 4096)
702         filterd_row_append = 27648;
703     rcb_info[RCB_FILTERD_ROW].size = MPP_RCB_BYTES(rcb_bits / 2) + filterd_row_append;
704     rcb_info[RCB_FILTERD_PROTECT_ROW].size = MPP_RCB_BYTES(rcb_bits / 2) + filterd_row_append;
705     rcb_bits += ext_row_align_size;
706     if (tile_row_cut_num)
707         rcb_info[RCB_FILTERD_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
708     else
709         rcb_info[RCB_FILTERD_TILE_ROW].size = 0;
710 
711     /* RCB_FILTERD_TILE_COL */
712     if (tile_col_cut_num) {
713         rcb_bits = (MPP_ALIGN(height, 64) * (1.6 * bit_depth + 0.5) * (16.5 + 5.5 * col_uv_para)) + ext_col_align_size;
714         rcb_info[RCB_FILTERD_TILE_COL].size = MPP_RCB_BYTES(rcb_bits);
715     } else {
716         rcb_info[RCB_FILTERD_TILE_COL].size = 0;
717     }
718 
719 }
720 
721 static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
722                                       Vdpu384aH265dRegSet *hw_regs,
723                                       RK_S32 width, RK_S32 height)
724 {
725     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
726     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
727     DXVA_PicParams_HEVC *pp = &dxva_ctx->pp;
728     RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 4:0:0, 1: 4:2:0, 2: 4:2:2, 3: 4:4:4
729     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
730     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
731     RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;
732     (void)hw_regs;
733 
734     if (reg_ctx->num_row_tiles != num_tiles ||
735         reg_ctx->bit_depth != bit_depth ||
736         reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
737         reg_ctx->ctu_size !=  ctu_size ||
738         reg_ctx->width != width ||
739         reg_ctx->height != height) {
740         RK_U32 i = 0;
741         RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
742 
743         reg_ctx->rcb_buf_size = vdpu384a_get_rcb_buf_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height);
744         h265d_refine_rcb_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height, dxva_ctx);
745         /* vdpu384a_check_rcb_buf_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height); */
746 
747         for (i = 0; i < loop; i++) {
748             MppBuffer rcb_buf;
749 
750             if (reg_ctx->rcb_buf[i]) {
751                 mpp_buffer_put(reg_ctx->rcb_buf[i]);
752                 reg_ctx->rcb_buf[i] = NULL;
753             }
754             mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size);
755             reg_ctx->rcb_buf[i] = rcb_buf;
756         }
757 
758         reg_ctx->num_row_tiles  = num_tiles;
759         reg_ctx->bit_depth      = bit_depth;
760         reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
761         reg_ctx->ctu_size       = ctu_size;
762         reg_ctx->width          = width;
763         reg_ctx->height         = height;
764     }
765 }
766 
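/*
 * Co-located MV buffer sizing (rough derivation from the constants noted
 * below): each 16x16 block stores 16 bytes of col-MV data; a segment is
 * ctu_w pixels tall and seg_w pixels wide, so it always covers 64 blocks
 * (1024 bytes), giving a total of seg_cnt_w * seg_cnt_h * 1024 bytes.
 */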
767 static RK_S32 calc_mv_size(RK_S32 pic_w, RK_S32 pic_h, RK_S32 ctu_w)
768 {
769     RK_S32 seg_w = 64 * 16 * 16 / ctu_w; // colmv_block_size = 16, colmv_per_bytes = 16
770     RK_S32 seg_cnt_w = MPP_ALIGN(pic_w, seg_w) / seg_w;
771     RK_S32 seg_cnt_h = MPP_ALIGN(pic_h, ctu_w) / ctu_w;
772     RK_S32 mv_size   = seg_cnt_w * seg_cnt_h * 64 * 16;
773 
774     return mv_size;
775 }
776 
777 static MPP_RET hal_h265d_vdpu384a_gen_regs(void *hal,  HalTaskInfo *syn)
778 {
779     RK_S32 i = 0;
780     RK_S32 log2_min_cb_size;
781     RK_S32 width, height;
782     RK_S32 stride_y, stride_uv, virstrid_y;
783     Vdpu384aH265dRegSet *hw_regs;
784     RK_S32 ret = MPP_SUCCESS;
785     MppBuffer streambuf = NULL;
786     RK_S32 aglin_offset = 0;
787     RK_S32 valid_ref = -1;
788     MppBuffer framebuf = NULL;
789     HalBuf *mv_buf = NULL;
790     RK_S32 fd = -1;
791     RK_U32 mv_size = 0;
792     RK_S32 distance = INT_MAX;
793     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
794 
795     (void) fd;
796     if (syn->dec.flags.parse_err ||
797         (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
798         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
799         return MPP_OK;
800     }
801 
802     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
803     HalBuf *origin_buf = NULL;
804 
805     if (reg_ctx->fast_mode) {
806         for (i = 0; i < MAX_GEN_REG; i++) {
807             if (!reg_ctx->g_buf[i].use_flag) {
808                 syn->dec.reg_index = i;
809 
810                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
811                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
812 
813                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
814                 reg_ctx->g_buf[i].use_flag = 1;
815                 break;
816             }
817         }
818         if (i == MAX_GEN_REG) {
819             mpp_err("hevc rps buf all used");
820             return MPP_ERR_NOMEM;
821         }
822     }
823 
824     if (syn->dec.syntax.data == NULL) {
825         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
826         return MPP_ERR_NULL_PTR;
827     }
828 
829 #ifdef DUMP_VDPU384A_DATAS
830     {
831         memset(dump_cur_dir, 0, sizeof(dump_cur_dir));
832         sprintf(dump_cur_dir, "/data/hevc/Frame%04d", dump_cur_frame);
833         if (access(dump_cur_dir, 0)) {
834             if (mkdir(dump_cur_dir))
835                 mpp_err_f("error: mkdir %s\n", dump_cur_dir);
836         }
837         dump_cur_frame++;
838     }
839 #endif
840 
841     /* output pps */
842     hw_regs = (Vdpu384aH265dRegSet*)reg_ctx->hw_regs;
843     memset(hw_regs, 0, sizeof(Vdpu384aH265dRegSet));
844 
845     if (NULL == reg_ctx->hw_regs) {
846         return MPP_ERR_NULL_PTR;
847     }
848 
849 
850     log2_min_cb_size = dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3;
851     width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size);
852     height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size);
853     mv_size = calc_mv_size(width, height, 1 << log2_min_cb_size) * 2;
854 
855     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
856         size_t size = mv_size;
857 
858         if (reg_ctx->cmv_bufs) {
859             hal_bufs_deinit(reg_ctx->cmv_bufs);
860             reg_ctx->cmv_bufs = NULL;
861         }
862 
863         hal_bufs_init(&reg_ctx->cmv_bufs);
864         if (reg_ctx->cmv_bufs == NULL) {
865             mpp_err_f("colmv bufs init fail");
866             return MPP_ERR_NULL_PTR;
867         }
868 
869         reg_ctx->mv_size = mv_size;
870         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
871         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
872     }
873 
874     {
875         MppFrame mframe = NULL;
876         RK_U32 ver_virstride;
877         RK_U32 virstrid_uv;
878         MppFrameFormat fmt;
879         RK_U32 chroma_fmt_idc = dxva_ctx->pp.chroma_format_idc;
880 
881         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
882                               SLOT_FRAME_PTR, &mframe);
883         /* for 8K downscale mode*/
884         if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY &&
885             reg_ctx->origin_bufs == NULL) {
886             vdpu384a_setup_scale_origin_bufs(reg_ctx, mframe);
887         }
888 
889         fmt = mpp_frame_get_fmt(mframe);
890 
891         stride_y = mpp_frame_get_hor_stride(mframe);
892         ver_virstride = mpp_frame_get_ver_stride(mframe);
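        /* Chroma stride/size per chroma_format_idc: 4:4:4 doubles the uv
         * horizontal stride; 4:2:2 and 4:4:4 keep full-height chroma, while
         * 4:2:0 uses half-height (the same path is taken for 4:0:0). */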
893         stride_uv = stride_y;
894         virstrid_y = ver_virstride * stride_y;
895         if (chroma_fmt_idc == 3)
896             stride_uv *= 2;
897         if (chroma_fmt_idc == 3 || chroma_fmt_idc == 2) {
898             virstrid_uv = stride_uv * ver_virstride;
899         } else {
900             virstrid_uv = stride_uv * ver_virstride / 2;
901         }
902         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
903             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
904             RK_U32 fbd_offset;
905 
906             hw_regs->ctrl_regs.reg9.dpb_data_sel = 0;
907             hw_regs->ctrl_regs.reg9.dpb_output_dis = 0;
908             hw_regs->ctrl_regs.reg9.pp_m_output_mode = 0;
909 
910             hw_regs->h265d_paras.reg68_dpb_hor_virstride = fbc_hdr_stride / 64;
911             fbd_offset = fbc_hdr_stride * MPP_ALIGN(ver_virstride, 64) / 16;
912             hw_regs->h265d_addrs.reg193_dpb_fbc64x4_payload_offset = fbd_offset;
913             hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg68_dpb_hor_virstride;
914         } else if (MPP_FRAME_FMT_IS_TILE(fmt)) {
915             hw_regs->ctrl_regs.reg9.dpb_data_sel = 1;
916             hw_regs->ctrl_regs.reg9.dpb_output_dis = 1;
917             hw_regs->ctrl_regs.reg9.pp_m_output_mode = 2;
918 
919             if (chroma_fmt_idc == 0) { //yuv400
920                 hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 4 / 16;
921             } else if (chroma_fmt_idc == 2) { //yuv422
922                 hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 8 / 16;
923             } else if (chroma_fmt_idc == 3) { //yuv444
924                 hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 12 / 16;
925             } else { //yuv420
926                 hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 6 / 16;
927             }
928             hw_regs->h265d_paras.reg79_pp_m_y_virstride = (virstrid_y + virstrid_uv) / 16;
929             hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg77_pp_m_hor_stride;
930         } else {
931             hw_regs->ctrl_regs.reg9.dpb_data_sel = 1;
932             hw_regs->ctrl_regs.reg9.dpb_output_dis = 1;
933             hw_regs->ctrl_regs.reg9.pp_m_output_mode = 1;
934 
935             hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y >> 4;
936             hw_regs->h265d_paras.reg78_pp_m_uv_hor_stride = stride_uv >> 4;
937             hw_regs->h265d_paras.reg79_pp_m_y_virstride = virstrid_y >> 4;
938             hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg77_pp_m_hor_stride;
939         }
940         hw_regs->h265d_paras.reg81_error_ref_raster_uv_hor_virstride = hw_regs->h265d_paras.reg78_pp_m_uv_hor_stride;
941         hw_regs->h265d_paras.reg82_error_ref_virstride = hw_regs->h265d_paras.reg79_pp_m_y_virstride;
942     }
943     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
944                           SLOT_BUFFER, &framebuf);
945 
946     if (reg_ctx->origin_bufs) {
947         origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs,
948                                       dxva_ctx->pp.CurrPic.Index7Bits);
949         framebuf = origin_buf->buf[0];
950     }
951 
952     /* output rkfbc64 */
953     // hw_regs->h265d_addrs.reg168_dpb_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
954     /* output raster/tile4x4 */
955     hw_regs->common_addr.reg135_pp_m_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
956     hw_regs->h265d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(framebuf);
957     /* if out_base is equal to zero, it means this frame may be in error,
958        so we return directly. (added by csy) */
959 
960     /* output rkfbc64 */
961     // if (!hw_regs->h265d_addrs.reg168_dpb_decout_base)
962     //     return 0;
963     /* output raster/tile4x4 */
964     if (!hw_regs->common_addr.reg135_pp_m_decout_base)
965         return 0;
966 
967     fd =  mpp_buffer_get_fd(framebuf);
968     /* output rkfbc64 */
969     // hw_regs->h265d_addrs.reg168_dpb_decout_base = fd;
970     /* output raster/tile4x4 */
971     hw_regs->common_addr.reg135_pp_m_decout_base = fd;
972     hw_regs->h265d_addrs.reg192_dpb_payload64x4_st_cur_base = fd;
973     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.CurrPic.Index7Bits);
974 
975     hw_regs->h265d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
976 #ifdef DUMP_VDPU384A_DATAS
977     {
978         char *cur_fname = "colmv_cur_frame.dat";
979         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
980         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
981         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(mv_buf->buf[0]),
982                           mpp_buffer_get_size(mv_buf->buf[0]), 64, 0);
983     }
984 #endif
985 
986     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
987                           &streambuf);
988     if ( dxva_ctx->bitstream == NULL) {
989         dxva_ctx->bitstream = mpp_buffer_get_ptr(streambuf);
990     }
991 
992 #ifdef DUMP_VDPU384A_DATAS
993     {
994         char *cur_fname = "stream_in_128bit.dat";
995         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
996         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
997         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(streambuf),
998                           mpp_buffer_get_size(streambuf), 128, 0);
999     }
1000 #endif
1001 
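    /* Stream buffer registers: base/start/end all reference the packet buffer;
     * the length is the bitstream size rounded up to 16 bytes plus 64 padding
     * bytes, and the padding area is zeroed below. */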
1002     hw_regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(streambuf);
1003     hw_regs->h265d_paras.reg66_stream_len = ((dxva_ctx->bitstream_size + 15) & (~15)) + 64;
1004     hw_regs->common_addr.reg129_stream_buf_st_base = mpp_buffer_get_fd(streambuf);
1005     hw_regs->common_addr.reg130_stream_buf_end_base = mpp_buffer_get_fd(streambuf);
1006     mpp_dev_set_reg_offset(reg_ctx->dev, 130, mpp_buffer_get_size(streambuf));
1007     aglin_offset =  hw_regs->h265d_paras.reg66_stream_len - dxva_ctx->bitstream_size;
1008     if (aglin_offset > 0)
1009         memset((void *)(dxva_ctx->bitstream + dxva_ctx->bitstream_size), 0, aglin_offset);
1010 
1011     /* common setting */
1012     hw_regs->ctrl_regs.reg8_dec_mode = 0; // hevc
1013     hw_regs->ctrl_regs.reg9.low_latency_en = 0;
1014 
1015     hw_regs->ctrl_regs.reg10.strmd_auto_gating_e      = 1;
1016     hw_regs->ctrl_regs.reg10.inter_auto_gating_e      = 1;
1017     hw_regs->ctrl_regs.reg10.intra_auto_gating_e      = 1;
1018     hw_regs->ctrl_regs.reg10.transd_auto_gating_e     = 1;
1019     hw_regs->ctrl_regs.reg10.recon_auto_gating_e      = 1;
1020     hw_regs->ctrl_regs.reg10.filterd_auto_gating_e    = 1;
1021     hw_regs->ctrl_regs.reg10.bus_auto_gating_e        = 1;
1022     hw_regs->ctrl_regs.reg10.ctrl_auto_gating_e       = 1;
1023     hw_regs->ctrl_regs.reg10.rcb_auto_gating_e        = 1;
1024     hw_regs->ctrl_regs.reg10.err_prc_auto_gating_e    = 1;
1025 
1026     hw_regs->ctrl_regs.reg11.rd_outstanding = 32;
1027     hw_regs->ctrl_regs.reg11.wr_outstanding = 250;
1028     // hw_regs->ctrl_regs.reg11.dec_timeout_dis = 1;
1029 
1030     hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1031     hw_regs->ctrl_regs.reg16.error_spread_disable = 0;
1032     hw_regs->ctrl_regs.reg16.roi_error_ctu_cal_en = 0;
1033 
1034     hw_regs->ctrl_regs.reg20_cabac_error_en_lowbits = 0xffffffff;
1035     hw_regs->ctrl_regs.reg21_cabac_error_en_highbits = 0x3ff3f9ff;
1036 
1037     hw_regs->ctrl_regs.reg13_core_timeout_threshold = 0xffff;
1038 
1039 
1040     /* output rkfbc64 */
1041     // valid_ref = hw_regs->h265d_addrs.reg168_dpb_decout_base;
1042     /* output raster/tile4x4 */
1043     valid_ref = hw_regs->common_addr.reg135_pp_m_decout_base;
1044     reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.CurrPic.Index7Bits;
1045 
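    /* Walk the reference list: program ref / payload / colmv bases for each
     * valid DPB entry, and keep the error-free reference whose POC is closest
     * to the current picture as the error concealment reference. */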
1046     hw_regs->h265d_addrs.reg169_error_ref_base = valid_ref;
1047     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
1048         if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
1049             dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
1050 
1051             MppFrame mframe = NULL;
1052             mpp_buf_slot_get_prop(reg_ctx->slots,
1053                                   dxva_ctx->pp.RefPicList[i].Index7Bits,
1054                                   SLOT_BUFFER, &framebuf);
1055             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits,
1056                                   SLOT_FRAME_PTR, &mframe);
1057             if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
1058                 origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs,
1059                                               dxva_ctx->pp.RefPicList[i].Index7Bits);
1060                 framebuf = origin_buf->buf[0];
1061             }
1062             if (framebuf != NULL) {
1063                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = mpp_buffer_get_fd(framebuf);
1064                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = mpp_buffer_get_fd(framebuf);
1065                 valid_ref = hw_regs->h265d_addrs.reg170_185_ref_base[i];
1066                 if ((pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc) < distance)
1067                     && (!mpp_frame_get_errinfo(mframe))) {
1068 
1069                     distance = pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc);
1070                     hw_regs->h265d_addrs.reg169_error_ref_base = hw_regs->h265d_addrs.reg170_185_ref_base[i];
1071                     reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.RefPicList[i].Index7Bits;
1072                     hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1073                 }
1074             } else {
1075                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = valid_ref;
1076                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = valid_ref;
1077             }
1078 
1079             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.RefPicList[i].Index7Bits);
1080             hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1081         }
1082     }
1083 
1084     if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_ctx->pp.CurrPic.Index7Bits) &&
1085         !dxva_ctx->pp.IntraPicFlag) {
1086         h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
1087         syn->dec.flags.ref_err = 1;
1088         return MPP_OK;
1089     }
1090 
1091     /* pps */
1092     hw_regs->common_addr.reg131_gbl_base = reg_ctx->bufs_fd;
1093     hw_regs->h265d_paras.reg67_global_len = SPSPPS_ALIGNED_SIZE / 16;
1094 
1095     mpp_dev_set_reg_offset(reg_ctx->dev, 131, reg_ctx->spspps_offset);
1096 
1097     hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
1098 
1099     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
1100 
1101         if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
1102             dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
1103             MppFrame mframe = NULL;
1104 
1105             mpp_buf_slot_get_prop(reg_ctx->slots,
1106                                   dxva_ctx->pp.RefPicList[i].Index7Bits,
1107                                   SLOT_BUFFER, &framebuf);
1108 
1109             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits,
1110                                   SLOT_FRAME_PTR, &mframe);
1111 
1112             if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
1113                 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1114                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1115                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1116                 hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1117             }
1118         } else {
1119             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1120             hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1121             hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1122             hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1123         }
1124     }
1125 
1126     hal_h265d_rcb_info_update(hal, dxva_ctx, hw_regs, width, height);
1127     vdpu384a_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
1128                        reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
1129                        (Vdpu384aRcbInfo *)reg_ctx->rcb_info);
1130     vdpu384a_setup_statistic(&hw_regs->ctrl_regs);
1131     mpp_buffer_sync_end(reg_ctx->bufs);
1132 
1133     {
1134         //scale down config
1135         MppFrame mframe = NULL;
1136         MppBuffer mbuffer = NULL;
1137         MppFrameThumbnailMode thumbnail_mode;
1138 
1139         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
1140                               SLOT_BUFFER, &mbuffer);
1141         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
1142                               SLOT_FRAME_PTR, &mframe);
1143         thumbnail_mode = mpp_frame_get_thumbnail_en(mframe);
1144         switch (thumbnail_mode) {
1145         case MPP_FRAME_THUMBNAIL_ONLY:
1146             hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1147             origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs, dxva_ctx->pp.CurrPic.Index7Bits);
1148             fd = mpp_buffer_get_fd(origin_buf->buf[0]);
1149             /* output rkfbc64 */
1150             // hw_regs->h265d_addrs.reg168_dpb_decout_base = fd;
1151             /* output raster/tile4x4 */
1152             hw_regs->common_addr.reg135_pp_m_decout_base = fd;
1153             hw_regs->h265d_addrs.reg192_dpb_payload64x4_st_cur_base = fd;
1154             hw_regs->h265d_addrs.reg169_error_ref_base = fd;
1155             vdpu384a_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras);
1156             break;
1157         case MPP_FRAME_THUMBNAIL_MIXED:
1158             hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1159             vdpu384a_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras);
1160             break;
1161         case MPP_FRAME_THUMBNAIL_NONE:
1162         default:
1163             hw_regs->ctrl_regs.reg9.scale_down_en = 0;
1164             break;
1165         }
1166     }
1167 
1168     return ret;
1169 }
1170 
1171 static MPP_RET hal_h265d_vdpu384a_start(void *hal, HalTaskInfo *task)
1172 {
1173     MPP_RET ret = MPP_OK;
1174     RK_U8* p = NULL;
1175     Vdpu384aH265dRegSet *hw_regs = NULL;
1176     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1177     RK_S32 index =  task->dec.reg_index;
1178 
1179     RK_U32 i;
1180 
1181     if (task->dec.flags.parse_err ||
1182         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1183         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1184         return MPP_OK;
1185     }
1186 
1187     if (reg_ctx->fast_mode) {
1188         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
1189         hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1190     } else {
1191         p = (RK_U8*)reg_ctx->hw_regs;
1192         hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->hw_regs;
1193     }
1194 
1195     if (hw_regs == NULL) {
1196         mpp_err("hal_h265d_start hw_regs is NULL");
1197         return MPP_ERR_NULL_PTR;
1198     }
1199     for (i = 0; i < 68; i++) {
1200         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1201                   i, *((RK_U32*)p));
1202         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
1203         p += 4;
1204     }
1205 
1206     do {
1207         MppDevRegWrCfg wr_cfg;
1208         MppDevRegRdCfg rd_cfg;
1209 
1210         wr_cfg.reg = &hw_regs->ctrl_regs;
1211         wr_cfg.size = sizeof(hw_regs->ctrl_regs);
1212         wr_cfg.offset = OFFSET_CTRL_REGS;
1213         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1214         if (ret) {
1215             mpp_err_f("set register write failed %d\n", ret);
1216             break;
1217         }
1218 
1219         wr_cfg.reg = &hw_regs->common_addr;
1220         wr_cfg.size = sizeof(hw_regs->common_addr);
1221         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1222         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1223         if (ret) {
1224             mpp_err_f("set register write failed %d\n", ret);
1225             break;
1226         }
1227 
1228         wr_cfg.reg = &hw_regs->h265d_paras;
1229         wr_cfg.size = sizeof(hw_regs->h265d_paras);
1230         wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
1231         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1232         if (ret) {
1233             mpp_err_f("set register write failed %d\n", ret);
1234             break;
1235         }
1236 
1237         wr_cfg.reg = &hw_regs->h265d_addrs;
1238         wr_cfg.size = sizeof(hw_regs->h265d_addrs);
1239         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1240         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1241         if (ret) {
1242             mpp_err_f("set register write failed %d\n", ret);
1243             break;
1244         }
1245 
1246         rd_cfg.reg = &hw_regs->ctrl_regs.reg15;
1247         rd_cfg.size = sizeof(hw_regs->ctrl_regs.reg15);
1248         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1249         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1250         if (ret) {
1251             mpp_err_f("set register read failed %d\n", ret);
1252             break;
1253         }
1254 
1255         /* rcb info for sram */
1256         vdpu384a_set_rcbinfo(reg_ctx->dev, (Vdpu384aRcbInfo*)reg_ctx->rcb_info);
1257 
1258         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1259         if (ret) {
1260             mpp_err_f("send cmd failed %d\n", ret);
1261             break;
1262         }
1263     } while (0);
1264 
1265     return ret;
1266 }
1267 
1268 
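/* Poll for task completion and translate the hardware status into frame error flags */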
hal_h265d_vdpu384a_wait(void * hal,HalTaskInfo * task)1269 static MPP_RET hal_h265d_vdpu384a_wait(void *hal, HalTaskInfo *task)
1270 {
1271     MPP_RET ret = MPP_OK;
1272     RK_S32 index =  task->dec.reg_index;
1273     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1274     RK_U8* p = NULL;
1275     Vdpu384aH265dRegSet *hw_regs = NULL;
1276     RK_S32 i;
1277 
1278     if (reg_ctx->fast_mode) {
1279         hw_regs = (Vdpu384aH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1280     } else {
1281         hw_regs = (Vdpu384aH265dRegSet *)reg_ctx->hw_regs;
1282     }
1283 
1284     p = (RK_U8*)hw_regs;
1285 
1286     if (task->dec.flags.parse_err ||
1287         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1288         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1289         goto ERR_PROC;
1290     }
1291 
1292     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1293     if (ret)
1294         mpp_err_f("poll cmd failed %d\n", ret);
1295 
1296 ERR_PROC:
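    /*
     * Treat the frame as damaged if the task carried parser/reference errors or
     * the hardware reported anything other than a clean frame-ready interrupt
     * (stream error, timeout, bus error, buffer empty or colmv reference error).
     */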
1297     if (task->dec.flags.parse_err ||
1298         task->dec.flags.ref_err ||
1299         (!hw_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
1300         hw_regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
1301         hw_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
1302         hw_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
1303         hw_regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
1304         hw_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
1305         hw_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) {
1306         if (!reg_ctx->fast_mode) {
1307             if (reg_ctx->dec_cb)
1308                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1309         } else {
1310             MppFrame mframe = NULL;
1311             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1312                                   SLOT_FRAME_PTR, &mframe);
1313             if (mframe) {
1314                 reg_ctx->fast_mode_err_found = 1;
1315                 mpp_frame_set_errinfo(mframe, 1);
1316             }
1317         }
1318     } else {
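        /*
         * Fast mode tracks errinfo on the frames themselves: once an error has
         * been seen, taint the current output if any of its reference frames
         * is marked broken.
         */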
1319         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1320             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1321                 if (task->dec.refer[i] >= 0) {
1322                     MppFrame frame_ref = NULL;
1323 
1324                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1325                                           SLOT_FRAME_PTR, &frame_ref);
1326                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1327                               i, task->dec.refer[i], frame_ref);
1328                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1329                         MppFrame frame_out = NULL;
1330                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1331                                               SLOT_FRAME_PTR, &frame_out);
1332                         mpp_frame_set_errinfo(frame_out, 1);
1333                         break;
1334                     }
1335                 }
1336             }
1337         }
1338     }
1339 
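    /* Selectively dump status registers 1 and 45 for debug after the task finishes */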
1340     for (i = 0; i < 68; i++) {
1341         if (i == 1) {
1342             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1343                       i, *((RK_U32*)p));
1344         }
1345 
1346         if (i == 45) {
1347             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1348                       i, *((RK_U32*)p));
1349         }
1350         p += 4;
1351     }
1352 
1353     if (reg_ctx->fast_mode) {
1354         reg_ctx->g_buf[index].use_flag = 0;
1355     }
1356 
1357     return ret;
1358 }
1359 
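/* Reset only needs to clear the sticky fast-mode error flag */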
hal_h265d_vdpu384a_reset(void * hal)1360 static MPP_RET hal_h265d_vdpu384a_reset(void *hal)
1361 {
1362     MPP_RET ret = MPP_OK;
1363     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1364     p_hal->fast_mode_err_found = 0;
1365     (void)hal;
1366     return ret;
1367 }
1368 
hal_h265d_vdpu384a_flush(void * hal)1369 static MPP_RET hal_h265d_vdpu384a_flush(void *hal)
1370 {
1371     MPP_RET ret = MPP_OK;
1372 
1373     (void)hal;
1374     return ret;
1375 }
1376 
hal_h265d_vdpu384a_control(void * hal,MpiCmd cmd_type,void * param)1377 static MPP_RET hal_h265d_vdpu384a_control(void *hal, MpiCmd cmd_type, void *param)
1378 {
1379     MPP_RET ret = MPP_OK;
1380     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1381 
1382     (void)hal;
1383     (void)param;
1384     switch ((MpiCmd)cmd_type) {
1385     case MPP_DEC_SET_FRAME_INFO: {
1386         MppFrame frame = (MppFrame)param;
1387         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1388         RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param);
1389         RK_U32 imgheight = mpp_frame_get_height((MppFrame)param);
1390 
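        /*
         * 4:2:2 and 4:4:4 outputs carry more chroma than the default 4:2:0 layout,
         * so switch the slot length alignment to 2x / 3x the 16-aligned value.
         */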
1391         if (fmt == MPP_FMT_YUV422SP) {
1392             mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
1393         } else if (fmt == MPP_FMT_YUV444SP) {
1394             mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_444);
1395         }
1396         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1397             vdpu384a_afbc_align_calc(p_hal->slots, frame, 16);
1398         } else if (imgwidth > 1920 || imgheight > 1088) {
1399             mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
1400         }
1401         break;
1402     }
1403     case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: {
1404         vdpu384a_update_thumbnail_frame_info((MppFrame)param);
1405     } break;
1406     case MPP_DEC_SET_OUTPUT_FORMAT: {
1407     } break;
1408     default: {
1409     } break;
1410     }
1411     return ret;
1412 }
1413 
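/* HAL entry points exported for the VDPU384A HEVC decoder */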
1414 const MppHalApi hal_h265d_vdpu384a = {
1415     .name = "h265d_vdpu384a",
1416     .type = MPP_CTX_DEC,
1417     .coding = MPP_VIDEO_CodingHEVC,
1418     .ctx_size = sizeof(HalH265dCtx),
1419     .flag = 0,
1420     .init = hal_h265d_vdpu384a_init,
1421     .deinit = hal_h265d_vdpu384a_deinit,
1422     .reg_gen = hal_h265d_vdpu384a_gen_regs,
1423     .start = hal_h265d_vdpu384a_start,
1424     .wait = hal_h265d_vdpu384a_wait,
1425     .reset = hal_h265d_vdpu384a_reset,
1426     .flush = hal_h265d_vdpu384a_flush,
1427     .control = hal_h265d_vdpu384a_control,
1428 };
1429