xref: /rockchip-linux_mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu383.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3  * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4  */
5 
6 #define MODULE_TAG "hal_h265d_vdpu383"
7 
8 #include <stdio.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <stdarg.h>
12 #include <inttypes.h>
13 #include <ctype.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 
17 #include "mpp_env.h"
18 #include "mpp_mem.h"
19 #include "mpp_bitread.h"
20 #include "mpp_bitput.h"
21 #include "mpp_buffer_impl.h"
22 
23 #include "h265d_syntax.h"
24 #include "hal_h265d_debug.h"
25 #include "hal_h265d_ctx.h"
26 #include "hal_h265d_com.h"
27 #include "hal_h265d_vdpu383.h"
28 #include "vdpu383_h265d.h"
29 #include "vdpu383_com.h"
30 
31 #define HW_RPS
32 #define PPS_SIZE                (112 * 64)//(96x64)
33 
34 #define FMT 4
35 #define CTU 3
36 
37 typedef struct {
38     RK_U32 a;
39     RK_U32 b;
40 } FilterdColBufRatio;
41 
42 #if 0
43 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
44     /* 400    420      422       444 */
45     {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
46     {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
47     {{0, 0}, {27, 5},  {36, 5},  {52, 5}}  //ctu 64
48 };
49 
50 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
51     /* 400    420      422       444 */
52     {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
53     {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
54     {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
55 };
56 #endif
57 
58 #define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
59 #define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(176, SZ_4K))
60 #define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
61 #define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
62 #define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
63 #define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE *cnt)
64 
65 #define CABAC_TAB_OFFSET                (0)
66 #define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * pos))
67 #define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
68 #define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
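/*
 * Layout of the single backing buffer allocated in init, as implied by the
 * macros above: one shared cabac table, followed by one info block per
 * in-flight task (more than one only in fast mode), each info block holding
 * the packed SPS/PPS, RPS and scaling-list data at fixed offsets.
 */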
69 
70 #define pocdistance(a, b)               (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
71 
72 static RK_U32 rkv_len_align_422(RK_U32 val)
73 {
74     return (2 * MPP_ALIGN(val, 16));
75 }
76 
77 static RK_U32 rkv_len_align_444(RK_U32 val)
78 {
79     return (3 * MPP_ALIGN(val, 16));
80 }
81 
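/*
 * Pool used by the 8K frame-buffer down-scale path: judging by the thumbnail
 * handling in gen_regs below, the full-size decoded frames live in this
 * private 16-entry pool while the user-visible buffer receives the scaled
 * output. Sized from the current MppFrame.
 */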
82 static MPP_RET vdpu383_setup_scale_origin_bufs(HalH265dCtx *ctx, MppFrame mframe)
83 {
84     /* for 8K FrameBuf scale mode */
85     size_t origin_buf_size = 0;
86 
87     origin_buf_size = mpp_frame_get_buf_size(mframe);
88 
89     if (!origin_buf_size) {
90         mpp_err_f("origin_bufs get buf size failed\n");
91         return MPP_NOK;
92     }
93 
94     if (ctx->origin_bufs) {
95         hal_bufs_deinit(ctx->origin_bufs);
96         ctx->origin_bufs = NULL;
97     }
98     hal_bufs_init(&ctx->origin_bufs);
99     if (!ctx->origin_bufs) {
100         mpp_err_f("origin_bufs init fail\n");
101         return MPP_ERR_NOMEM;
102     }
103 
104     hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size);
105 
106     return MPP_OK;
107 }
108 
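/*
 * One-time setup: register the stride/height alignment hooks for the frame
 * slots, allocate the host-side scaling matrix / PPS / RPS scratch buffers,
 * get the shared ION buffer described by the offset macros above and preload
 * the cabac table into it.
 */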
109 static MPP_RET hal_h265d_vdpu383_init(void *hal, MppHalCfg *cfg)
110 {
111     RK_S32 ret = 0;
112     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
113 
114     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
115     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
116 
117     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
118     if (reg_ctx->scaling_qm == NULL) {
119         mpp_err("scaling_org alloc fail");
120         return MPP_ERR_MALLOC;
121     }
122 
123     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
124     reg_ctx->pps_buf = mpp_calloc(RK_U64, 24);
125     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
126 
127     if (reg_ctx->scaling_rk == NULL) {
128         mpp_err("scaling_rk alloc fail");
129         return MPP_ERR_MALLOC;
130     }
131 
132     if (reg_ctx->group == NULL) {
133         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
134         if (ret) {
135             mpp_err("h265d mpp_buffer_group_get failed\n");
136             return ret;
137         }
138     }
139 
140     {
141         RK_U32 i = 0;
142         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
143 
144         //!< malloc buffers
145         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
146         if (ret) {
147             mpp_err("h265d mpp_buffer_get failed\n");
148             return ret;
149         }
150 
151         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
152         reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
153         for (i = 0; i < max_cnt; i++) {
154             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu383H265dRegSet));
155             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
156             reg_ctx->offset_rps[i] = RPS_OFFSET(i);
157             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
158         }
159 
160         mpp_buffer_attach_dev(reg_ctx->bufs, reg_ctx->dev);
161     }
162 
163     if (!reg_ctx->fast_mode) {
164         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
165         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
166         reg_ctx->rps_offset = reg_ctx->offset_rps[0];
167         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
168     }
169 
170     ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
171     if (ret) {
172         mpp_err("h265d write cabac_table data failed\n");
173         return ret;
174     }
175 
176     if (cfg->hal_fbc_adj_cfg) {
177         cfg->hal_fbc_adj_cfg->func = vdpu383_afbc_align_calc;
178         cfg->hal_fbc_adj_cfg->expand = 16;
179     }
180 
181     (void) cfg;
182     return MPP_OK;
183 }
184 
185 static MPP_RET hal_h265d_vdpu383_deinit(void *hal)
186 {
187     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
188     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
189     RK_U32 i;
190 
191     if (reg_ctx->bufs) {
192         mpp_buffer_put(reg_ctx->bufs);
193         reg_ctx->bufs = NULL;
194     }
195 
196     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
197     for (i = 0; i < loop; i++) {
198         if (reg_ctx->rcb_buf[i]) {
199             mpp_buffer_put(reg_ctx->rcb_buf[i]);
200             reg_ctx->rcb_buf[i] = NULL;
201         }
202     }
203 
204     if (reg_ctx->group) {
205         mpp_buffer_group_put(reg_ctx->group);
206         reg_ctx->group = NULL;
207     }
208 
209     for (i = 0; i < loop; i++)
210         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
211 
212     MPP_FREE(reg_ctx->scaling_qm);
213     MPP_FREE(reg_ctx->scaling_rk);
214     MPP_FREE(reg_ctx->pps_buf);
215     MPP_FREE(reg_ctx->sw_rps_buf);
216 
217     if (reg_ctx->cmv_bufs) {
218         hal_bufs_deinit(reg_ctx->cmv_bufs);
219         reg_ctx->cmv_bufs = NULL;
220     }
221 
222     if (reg_ctx->origin_bufs) {
223         hal_bufs_deinit(reg_ctx->origin_bufs);
224         reg_ctx->origin_bufs = NULL;
225     }
226 
227     return MPP_OK;
228 }
229 
230 #define SCALING_LIST_NUM 6
231 
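/*
 * Repack the raster-order scaling lists into the layout the hardware expects:
 * the 4x4/8x8/16x16/32x32 matrices are emitted 4x4-block by 4x4-block in
 * vertical order, then the 4x4 lists again in horizontal order, then the
 * 16x16 and 32x32 DC values, padded to a 128-bit boundary.
 */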
232 void hal_vdpu383_record_scaling_list(scalingFactor_t *pScalingFactor_out, scalingList_t *pScalingList)
233 {
234     RK_S32 i;
235     RK_U32 listId;
236     BitputCtx_t bp;
237 
238     mpp_set_bitput_ctx(&bp, (RK_U64 *)pScalingFactor_out, 170); // 170*64bits
239 
240     //-------- repack into the layout the hardware needs --------
241     //sizeId == 0, block4x4
242     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
243         RK_U8 *p_data = pScalingList->sl[0][listId];
244         /* dump by block4x4, vertical direction */
245         for (i = 0; i < 4; i++) {
246             mpp_put_bits(&bp, p_data[i + 0], 8);
247             mpp_put_bits(&bp, p_data[i + 4], 8);
248             mpp_put_bits(&bp, p_data[i + 8], 8);
249             mpp_put_bits(&bp, p_data[i + 12], 8);
250         }
251     }
252     //sizeId == 1, block8x8
253     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
254         RK_S32 blk4_x = 0, blk4_y = 0;
255         RK_U8 *p_data = pScalingList->sl[1][listId];
256 
257         /* dump by block4x4, vertical direction */
258         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
259             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
260                 RK_S32 pos = blk4_y * 8 + blk4_x;
261 
262                 for (i = 0; i < 4; i++) {
263                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
264                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
265                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
266                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
267                 }
268             }
269         }
270     }
271     //sizeId == 2, block16x16
272     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
273         RK_S32 blk4_x = 0, blk4_y = 0;
274         RK_U8 *p_data = pScalingList->sl[2][listId];
275 
276         /* dump by block4x4, vertical direction */
277         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
278             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
279                 RK_S32 pos = blk4_y * 8 + blk4_x;
280 
281                 for (i = 0; i < 4; i++) {
282                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
283                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
284                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
285                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
286                 }
287             }
288         }
289     }
290     //sizeId == 3, block32x32
291     for (listId = 0; listId < 6; listId++) {
292         RK_S32 blk4_x = 0, blk4_y = 0;
293         RK_U8 *p_data = pScalingList->sl[3][listId];
294 
295         /* dump by block4x4, vertical direction */
296         for (blk4_x = 0; blk4_x < 8; blk4_x += 4) {
297             for (blk4_y = 0; blk4_y < 8; blk4_y += 4) {
298                 RK_S32 pos = blk4_y * 8 + blk4_x;
299 
300                 for (i = 0; i < 4; i++) {
301                     mpp_put_bits(&bp, p_data[pos + i + 0], 8);
302                     mpp_put_bits(&bp, p_data[pos + i + 8], 8);
303                     mpp_put_bits(&bp, p_data[pos + i + 16], 8);
304                     mpp_put_bits(&bp, p_data[pos + i + 24], 8);
305                 }
306             }
307         }
308     }
309     //sizeId == 0, block4x4, horizontal direction
310     for (listId = 0; listId < SCALING_LIST_NUM; listId++) {
311         RK_U8 *p_data = pScalingList->sl[0][listId];
312 
313         for (i = 0; i < 16; i++)
314             mpp_put_bits(&bp, p_data[i], 8);
315     }
316 
317     // dump dc value
318     for (i = 0; i < SCALING_LIST_NUM; i++)//sizeId = 2, 16x16
319         mpp_put_bits(&bp, pScalingList->sl_dc[0][i], 8);
320     for (i = 0; i < SCALING_LIST_NUM; i++) //sizeId = 3, 32x32
321         mpp_put_bits(&bp, pScalingList->sl_dc[1][i], 8);
322 
323     mpp_put_align(&bp, 128, 0);
324 }
325 
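/*
 * Convert the DXVA scaling lists (stored in up-right diagonal scan order)
 * back to raster order and regenerate the hardware block only when the
 * matrices differ from the cached copy; the packed result is then copied
 * into the per-task scaling-list region.
 */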
326 static MPP_RET hal_h265d_vdpu383_scalinglist_packet(void *hal, void *ptr, void *dxva)
327 {
328     scalingList_t sl;
329     RK_U32 i, j, pos;
330     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
331     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
332 
333     if (!dxva_ctx->pp.scaling_list_enabled_flag) {
334         return MPP_OK;
335     }
336 
337     if (memcmp((void*)&dxva_ctx->qm, reg_ctx->scaling_qm, sizeof(DXVA_Qmatrix_HEVC))) {
338         memset(&sl, 0, sizeof(scalingList_t));
339 
340         for (i = 0; i < 6; i++) {
341             for (j = 0; j < 16; j++) {
342                 pos = 4 * hal_hevc_diag_scan4x4_y[j] + hal_hevc_diag_scan4x4_x[j];
343                 sl.sl[0][i][pos] = dxva_ctx->qm.ucScalingLists0[i][j];
344             }
345 
346             for (j = 0; j < 64; j++) {
347                 pos = 8 * hal_hevc_diag_scan8x8_y[j] + hal_hevc_diag_scan8x8_x[j];
348                 sl.sl[1][i][pos] =  dxva_ctx->qm.ucScalingLists1[i][j];
349                 sl.sl[2][i][pos] =  dxva_ctx->qm.ucScalingLists2[i][j];
350 
351                 if (i == 0)
352                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists3[0][j];
353                 else if (i == 3)
354                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists3[1][j];
355                 else
356                     sl.sl[3][i][pos] =  dxva_ctx->qm.ucScalingLists2[i][j];
357             }
358 
359             sl.sl_dc[0][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID2[i];
360             if (i == 0)
361                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID3[0];
362             else if (i == 3)
363                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID3[1];
364             else
365                 sl.sl_dc[1][i] =  dxva_ctx->qm.ucScalingListDCCoefSizeID2[i];
366         }
367         hal_vdpu383_record_scaling_list((scalingFactor_t *)reg_ctx->scaling_rk, &sl);
368     }
369 
370     memcpy(ptr, reg_ctx->scaling_rk, sizeof(scalingFactor_t));
371 
372     return MPP_OK;
373 }
374 
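/*
 * Pack the SPS/PPS fields, current/reference POC info and tile partition
 * into the 176-byte global config block read through reg131, and (re)pack
 * the scaling lists when enabled. The parameter-set part is only rewritten
 * when ps_update_flag is set; otherwise the cached pps_buf content is reused
 * and only the per-frame fields are refreshed.
 */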
375 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
376 {
377     RK_S32 i;
378     RK_U32 log2_min_cb_size;
379     RK_S32 width, height;
380     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
381     Vdpu383H265dRegSet *hw_reg = (Vdpu383H265dRegSet*)(reg_ctx->hw_regs);
382     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
383     BitputCtx_t bp;
384 
385     if (NULL == reg_ctx || dxva_ctx == NULL) {
386         mpp_err("%s:%s:%d reg_ctx or dxva_ctx is NULL",
387                 __FILE__, __FUNCTION__, __LINE__);
388         return MPP_ERR_NULL_PTR;
389     }
390 
391     // SPS
392     {
393         void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
394         RK_U64 *pps_packet = reg_ctx->pps_buf;
395 
396         if (NULL == pps_ptr) {
397             mpp_err("pps_data get ptr error");
398             return MPP_ERR_NOMEM;
399         }
400 
401         log2_min_cb_size = dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3;
402         width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size);
403         height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size);
404 
405         mpp_set_bitput_ctx(&bp, pps_packet, 22); // 22*64bits
406 
407         if (dxva_ctx->pp.ps_update_flag) {
408             mpp_put_bits(&bp, dxva_ctx->pp.vps_id, 4);
409             mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4);
410             mpp_put_bits(&bp, dxva_ctx->pp.chroma_format_idc, 2);
411 
412             mpp_put_bits(&bp, width, 16);
413             mpp_put_bits(&bp, height, 16);
414             mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_luma_minus8, 3);
415             mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_chroma_minus8, 3);
416             mpp_put_bits(&bp, dxva_ctx->pp.log2_max_pic_order_cnt_lsb_minus4 + 4, 5);
417             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size, 2);
418             mpp_put_bits(&bp, dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3, 3);
419             mpp_put_bits(&bp, dxva_ctx->pp.log2_min_transform_block_size_minus2 + 2, 3);
420 
421             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_transform_block_size, 2);
422             mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_inter, 3);
423             mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_intra, 3);
424             mpp_put_bits(&bp, dxva_ctx->pp.scaling_list_enabled_flag, 1);
425             mpp_put_bits(&bp, dxva_ctx->pp.amp_enabled_flag, 1);
426             mpp_put_bits(&bp, dxva_ctx->pp.sample_adaptive_offset_enabled_flag, 1);
427             ///<-zrh comment ^  68 bit above
428             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag, 1);
429             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0, 4);
430             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0, 4);
431             mpp_put_bits(&bp, dxva_ctx->pp.pcm_loop_filter_disabled_flag, 1);
432             mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_pcm_luma_coding_block_size, 3);
433             mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
434 
435             mpp_put_bits(&bp, dxva_ctx->pp.num_short_term_ref_pic_sets, 7);
436             mpp_put_bits(&bp, dxva_ctx->pp.long_term_ref_pics_present_flag, 1);
437             mpp_put_bits(&bp, dxva_ctx->pp.num_long_term_ref_pics_sps, 6);
438             mpp_put_bits(&bp, dxva_ctx->pp.sps_temporal_mvp_enabled_flag, 1);
439             mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1);
440             // SPS extension
441             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_rotation_enabled_flag, 1);
442             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_context_enabled_flag, 1);
443             mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1);
444             mpp_put_bits(&bp, dxva_ctx->pp.implicit_rdpcm_enabled_flag, 1);
445             mpp_put_bits(&bp, dxva_ctx->pp.explicit_rdpcm_enabled_flag, 1);
446             mpp_put_bits(&bp, dxva_ctx->pp.extended_precision_processing_flag, 1);
447             mpp_put_bits(&bp, dxva_ctx->pp.intra_smoothing_disabled_flag, 1);
448             mpp_put_bits(&bp, dxva_ctx->pp.sps_max_dec_pic_buffering_minus1, 4);
449             mpp_put_bits(&bp, dxva_ctx->pp.separate_colour_plane_flag, 1);
450             mpp_put_bits(&bp, dxva_ctx->pp.high_precision_offsets_enabled_flag, 1);
451             mpp_put_bits(&bp, dxva_ctx->pp.persistent_rice_adaptation_enabled_flag, 1);
452 
453             /* PPS */
454             mpp_put_bits(&bp, dxva_ctx->pp.pps_id, 6);
455             mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4);
456             mpp_put_bits(&bp, dxva_ctx->pp.dependent_slice_segments_enabled_flag, 1);
457             mpp_put_bits(&bp, dxva_ctx->pp.output_flag_present_flag, 1);
458             mpp_put_bits(&bp, dxva_ctx->pp.num_extra_slice_header_bits, 13);
459 
460             mpp_put_bits(&bp, dxva_ctx->pp.sign_data_hiding_enabled_flag, 1);
461             mpp_put_bits(&bp, dxva_ctx->pp.cabac_init_present_flag, 1);
462             mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l0_default_active_minus1 + 1, 4);
463             mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l1_default_active_minus1 + 1, 4);
464             mpp_put_bits(&bp, dxva_ctx->pp.init_qp_minus26, 7);
465             mpp_put_bits(&bp, dxva_ctx->pp.constrained_intra_pred_flag, 1);
466             mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_enabled_flag, 1);
467             mpp_put_bits(&bp, dxva_ctx->pp.cu_qp_delta_enabled_flag, 1);
468             mpp_put_bits(&bp, log2_min_cb_size + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size - dxva_ctx->pp.diff_cu_qp_delta_depth, 3);
469 
470             mpp_put_bits(&bp, dxva_ctx->pp.pps_cb_qp_offset, 5);
471             mpp_put_bits(&bp, dxva_ctx->pp.pps_cr_qp_offset, 5);
472             mpp_put_bits(&bp, dxva_ctx->pp.pps_slice_chroma_qp_offsets_present_flag, 1);
473             mpp_put_bits(&bp, dxva_ctx->pp.weighted_pred_flag, 1);
474             mpp_put_bits(&bp, dxva_ctx->pp.weighted_bipred_flag, 1);
475             mpp_put_bits(&bp, dxva_ctx->pp.transquant_bypass_enabled_flag, 1);
476             mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag, 1);
477             mpp_put_bits(&bp, dxva_ctx->pp.entropy_coding_sync_enabled_flag, 1);
478             mpp_put_bits(&bp, dxva_ctx->pp.pps_loop_filter_across_slices_enabled_flag, 1);
479 
480             mpp_put_bits(&bp, dxva_ctx->pp.loop_filter_across_tiles_enabled_flag, 1);
481             mpp_put_bits(&bp, dxva_ctx->pp.deblocking_filter_override_enabled_flag, 1);
482             mpp_put_bits(&bp, dxva_ctx->pp.pps_deblocking_filter_disabled_flag, 1);
483             mpp_put_bits(&bp, dxva_ctx->pp.pps_beta_offset_div2, 4);
484             mpp_put_bits(&bp, dxva_ctx->pp.pps_tc_offset_div2, 4);
485             mpp_put_bits(&bp, dxva_ctx->pp.lists_modification_present_flag, 1);
486             mpp_put_bits(&bp, dxva_ctx->pp.log2_parallel_merge_level_minus2 + 2, 3);
487             mpp_put_bits(&bp, dxva_ctx->pp.slice_segment_header_extension_present_flag, 1);
488             mpp_put_bits(&bp, 0, 3);
489 
490             // PPS extension
491             if (dxva_ctx->pp.log2_max_transform_skip_block_size > 2) {
492                 mpp_put_bits(&bp, dxva_ctx->pp.log2_max_transform_skip_block_size - 2, 2);
493             } else {
494                 mpp_put_bits(&bp, 0, 2);
495             }
496             mpp_put_bits(&bp, dxva_ctx->pp.cross_component_prediction_enabled_flag, 1);
497             mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_enabled_flag, 1);
498 
499             RK_S32 log2_min_cu_chroma_qp_delta_size = log2_min_cb_size +
500                                                       dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size -
501                                                       dxva_ctx->pp.diff_cu_chroma_qp_offset_depth;
502             mpp_put_bits(&bp, log2_min_cu_chroma_qp_delta_size, 3);
503             for (i = 0; i < 6; i++)
504                 mpp_put_bits(&bp, dxva_ctx->pp.cb_qp_offset_list[i], 5);
505             for (i = 0; i < 6; i++)
506                 mpp_put_bits(&bp, dxva_ctx->pp.cr_qp_offset_list[i], 5);
507             mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_len_minus1, 3);
508 
509             /* mvc0 && mvc1 */
510             mpp_put_bits(&bp, 0xffff, 16);
511             mpp_put_bits(&bp, 0, 1);
512             mpp_put_bits(&bp, 0, 6);
513             mpp_put_bits(&bp, 0, 1);
514             mpp_put_bits(&bp, 0, 1);
515         } else {
516             bp.index = 4;
517             bp.bitpos = 41;
518             bp.bvalue = bp.pbuf[bp.index] & MPP_GENMASK(bp.bitpos - 1, 0);
519         }
520         /* poc info */
521         {
522             RK_S32 dpb_valid[15] = {0}, refpic_poc[15] = {0};
523 
524             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
525                 if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
526                     dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
527                     dpb_valid[i] = 1;
528                     refpic_poc[i] = dxva_ctx->pp.PicOrderCntValList[i];
529                 }
530             }
531 
532             mpp_put_bits(&bp, 0, 1);
533             mpp_put_bits(&bp, 0, 1);
534             mpp_put_bits(&bp, 0, 1);
535             mpp_put_bits(&bp, dxva_ctx->pp.current_poc, 32);
536 
537             for (i = 0; i < 15; i++)
538                 mpp_put_bits(&bp, refpic_poc[i], 32);
539             mpp_put_bits(&bp, 0, 32);
540             for (i = 0; i < 15; i++)
541                 mpp_put_bits(&bp, dpb_valid[i], 1);
542             mpp_put_bits(&bp, 0, 1);
543         }
544 
545         /* tile info */
546         mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? (dxva_ctx->pp.num_tile_columns_minus1 + 1) : 1, 5);
547         mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? (dxva_ctx->pp.num_tile_rows_minus1 + 1) : 1, 5);
548         {
549             /// tiles info begin
550             RK_U16 column_width[20];
551             RK_U16 row_height[22];
552 
553             memset(column_width, 0, sizeof(column_width));
554             memset(row_height, 0, sizeof(row_height));
555 
556             if (dxva_ctx->pp.tiles_enabled_flag) {
557                 if (dxva_ctx->pp.uniform_spacing_flag == 0) {
558                     RK_S32 maxcuwidth = dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
559                     RK_S32 ctu_width_in_pic = (width +
560                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
561                     RK_S32 ctu_height_in_pic = (height +
562                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
563                     RK_S32 sum = 0;
564                     for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1; i++) {
565                         column_width[i] = dxva_ctx->pp.column_width_minus1[i] + 1;
566                         sum += column_width[i]  ;
567                     }
568                     column_width[i] = ctu_width_in_pic - sum;
569 
570                     sum = 0;
571                     for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1; i++) {
572                         row_height[i] = dxva_ctx->pp.row_height_minus1[i] + 1;
573                         sum += row_height[i];
574                     }
575                     row_height[i] = ctu_height_in_pic - sum;
576                 }  else {
577                     RK_S32    pic_in_cts_width = (width +
578                                                   (1 << (log2_min_cb_size +
579                                                          dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
580                                                  / (1 << (log2_min_cb_size +
581                                                           dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size));
582                     RK_S32 pic_in_cts_height = (height +
583                                                 (1 << (log2_min_cb_size +
584                                                        dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
585                                                / (1 << (log2_min_cb_size +
586                                                         dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size));
587 
588                     for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1 + 1; i++)
589                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1) -
590                                           (i * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1);
591 
592                     for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1 + 1; i++)
593                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1) -
594                                         (i * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1);
595                 }
596             } else {
597                 RK_S32 MaxCUWidth = (1 << (dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
598                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
599                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
600             }
601 
602             for (i = 0; i < 20; i++)
603                 mpp_put_bits(&bp, column_width[i], 12);
604 
605             for (i = 0; i < 22; i++)
606                 mpp_put_bits(&bp, row_height[i], 12);
607         }
608         mpp_put_align(&bp, 64, 0);//128
609         memcpy(pps_ptr, reg_ctx->pps_buf, 176);
610     } /* --- end spspps data ------*/
611 
612     if (dxva_ctx->pp.scaling_list_enabled_flag) {
613         RK_U32 addr;
614         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
615 
616         if (dxva_ctx->pp.scaling_list_data_present_flag) {
617             addr = (dxva_ctx->pp.pps_id + 16) * 1360;
618         } else if (dxva_ctx->pp.scaling_list_enabled_flag) {
619             addr = dxva_ctx->pp.sps_id * 1360;
620         } else {
621             addr = 80 * 1360;
622         }
623 
624         hal_h265d_vdpu383_scalinglist_packet(hal, ptr_scaling + addr, dxva);
625 
626         hw_reg->common_addr.reg132_scanlist_addr = reg_ctx->bufs_fd;
627         mpp_dev_set_reg_offset(reg_ctx->dev, 132, addr + reg_ctx->sclst_offset);
628     }
629 
630 #ifdef dump
631     fwrite(pps_ptr, 1, 80 * 64, fp);
632     RK_U32 *tmp = (RK_U32 *)pps_ptr;
633     for (i = 0; i < 112 / 4; i++) {
634         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
635     }
636 #endif
637 #ifdef DUMP_VDPU383_DATAS
638     {
639         char *cur_fname = "global_cfg.dat";
640         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
641         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
642         dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * bp.index + bp.bitpos, 128, 0);
643     }
644 #endif
645 
646     return 0;
647 }
648 
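/*
 * Refine the per-segment RCB (row/column cache buffer) sizes computed by the
 * common helper using HEVC-specific parameters: chroma format, bit depth and
 * the number of tile rows/columns. The filter-row buffer is split in half
 * between RCB_FILTERD_ROW and RCB_FILTERD_PROTECT_ROW with an extra append
 * area, larger for widths above 4096. The bit-count formulas here look
 * empirical, so treat them as tuning values rather than a spec-derived model.
 */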
649 static void h265d_refine_rcb_size(Vdpu383RcbInfo *rcb_info,
650                                   RK_S32 width, RK_S32 height, void *dxva)
651 {
652     RK_U32 rcb_bits = 0;
653     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
654     DXVA_PicParams_HEVC *pp = &dxva_ctx->pp;
655     RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 400, 1: 420, 2: 422, 3: 444
656     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
657     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
658     RK_U32 tile_row_cut_num = pp->num_tile_rows_minus1;
659     RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
660     RK_U32 ext_row_align_size = tile_row_cut_num * 64 * 8;
661     RK_U32 ext_col_align_size = tile_col_cut_num * 64 * 8;
662     RK_U32 filterd_row_append = 8192;
663     RK_U32 row_uv_para = 0;
664     RK_U32 col_uv_para = 0;
665 
666     if (chroma_fmt_idc == 1) {
667         row_uv_para = 1;
668         col_uv_para = 1;
669     } else if (chroma_fmt_idc == 2) {
670         row_uv_para = 1;
671         col_uv_para = 3;
672     } else if (chroma_fmt_idc == 3) {
673         row_uv_para = 3;
674         col_uv_para = 3;
675     }
676 
677     width = MPP_ALIGN(width, ctu_size);
678     height = MPP_ALIGN(height, ctu_size);
679     /* RCB_STRMD_ROW && RCB_STRMD_TILE_ROW*/
680     rcb_info[RCB_STRMD_ROW].size = 0;
681     rcb_info[RCB_STRMD_TILE_ROW].size = 0;
682 
683     /* RCB_INTER_ROW && RCB_INTER_TILE_ROW*/
684     rcb_bits = ((width + 7) / 8) * 174;
685     rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
686     rcb_bits += ext_row_align_size;
687     if (tile_row_cut_num)
688         rcb_info[RCB_INTER_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
689     else
690         rcb_info[RCB_INTER_TILE_ROW].size = 0;
691 
692     /* RCB_INTRA_ROW && RCB_INTRA_TILE_ROW*/
693     rcb_bits = MPP_ALIGN(width, 512) * (bit_depth + 2);
694     rcb_bits = rcb_bits * 4; //TODO:
695     rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
696     rcb_bits += ext_row_align_size;
697     if (tile_row_cut_num)
698         rcb_info[RCB_INTRA_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
699     else
700         rcb_info[RCB_INTRA_TILE_ROW].size = 0;
701 
702     /* RCB_FILTERD_ROW && RCB_FILTERD_TILE_ROW*/
703     rcb_bits = (MPP_ALIGN(width, 64) * (1.6 * bit_depth + 0.5) * (8 + 5 * row_uv_para));
704     // save space mode : half for RCB_FILTERD_ROW, half for RCB_FILTERD_PROTECT_ROW
705     if (width > 4096)
706         filterd_row_append = 27648;
707     rcb_info[RCB_FILTERD_ROW].size = MPP_RCB_BYTES(rcb_bits / 2) + filterd_row_append;
708     rcb_info[RCB_FILTERD_PROTECT_ROW].size = MPP_RCB_BYTES(rcb_bits / 2) + filterd_row_append;
709     rcb_bits += ext_row_align_size;
710     if (tile_row_cut_num)
711         rcb_info[RCB_FILTERD_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits);
712     else
713         rcb_info[RCB_FILTERD_TILE_ROW].size = 0;
714 
715     /* RCB_FILTERD_TILE_COL */
716     if (tile_col_cut_num) {
717         rcb_bits = (MPP_ALIGN(height, 64) * (1.6 * bit_depth + 0.5) * (16.5 + 5 * col_uv_para)) + ext_col_align_size;
718         rcb_info[RCB_FILTERD_TILE_COL].size = MPP_RCB_BYTES(rcb_bits);
719     } else {
720         rcb_info[RCB_FILTERD_TILE_COL].size = 0;
721     }
722 
723 }
724 
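/*
 * Re-evaluate the RCB sizes and re-allocate the RCB buffers only when
 * something that affects them changed (resolution, bit depth, chroma format,
 * CTU size or tile row count); otherwise the existing buffers are reused.
 */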
725 static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
726                                       Vdpu383H265dRegSet *hw_regs,
727                                       RK_S32 width, RK_S32 height)
728 {
729     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
730     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
731     DXVA_PicParams_HEVC *pp = &dxva_ctx->pp;
732     RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 400, 1: 420, 2: 422, 3: 444
733     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
734     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
735     RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;
736     (void)hw_regs;
737 
738     if (reg_ctx->num_row_tiles != num_tiles ||
739         reg_ctx->bit_depth != bit_depth ||
740         reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
741         reg_ctx->ctu_size !=  ctu_size ||
742         reg_ctx->width != width ||
743         reg_ctx->height != height) {
744         RK_U32 i = 0;
745         RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
746 
747         reg_ctx->rcb_buf_size = vdpu383_get_rcb_buf_size((Vdpu383RcbInfo *)reg_ctx->rcb_info, width, height);
748         h265d_refine_rcb_size((Vdpu383RcbInfo *)reg_ctx->rcb_info, width, height, dxva_ctx);
749 
750         for (i = 0; i < loop; i++) {
751             MppBuffer rcb_buf;
752 
753             if (reg_ctx->rcb_buf[i]) {
754                 mpp_buffer_put(reg_ctx->rcb_buf[i]);
755                 reg_ctx->rcb_buf[i] = NULL;
756             }
757             mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size);
758             reg_ctx->rcb_buf[i] = rcb_buf;
759         }
760 
761         reg_ctx->num_row_tiles  = num_tiles;
762         reg_ctx->bit_depth      = bit_depth;
763         reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
764         reg_ctx->ctu_size       = ctu_size;
765         reg_ctx->width          = width;
766         reg_ctx->height         = height;
767     }
768 }
769 
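/*
 * Pack the SPS long-term and short-term reference picture sets into the
 * fixed-size 400 x 64-bit table consumed by the hardware, each short-term
 * entry padded out to its aligned slot. When rps_update_flag is clear the
 * previously packed copy in sw_rps_buf is reused; in fast mode it still has
 * to be copied into the per-task rps buffer.
 */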
770 static RK_S32 hal_h265d_vdpu383_rps(void *dxva, void *rps_buf, void* sw_rps_buf, RK_U32 fast_mode)
771 {
772     BitputCtx_t bp;
773     RK_S32 fifo_len = 400;
774     RK_S32 i = 0, j = 0;
775     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva;
776 
777     if (!dxva_ctx->pp.rps_update_flag) {
778         if (fast_mode) {
779             memcpy(rps_buf, sw_rps_buf, fifo_len * sizeof(RK_U64));
780         }
781         return 0;
782     }
783 
784     mpp_set_bitput_ctx(&bp, (RK_U64*)sw_rps_buf, fifo_len);
785 
786     for (i = 0; i < 32; i ++) {
787         mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].lt_ref_pic_poc_lsb, 16);
788         mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].used_by_curr_pic_lt_flag, 1);
789         mpp_put_bits(&bp, 0,     15);
790     }
791 
792     for (i = 0; i < 64; i++) {
793         if (i < dxva_ctx->pp.num_short_term_ref_pic_sets) {
794 
795             mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].num_negative_pics, 4);
796             mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].num_positive_pics, 4);
797             for ( j = 0; j <  dxva_ctx->pp.sps_st_rps[i].num_negative_pics; j++) {
798 
799                 mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].delta_poc_s0[j], 16);
800                 mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].s0_used_flag[j], 1);
801             }
802 
803             for (j = 0; j <  dxva_ctx->pp.sps_st_rps[i].num_positive_pics; j++) {
804                 mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].delta_poc_s1[j], 16);
805                 mpp_put_bits(&bp, dxva_ctx->pp.sps_st_rps[i].s1_used_flag[j], 1);
806 
807             }
808 
809             for ( j = dxva_ctx->pp.sps_st_rps[i].num_negative_pics + dxva_ctx->pp.sps_st_rps[i].num_positive_pics; j < 15; j++) {
810                 mpp_put_bits(&bp, 0, 16);
811                 mpp_put_bits(&bp, 0, 1);
812             }
813 
814         } else {
815             mpp_put_bits(&bp, 0, 4);
816             mpp_put_bits(&bp, 0, 4);
817             for ( j = 0; j < 15; j++) {
818                 mpp_put_bits(&bp, 0, 16);
819                 mpp_put_bits(&bp, 0, 1);
820             }
821         }
822         mpp_put_align(&bp, 64, 0);
823         mpp_put_bits(&bp,  0, 128);
824     }
825 
826 #ifdef DUMP_VDPU383_DATAS
827     {
828         char *cur_fname = "rps_128bit.dat";
829         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
830         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
831         dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * bp.index + bp.bitpos, 128, 0);
832     }
833 #endif
834 
835     RK_U32 *tmp = (RK_U32 *)sw_rps_buf;
836     memcpy(rps_buf, sw_rps_buf, fifo_len * sizeof(RK_U64));
837 
838     if (hal_h265d_debug & H265H_DBG_RPS) {
839         for (i = 0; i < 400 * 8 / 4; i++) {
840             mpp_log("rps[%3d] = 0x%08x\n", i, tmp[i]);
841         }
842     }
843     return 0;
844 }
845 
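/*
 * Size of the co-located MV buffer for temporal MVP: motion is stored per
 * 16x16 block, 16 bytes each, and blocks are grouped into 64-block segments
 * along a CTU row. For example, a hypothetical 4096x2304 stream with 64x64
 * CTUs gives seg_w = 64*16*16/64 = 256, hence 16 x 36 segments and
 * 16 * 36 * 64 * 16 = 589824 bytes.
 */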
846 static RK_S32 calc_mv_size(RK_S32 pic_w, RK_S32 pic_h, RK_S32 ctu_w)
847 {
848     RK_S32 seg_w = 64 * 16 * 16 / ctu_w; // colmv_block_size = 16, colmv_per_bytes = 16
849     RK_S32 seg_cnt_w = MPP_ALIGN(pic_w, seg_w) / seg_w;
850     RK_S32 seg_cnt_h = MPP_ALIGN(pic_h, ctu_w) / ctu_w;
851     RK_S32 mv_size   = seg_cnt_w * seg_cnt_h * 64 * 16;
852 
853     return mv_size;
854 }
855 
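/*
 * Per-frame register generation: pick a free register/info set (fast mode),
 * size the colmv pool, program output/reference strides and addresses, pack
 * the RPS and global config blocks, update the RCB buffers and, if requested,
 * configure the down-scale output.
 */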
856 static MPP_RET hal_h265d_vdpu383_gen_regs(void *hal,  HalTaskInfo *syn)
857 {
858     RK_S32 i = 0;
859     RK_S32 log2_min_cb_size;
860     RK_S32 width, height;
861     RK_S32 stride_y, stride_uv, virstrid_y;
862     Vdpu383H265dRegSet *hw_regs;
863     RK_S32 ret = MPP_SUCCESS;
864     MppBuffer streambuf = NULL;
865     RK_S32 aglin_offset = 0;
866     RK_S32 valid_ref = -1;
867     MppBuffer framebuf = NULL;
868     HalBuf *mv_buf = NULL;
869     RK_S32 fd = -1;
870     RK_U32 mv_size = 0;
871     RK_S32 distance = INT_MAX;
872     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
873 
874     (void) fd;
875     if (syn->dec.flags.parse_err ||
876         (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
877         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
878         return MPP_OK;
879     }
880 
881     h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
882     HalBuf *origin_buf = NULL;
883 
884     void *rps_ptr = NULL;
885     if (reg_ctx ->fast_mode) {
886         for (i = 0; i < MAX_GEN_REG; i++) {
887             if (!reg_ctx->g_buf[i].use_flag) {
888                 syn->dec.reg_index = i;
889 
890                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
891                 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
892                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
893 
894                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
895                 reg_ctx->g_buf[i].use_flag = 1;
896                 break;
897             }
898         }
899         if (i == MAX_GEN_REG) {
900             mpp_err("hevc rps buf all used");
901             return MPP_ERR_NOMEM;
902         }
903     }
904     rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
905     if (NULL == rps_ptr) {
906 
907         mpp_err("rps_data get ptr error");
908         return MPP_ERR_NOMEM;
909     }
910 
911 
912     if (syn->dec.syntax.data == NULL) {
913         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
914         return MPP_ERR_NULL_PTR;
915     }
916 
917 #ifdef DUMP_VDPU383_DATAS
918     {
919         memset(dump_cur_dir, 0, sizeof(dump_cur_dir));
920         sprintf(dump_cur_dir, "hevc/Frame%04d", dump_cur_frame);
921         if (access(dump_cur_dir, 0)) {
922             if (mkdir(dump_cur_dir, 0777))
923                 mpp_err_f("error: mkdir %s\n", dump_cur_dir);
924         }
925         dump_cur_frame++;
926     }
927 #endif
928 
929     /* output pps */
930     hw_regs = (Vdpu383H265dRegSet*)reg_ctx->hw_regs;
931     memset(hw_regs, 0, sizeof(Vdpu383H265dRegSet));
932 
933     if (NULL == reg_ctx->hw_regs) {
934         return MPP_ERR_NULL_PTR;
935     }
936 
937 
938     log2_min_cb_size = dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3;
939     width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size);
940     height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size);
941     mv_size = calc_mv_size(width, height, 1 << log2_min_cb_size) * 2;
942 
943     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
944         size_t size = mv_size;
945 
946         if (reg_ctx->cmv_bufs) {
947             hal_bufs_deinit(reg_ctx->cmv_bufs);
948             reg_ctx->cmv_bufs = NULL;
949         }
950 
951         hal_bufs_init(&reg_ctx->cmv_bufs);
952         if (reg_ctx->cmv_bufs == NULL) {
953             mpp_err_f("colmv bufs init fail");
954             return MPP_ERR_NULL_PTR;
955         }
956 
957         reg_ctx->mv_size = mv_size;
958         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
959         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
960     }
961 
962     {
963         MppFrame mframe = NULL;
964         RK_U32 ver_virstride;
965         RK_U32 virstrid_uv;
966         MppFrameFormat fmt;
967         RK_U32 chroma_fmt_idc = dxva_ctx->pp.chroma_format_idc;
968 
969         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
970                               SLOT_FRAME_PTR, &mframe);
971         /* for 8K downscale mode*/
972         if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY &&
973             reg_ctx->origin_bufs == NULL) {
974             vdpu383_setup_scale_origin_bufs(reg_ctx, mframe);
975         }
976 
977         fmt = mpp_frame_get_fmt(mframe);
978 
979         stride_y = mpp_frame_get_hor_stride(mframe);
980         ver_virstride = mpp_frame_get_ver_stride(mframe);
981         stride_uv = stride_y;
982         virstrid_y = ver_virstride * stride_y;
983         if (chroma_fmt_idc == 3)
984             stride_uv *= 2;
985         if (chroma_fmt_idc == 3 || chroma_fmt_idc == 2) {
986             virstrid_uv = stride_uv * ver_virstride;
987         } else {
988             virstrid_uv = stride_uv * ver_virstride / 2;
989         }
990         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
991             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
992             RK_U32 fbd_offset;
993 
994             hw_regs->ctrl_regs.reg9.fbc_e = 1;
995             hw_regs->h265d_paras.reg68_hor_virstride = fbc_hdr_stride / 64;
996             fbd_offset = fbc_hdr_stride * MPP_ALIGN(ver_virstride, 64) / 16;
997             hw_regs->h265d_addrs.reg193_fbc_payload_offset = fbd_offset;
998         } else if (MPP_FRAME_FMT_IS_TILE(fmt)) {
999             hw_regs->ctrl_regs.reg9.tile_e = 1;
1000             if (chroma_fmt_idc == 0) { //yuv400
1001                 hw_regs->h265d_paras.reg68_hor_virstride = stride_y * 4 / 16;
1002             } else if (chroma_fmt_idc == 2) { //yuv422
1003                 hw_regs->h265d_paras.reg68_hor_virstride = stride_y * 8 / 16;
1004             } else if (chroma_fmt_idc == 3) { //yuv444
1005                 hw_regs->h265d_paras.reg68_hor_virstride = stride_y * 12 / 16;
1006             } else { //yuv420
1007                 hw_regs->h265d_paras.reg68_hor_virstride = stride_y * 6 / 16;
1008             }
1009             hw_regs->h265d_paras.reg70_y_virstride = (virstrid_y + virstrid_uv) / 16;
1010         } else {
1011             hw_regs->ctrl_regs.reg9.fbc_e = 0;
1012             hw_regs->h265d_paras.reg68_hor_virstride = stride_y >> 4;
1013             hw_regs->h265d_paras.reg69_raster_uv_hor_virstride = stride_uv >> 4;
1014             hw_regs->h265d_paras.reg70_y_virstride = virstrid_y >> 4;
1015         }
1016         hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg68_hor_virstride;
1017         hw_regs->h265d_paras.reg81_error_ref_raster_uv_hor_virstride = hw_regs->h265d_paras.reg69_raster_uv_hor_virstride;
1018         hw_regs->h265d_paras.reg82_error_ref_virstride = hw_regs->h265d_paras.reg70_y_virstride;
1019     }
1020     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
1021                           SLOT_BUFFER, &framebuf);
1022 
1023     if (reg_ctx->origin_bufs) {
1024         origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs,
1025                                       dxva_ctx->pp.CurrPic.Index7Bits);
1026         framebuf = origin_buf->buf[0];
1027     }
1028 
1029     hw_regs->h265d_addrs.reg168_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
1030     hw_regs->h265d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(framebuf);
1031     /* if out_base is equal to zero, this frame may be in error;
1032        return directly (added by csy) */
1033 
1034     if (hw_regs->h265d_addrs.reg168_decout_base == 0) {
1035         return 0;
1036     }
1037 
1038     fd =  mpp_buffer_get_fd(framebuf);
1039     hw_regs->h265d_addrs.reg168_decout_base = fd;
1040     hw_regs->h265d_addrs.reg192_payload_st_cur_base = fd;
1041     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.CurrPic.Index7Bits);
1042 
1043     hw_regs->h265d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
1044 #ifdef DUMP_VDPU383_DATAS
1045     {
1046         char *cur_fname = "colmv_cur_frame.dat";
1047         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1048         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1049         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(mv_buf->buf[0]),
1050                           mpp_buffer_get_size(mv_buf->buf[0]), 64, 0);
1051     }
1052 #endif
1053 
1054     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
1055                           &streambuf);
1056     if ( dxva_ctx->bitstream == NULL) {
1057         dxva_ctx->bitstream = mpp_buffer_get_ptr(streambuf);
1058     }
1059 
1060 #ifdef DUMP_VDPU383_DATAS
1061     {
1062         char *cur_fname = "stream_in_128bit.dat";
1063         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1064         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1065         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(streambuf),
1066                           mpp_buffer_get_size(streambuf), 128, 0);
1067     }
1068 #endif
1069 
1070     hal_h265d_vdpu383_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
1071 
1072     hw_regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(streambuf);
1073     hw_regs->h265d_paras.reg66_stream_len = ((dxva_ctx->bitstream_size + 15) & (~15)) + 64;
1074     aglin_offset =  hw_regs->h265d_paras.reg66_stream_len - dxva_ctx->bitstream_size;
1075     if (aglin_offset > 0)
1076         memset((void *)(dxva_ctx->bitstream + dxva_ctx->bitstream_size), 0, aglin_offset);
1077 
1078     /* common setting */
1079     hw_regs->ctrl_regs.reg8_dec_mode = 0; // hevc
1080     hw_regs->ctrl_regs.reg9.buf_empty_en = 0;
1081 
1082     hw_regs->ctrl_regs.reg10.strmd_auto_gating_e      = 1;
1083     hw_regs->ctrl_regs.reg10.inter_auto_gating_e      = 1;
1084     hw_regs->ctrl_regs.reg10.intra_auto_gating_e      = 1;
1085     hw_regs->ctrl_regs.reg10.transd_auto_gating_e     = 1;
1086     hw_regs->ctrl_regs.reg10.recon_auto_gating_e      = 1;
1087     hw_regs->ctrl_regs.reg10.filterd_auto_gating_e    = 1;
1088     hw_regs->ctrl_regs.reg10.bus_auto_gating_e        = 1;
1089     hw_regs->ctrl_regs.reg10.ctrl_auto_gating_e       = 1;
1090     hw_regs->ctrl_regs.reg10.rcb_auto_gating_e        = 1;
1091     hw_regs->ctrl_regs.reg10.err_prc_auto_gating_e    = 1;
1092 
1093     // hw_regs->ctrl_regs.reg11.dec_timeout_dis = 1;
1094 
1095     hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1096     hw_regs->ctrl_regs.reg16.error_spread_disable = 0;
1097     hw_regs->ctrl_regs.reg16.roi_error_ctu_cal_en = 0;
1098 
1099     hw_regs->ctrl_regs.reg20_cabac_error_en_lowbits = 0xffffffff;
1100     hw_regs->ctrl_regs.reg21_cabac_error_en_highbits = 0x3ff3f9ff;
1101 
1102     hw_regs->ctrl_regs.reg13_core_timeout_threshold = 0xffff;
1103 
1104 
1105     valid_ref = hw_regs->h265d_addrs.reg168_decout_base;
1106     reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.CurrPic.Index7Bits;
1107 
1108     hw_regs->h265d_addrs.reg169_error_ref_base = valid_ref;
1109     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
1110         if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
1111             dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
1112 
1113             MppFrame mframe = NULL;
1114             mpp_buf_slot_get_prop(reg_ctx->slots,
1115                                   dxva_ctx->pp.RefPicList[i].Index7Bits,
1116                                   SLOT_BUFFER, &framebuf);
1117             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits,
1118                                   SLOT_FRAME_PTR, &mframe);
1119             if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
1120                 origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs,
1121                                               dxva_ctx->pp.RefPicList[i].Index7Bits);
1122                 framebuf = origin_buf->buf[0];
1123             }
1124             if (framebuf != NULL) {
1125                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = mpp_buffer_get_fd(framebuf);
1126                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = mpp_buffer_get_fd(framebuf);
1127                 valid_ref = hw_regs->h265d_addrs.reg170_185_ref_base[i];
1128                 if ((pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc) < distance)
1129                     && (!mpp_frame_get_errinfo(mframe))) {
1130 
1131                     distance = pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc);
1132                     hw_regs->h265d_addrs.reg169_error_ref_base = hw_regs->h265d_addrs.reg170_185_ref_base[i];
1133                     reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.RefPicList[i].Index7Bits;
1134                     hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1135                 }
1136             } else {
1137                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = valid_ref;
1138                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = valid_ref;
1139             }
1140 
1141             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.RefPicList[i].Index7Bits);
1142             hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1143         }
1144     }
1145 
1146     if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_ctx->pp.CurrPic.Index7Bits) &&
1147         !dxva_ctx->pp.IntraPicFlag) {
1148         h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
1149         syn->dec.flags.ref_err = 1;
1150         return MPP_OK;
1151     }
1152 
1153     /* pps */
1154     hw_regs->common_addr.reg131_gbl_base = reg_ctx->bufs_fd;
1155     hw_regs->h265d_paras.reg67_global_len = 0xc; //22 * 8;
1156     mpp_dev_set_reg_offset(reg_ctx->dev, 131, reg_ctx->spspps_offset);
1157 
1158     /* rps */
1159     hw_regs->common_addr.reg129_rps_base = reg_ctx->bufs_fd;
1160     mpp_dev_set_reg_offset(reg_ctx->dev, 129, reg_ctx->rps_offset);
1161 
1162     hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
1163 
1164     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) {
1165 
1166         if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff &&
1167             dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) {
1168             MppFrame mframe = NULL;
1169 
1170             mpp_buf_slot_get_prop(reg_ctx->slots,
1171                                   dxva_ctx->pp.RefPicList[i].Index7Bits,
1172                                   SLOT_BUFFER, &framebuf);
1173 
1174             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits,
1175                                   SLOT_FRAME_PTR, &mframe);
1176 
1177             if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
1178                 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1179                 hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1180                 hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1181                 hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1182             }
1183         } else {
1184             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1185             hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1186             hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base;
1187             hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1188         }
1189     }
1190 
1191     hal_h265d_rcb_info_update(hal, dxva_ctx, hw_regs, width, height);
1192     vdpu383_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
1193                       reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
1194                       (Vdpu383RcbInfo *)reg_ctx->rcb_info);
1195     vdpu383_setup_statistic(&hw_regs->ctrl_regs);
1196     mpp_buffer_sync_end(reg_ctx->bufs);
1197 
1198     {
1199         //scale down config
1200         MppFrame mframe = NULL;
1201         MppBuffer mbuffer = NULL;
1202         MppFrameThumbnailMode thumbnail_mode;
1203 
1204         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
1205                               SLOT_BUFFER, &mbuffer);
1206         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits,
1207                               SLOT_FRAME_PTR, &mframe);
1208         thumbnail_mode = mpp_frame_get_thumbnail_en(mframe);
1209         switch (thumbnail_mode) {
1210         case MPP_FRAME_THUMBNAIL_ONLY:
1211             hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1212             origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs, dxva_ctx->pp.CurrPic.Index7Bits);
1213             fd = mpp_buffer_get_fd(origin_buf->buf[0]);
1214             hw_regs->h265d_addrs.reg168_decout_base = fd;
1215             hw_regs->h265d_addrs.reg192_payload_st_cur_base = fd;
1216             hw_regs->h265d_addrs.reg169_error_ref_base = fd;
1217             vdpu383_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras);
1218             break;
1219         case MPP_FRAME_THUMBNAIL_MIXED:
1220             hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1221             vdpu383_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras);
1222             break;
1223         case MPP_FRAME_THUMBNAIL_NONE:
1224         default:
1225             hw_regs->ctrl_regs.reg9.scale_down_en = 0;
1226             break;
1227         }
1228     }
1229 
1230     return ret;
1231 }
1232 
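/*
 * Push the prepared register groups (ctrl, common address, codec paras,
 * codec address) to the kernel driver, register the interrupt status
 * register for read-back, program the RCB info and kick off the decode.
 */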
1233 static MPP_RET hal_h265d_vdpu383_start(void *hal, HalTaskInfo *task)
1234 {
1235     MPP_RET ret = MPP_OK;
1236     RK_U8* p = NULL;
1237     Vdpu383H265dRegSet *hw_regs = NULL;
1238     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1239     RK_S32 index =  task->dec.reg_index;
1240 
1241     RK_U32 i;
1242 
1243     if (task->dec.flags.parse_err ||
1244         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1245         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1246         return MPP_OK;
1247     }
1248 
1249     if (reg_ctx->fast_mode) {
1250         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
1251         hw_regs = ( Vdpu383H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1252     } else {
1253         p = (RK_U8*)reg_ctx->hw_regs;
1254         hw_regs = ( Vdpu383H265dRegSet *)reg_ctx->hw_regs;
1255     }
1256 
1257     if (hw_regs == NULL) {
1258         mpp_err("hal_h265d_vdpu383_start hw_regs is NULL\n");
1259         return MPP_ERR_NULL_PTR;
1260     }
1261     for (i = 0; i < 68; i++) {
1262         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1263                   i, *((RK_U32*)p));
1264         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
1265         p += 4;
1266     }
1267 
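     /*
      * Program the hardware in four register groups (control, common address,
      * codec parameter, codec address), request read-back of the interrupt
      * status register, load the RCB info into SRAM, then trigger the decode.
      */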
1268     do {
1269         MppDevRegWrCfg wr_cfg;
1270         MppDevRegRdCfg rd_cfg;
1271 
1272         wr_cfg.reg = &hw_regs->ctrl_regs;
1273         wr_cfg.size = sizeof(hw_regs->ctrl_regs);
1274         wr_cfg.offset = OFFSET_CTRL_REGS;
1275         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1276         if (ret) {
1277             mpp_err_f("set register write failed %d\n", ret);
1278             break;
1279         }
1280 
1281         wr_cfg.reg = &hw_regs->common_addr;
1282         wr_cfg.size = sizeof(hw_regs->common_addr);
1283         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1284         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1285         if (ret) {
1286             mpp_err_f("set register write failed %d\n", ret);
1287             break;
1288         }
1289 
1290         wr_cfg.reg = &hw_regs->h265d_paras;
1291         wr_cfg.size = sizeof(hw_regs->h265d_paras);
1292         wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
1293         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1294         if (ret) {
1295             mpp_err_f("set register write failed %d\n", ret);
1296             break;
1297         }
1298 
1299         wr_cfg.reg = &hw_regs->h265d_addrs;
1300         wr_cfg.size = sizeof(hw_regs->h265d_addrs);
1301         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1302         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1303         if (ret) {
1304             mpp_err_f("set register write failed %d\n", ret);
1305             break;
1306         }
1307 
1308         rd_cfg.reg = &hw_regs->ctrl_regs.reg15;
1309         rd_cfg.size = sizeof(hw_regs->ctrl_regs.reg15);
1310         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1311         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1312         if (ret) {
1313             mpp_err_f("set register read failed %d\n", ret);
1314             break;
1315         }
1316 
1317         /* rcb info for sram */
1318         vdpu383_set_rcbinfo(reg_ctx->dev, (Vdpu383RcbInfo*)reg_ctx->rcb_info);
1319 
1320         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1321         if (ret) {
1322             mpp_err_f("send cmd failed %d\n", ret);
1323             break;
1324         }
1325     } while (0);
1326 
1327     return ret;
1328 }
1329 
1330 
1331 static MPP_RET hal_h265d_vdpu383_wait(void *hal, HalTaskInfo *task)
1332 {
1333     MPP_RET ret = MPP_OK;
1334     RK_S32 index =  task->dec.reg_index;
1335     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1336     RK_U8* p = NULL;
1337     Vdpu383H265dRegSet *hw_regs = NULL;
1338     RK_S32 i;
1339 
1340     if (reg_ctx->fast_mode) {
1341         hw_regs = ( Vdpu383H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1342     } else {
1343         hw_regs = ( Vdpu383H265dRegSet *)reg_ctx->hw_regs;
1344     }
1345 
1346     p = (RK_U8*)hw_regs;
1347 
1348     if (task->dec.flags.parse_err ||
1349         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1350         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1351         goto ERR_PROC;
1352     }
1353 
1354     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1355     if (ret)
1356         mpp_err_f("poll cmd failed %d\n", ret);
1357 
1358 ERR_PROC:
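     /*
      * Treat the frame as erroneous if the parser/reference reported an error
      * or if the hardware signals a stream error, timeout, bus error, empty
      * buffer, colmv reference error, or never raises frame-ready.
      */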
1359     if (task->dec.flags.parse_err ||
1360         task->dec.flags.ref_err ||
1361         (!hw_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
1362         hw_regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
1363         hw_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
1364         hw_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
1365         hw_regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
1366         hw_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
1367         hw_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) {
1368         if (!reg_ctx->fast_mode) {
1369             if (reg_ctx->dec_cb)
1370                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1371         } else {
1372             MppFrame mframe = NULL;
1373             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1374                                   SLOT_FRAME_PTR, &mframe);
1375             if (mframe) {
1376                 reg_ctx->fast_mode_err_found = 1;
1377                 mpp_frame_set_errinfo(mframe, 1);
1378             }
1379         }
1380     } else {
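         /*
          * Decode finished without error: in fast mode, if a previous frame was
          * flagged as erroneous, propagate the error info to the current output
          * when any of its references carries the error flag.
          */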
1381         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1382             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1383                 if (task->dec.refer[i] >= 0) {
1384                     MppFrame frame_ref = NULL;
1385 
1386                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1387                                           SLOT_FRAME_PTR, &frame_ref);
1388                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1389                               i, task->dec.refer[i], frame_ref);
1390                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1391                         MppFrame frame_out = NULL;
1392                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1393                                               SLOT_FRAME_PTR, &frame_out);
1394                         mpp_frame_set_errinfo(frame_out, 1);
1395                         break;
1396                     }
1397                 }
1398             }
1399         }
1400     }
1401 
1402     for (i = 0; i < 68; i++) {
1403         if (i == 1 || i == 45) {
1404             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1405                       i, *((RK_U32*)p));
1406         }
1407         p += 4;
1408     }
1414 
1415     if (reg_ctx->fast_mode) {
1416         reg_ctx->g_buf[index].use_flag = 0;
1417     }
1418 
1419     return ret;
1420 }
1421 
1422 static MPP_RET hal_h265d_vdpu383_reset(void *hal)
1423 {
1424     MPP_RET ret = MPP_OK;
1425     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1426 
1427     p_hal->fast_mode_err_found = 0;
1428     return ret;
1429 }
1430 
1431 static MPP_RET hal_h265d_vdpu383_flush(void *hal)
1432 {
1433     MPP_RET ret = MPP_OK;
1434 
1435     (void)hal;
1436     return ret;
1437 }
1438 
1439 static MPP_RET hal_h265d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param)
1440 {
1441     MPP_RET ret = MPP_OK;
1442     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1443 
1446     switch ((MpiCmd)cmd_type) {
1447     case MPP_DEC_SET_FRAME_INFO: {
1448         MppFrame frame = (MppFrame)param;
1449         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1450         RK_U32 imgwidth = mpp_frame_get_width(frame);
1451         RK_U32 imgheight = mpp_frame_get_height(frame);
1452 
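         /*
          * 4:2:2 and 4:4:4 output needs a larger per-line buffer, so switch the
          * slot length-alignment callback accordingly.
          */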
1453         if (fmt == MPP_FMT_YUV422SP) {
1454             mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
1455         } else if (fmt == MPP_FMT_YUV444SP || fmt == MPP_FMT_YUV444SP_10BIT) {
1456             mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_444);
1457         }
1458         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1459             vdpu383_afbc_align_calc(p_hal->slots, frame, 16);
1460         } else if (imgwidth > 1920 || imgheight > 1088) {
1461             mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
1462         }
1463         break;
1464     }
1465     case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: {
1466         vdpu383_update_thumbnail_frame_info((MppFrame)param);
1467     } break;
1468     case MPP_DEC_SET_OUTPUT_FORMAT: {
1469     } break;
1470     default: {
1471     } break;
1472     }
1473     return ret;
1474 }
1475 
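/* HAL entry points for the VDPU383 HEVC decoder */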
1476 const MppHalApi hal_h265d_vdpu383 = {
1477     .name = "h265d_vdpu383",
1478     .type = MPP_CTX_DEC,
1479     .coding = MPP_VIDEO_CodingHEVC,
1480     .ctx_size = sizeof(HalH265dCtx),
1481     .flag = 0,
1482     .init = hal_h265d_vdpu383_init,
1483     .deinit = hal_h265d_vdpu383_deinit,
1484     .reg_gen = hal_h265d_vdpu383_gen_regs,
1485     .start = hal_h265d_vdpu383_start,
1486     .wait = hal_h265d_vdpu383_wait,
1487     .reset = hal_h265d_vdpu383_reset,
1488     .flush = hal_h265d_vdpu383_flush,
1489     .control = hal_h265d_vdpu383_control,
1490 };
1491