xref: /rockchip-linux_mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2022 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_h265d_vdpu382"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26 
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu382.h"
32 #include "vdpu382_h265d.h"
33 
34 /* #define dump */
35 #ifdef dump
36 static FILE *fp = NULL;
37 #endif
38 
39 #define HW_RPS
40 #define PPS_SIZE                (112 * 64)//(96x64)
41 
42 #define SET_REF_VALID(regs, index, value)\
43     do{ \
44         switch(index){\
45         case 0: regs.reg99.hevc_ref_valid_0 = value; break;\
46         case 1: regs.reg99.hevc_ref_valid_1 = value; break;\
47         case 2: regs.reg99.hevc_ref_valid_2 = value; break;\
48         case 3: regs.reg99.hevc_ref_valid_3 = value; break;\
49         case 4: regs.reg99.hevc_ref_valid_4 = value; break;\
50         case 5: regs.reg99.hevc_ref_valid_5 = value; break;\
51         case 6: regs.reg99.hevc_ref_valid_6 = value; break;\
52         case 7: regs.reg99.hevc_ref_valid_7 = value; break;\
53         case 8: regs.reg99.hevc_ref_valid_8 = value; break;\
54         case 9: regs.reg99.hevc_ref_valid_9 = value; break;\
55         case 10: regs.reg99.hevc_ref_valid_10 = value; break;\
56         case 11: regs.reg99.hevc_ref_valid_11 = value; break;\
57         case 12: regs.reg99.hevc_ref_valid_12 = value; break;\
58         case 13: regs.reg99.hevc_ref_valid_13 = value; break;\
59         case 14: regs.reg99.hevc_ref_valid_14 = value; break;\
60         default: break;}\
61     }while(0)
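/*
 * SET_REF_VALID fans a runtime reference index out to the individual
 * hevc_ref_valid_N bitfields of reg99; packed bitfields cannot be indexed
 * at runtime, hence the switch. Typical use (see gen_regs below):
 *
 *     SET_REF_VALID(hw_regs->h265d_param, i, 1);
 */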
62 
63 #define FMT 4
64 #define CTU 3
65 
66 typedef struct {
67     RK_U32 a;
68     RK_U32 b;
69 } FilterdColBufRatio;
70 
71 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
72     /* 400    420      422       444 */
73     {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
74     {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
75     {{0, 0}, {27, 5},  {36, 5},  {52, 5}}  //ctu 64
76 };
77 
78 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
79     /* 400     420       422       444 */
80     {{0, 0}, {15, 5},  {20, 5},  {20, 5}},  //ctu 16
81     {{0, 0}, {15, 9},  {20, 9},  {20, 9}},  //ctu 32
82     {{0, 0}, {15, 16}, {20, 16}, {20, 16}}  //ctu 64
83 };
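/*
 * The two tables above hold per-CTU-size / per-chroma-format coefficients
 * {a, b} for sizing the column filter RCB buffer: h265d_refine_rcb_size()
 * computes roughly height * (a * bit_depth + b) bits, picking the FBC-on
 * or FBC-off table according to the fbc_e register bit.
 */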
84 
85 #define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
86 #define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
87 #define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
88 #define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
89 #define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
90 #define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE *cnt)
91 
92 #define CABAC_TAB_OFFSET                (0)
93 #define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * pos))
94 #define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
95 #define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
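/*
 * Layout of the single "bufs" allocation: a shared CABAC table first, then
 * one INFO slice (packed SPS/PPS, RPS, scaling lists) per register context.
 * With SZ_4K (4096) alignment the offsets work out to:
 *
 *     CABAC_TAB_OFFSET  = 0        (27456 bytes padded to 28672)
 *     SPSPPS_OFFSET(0)  = 28672
 *     RPS_OFFSET(0)     = 36864    (28672 + 8192)
 *     SCALIST_OFFSET(0) = 40960    (36864 + 4096)
 *     SPSPPS_OFFSET(1)  = 151552   (28672 + 1 * 122880)
 */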
96 
97 static MPP_RET hal_h265d_vdpu382_init(void *hal, MppHalCfg *cfg)
98 {
99     RK_S32 ret = 0;
100     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
101 
102     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
103     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
104 
105     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
106     if (reg_ctx->scaling_qm == NULL) {
107         mpp_err("scaling_qm alloc fail");
108         return MPP_ERR_MALLOC;
109     }
110 
111     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
112     reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
113     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
114 
115     if (reg_ctx->scaling_rk == NULL) {
116         mpp_err("scaling_rk alloc fail");
117         return MPP_ERR_MALLOC;
118     }
119 
120     if (reg_ctx->group == NULL) {
121         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
122         if (ret) {
123             mpp_err("h265d mpp_buffer_group_get failed\n");
124             return ret;
125         }
126     }
127 
128     {
129         RK_U32 i = 0;
130         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
131 
132         //!< malloc buffers
133         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
134         if (ret) {
135             mpp_err("h265d mpp_buffer_get failed\n");
136             return ret;
137         }
138 
139         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
140         reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
141         for (i = 0; i < max_cnt; i++) {
142             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382H265dRegSet));
143             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
144             reg_ctx->offset_rps[i] = RPS_OFFSET(i);
145             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
146         }
147     }
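    /*
     * In fast mode one register set plus SPS/PPS, RPS and scaling-list
     * slices is kept per MAX_GEN_REG entry, so several tasks can be
     * prepared while earlier ones are still in flight; otherwise only
     * entry 0 is used, wired up just below.
     */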
148 
149     if (!reg_ctx->fast_mode) {
150         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
151         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
152         reg_ctx->rps_offset = reg_ctx->offset_rps[0];
153         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
154     }
155 
156     ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
157     if (ret) {
158         mpp_err("h265d write cabac_table data failed\n");
159         return ret;
160     }
161 
162     if (cfg->hal_fbc_adj_cfg) {
163         cfg->hal_fbc_adj_cfg->func = vdpu382_afbc_align_calc;
164         cfg->hal_fbc_adj_cfg->expand = 16;
165     }
166 
167 #ifdef dump
168     fp = fopen("/data/hal.bin", "wb");
169 #endif
170     (void) cfg;
171     return MPP_OK;
172 }
173 
174 static MPP_RET hal_h265d_vdpu382_deinit(void *hal)
175 {
176     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
177     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
178     RK_U32 i;
179 
180     if (reg_ctx->bufs) {
181         mpp_buffer_put(reg_ctx->bufs);
182         reg_ctx->bufs = NULL;
183     }
184 
185     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
186     for (i = 0; i < loop; i++) {
187         if (reg_ctx->rcb_buf[i]) {
188             mpp_buffer_put(reg_ctx->rcb_buf[i]);
189             reg_ctx->rcb_buf[i] = NULL;
190         }
191     }
192 
193     if (reg_ctx->group) {
194         mpp_buffer_group_put(reg_ctx->group);
195         reg_ctx->group = NULL;
196     }
197 
198     for (i = 0; i < loop; i++)
199         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
200 
201     MPP_FREE(reg_ctx->scaling_qm);
202     MPP_FREE(reg_ctx->scaling_rk);
203     MPP_FREE(reg_ctx->pps_buf);
204     MPP_FREE(reg_ctx->sw_rps_buf);
205 
206     if (reg_ctx->cmv_bufs) {
207         hal_bufs_deinit(reg_ctx->cmv_bufs);
208         reg_ctx->cmv_bufs = NULL;
209     }
210 
211     return MPP_OK;
212 }
213 
214 static RK_S32 hal_h265d_v382_output_pps_packet(void *hal, void *dxva)
215 {
216     RK_S32 fifo_len = 14;//12
217     RK_S32 i, j;
218     RK_U32 addr;
219     RK_U32 log2_min_cb_size;
220     RK_S32 width, height;
221     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
222     Vdpu382H265dRegSet *hw_reg = (Vdpu382H265dRegSet*)(reg_ctx->hw_regs);
223     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
224     BitputCtx_t bp;
225 
226     if (NULL == reg_ctx || dxva_cxt == NULL) {
227         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
228                 __FILE__, __FUNCTION__, __LINE__);
229         return MPP_ERR_NULL_PTR;
230     }
231     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
232     if (dxva_cxt->pp.ps_update_flag) {
233         RK_U64 *pps_packet = reg_ctx->pps_buf;
234         if (NULL == pps_ptr) {
235             mpp_err("pps_data get ptr error");
236             return MPP_ERR_NOMEM;
237         }
238 
239         for (i = 0; i < 14; i++) pps_packet[i] = 0;
240 
241         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
242 
243         // SPS
244         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
245         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
246         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
247 
248         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
249         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
250         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
251 
252         mpp_put_bits(&bp, width                                          , 16);
253         mpp_put_bits(&bp, height                                         , 16);
254         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
255         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
256         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
257         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_max_coding_block_depth
258         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
259         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
260         ///<-zrh comment ^  63 bit above
261         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
262         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
263         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
264         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
265         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
266         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
267         ///<-zrh comment ^  68 bit above
268         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
269         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
270         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
271         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
272         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
273         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
274 
275         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
276         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
277         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
278         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
279         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
280         ///<-zrh comment ^ 100 bit above
281 
282         mpp_put_bits(&bp, 0                                                    , 7 ); //49bits
283         //yandong change
284         mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
285         mpp_put_bits(&bp, 0, 3);
286         mpp_put_align(&bp                                                        , 32, 0xf); //128
287         // PPS
288         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
289         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
290         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
291         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
292         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
293         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
294         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
295         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);//31 bits
296         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
297         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
298         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
299         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
300         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
301         mpp_put_bits(&bp, log2_min_cb_size +
302                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
303                      dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
304 
305         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
306                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
307 
308         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
309         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
310         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
311         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
312         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
313         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
314         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
315         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
316         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
317         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
318         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
319         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
320         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
321         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
322         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
323         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
324         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
325         mpp_put_bits(&bp, 0                                                        , 3);
326         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
327         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
328         mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
329         mpp_put_align(&bp, 64, 0xf);
330         {
331             /// tiles info begin
332             RK_U16 column_width[20];
333             RK_U16 row_height[22];
334 
335             memset(column_width, 0, sizeof(column_width));
336             memset(row_height, 0, sizeof(row_height));
337 
338             if (dxva_cxt->pp.tiles_enabled_flag) {
339 
340                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
341                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
342                     RK_S32 ctu_width_in_pic = (width +
343                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
344                     RK_S32 ctu_height_in_pic = (height +
345                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
346                     RK_S32 sum = 0;
347                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
348                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
349                         sum += column_width[i]  ;
350                     }
351                     column_width[i] = ctu_width_in_pic - sum;
352 
353                     sum = 0;
354                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
355                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
356                         sum += row_height[i];
357                     }
358                     row_height[i] = ctu_height_in_pic - sum;
359                 } // end of (pps->uniform_spacing_flag == 0)
360                 else {
361 
362                     RK_S32    pic_in_cts_width = (width +
363                                                   (1 << (log2_min_cb_size +
364                                                          dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
365                                                  / (1 << (log2_min_cb_size +
366                                                           dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
367                     RK_S32 pic_in_cts_height = (height +
368                                                 (1 << (log2_min_cb_size +
369                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
370                                                / (1 << (log2_min_cb_size +
371                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
372 
373                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
374                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
375                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
376 
377                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
378                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
379                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
380                 }
381             } // pps->tiles_enabled_flag
382             else {
383                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
384                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
385                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
386             }
387 
388             for (j = 0; j < 20; j++) {
389                 if (column_width[j] > 0)
390                     column_width[j]--;
391                 mpp_put_bits(&bp, column_width[j], 12);
392             }
393 
394             for (j = 0; j < 22; j++) {
395                 if (row_height[j] > 0)
396                     row_height[j]--;
397                 mpp_put_bits(&bp, row_height[j], 12);
398             }
399         }
400 
401         mpp_put_bits(&bp, 0, 32);
402         mpp_put_bits(&bp, 0, 70);
403         mpp_put_align(&bp, 64, 0xf);//128
404     }
405 
406     if (dxva_cxt->pp.scaling_list_enabled_flag) {
407         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
408 
409         if (dxva_cxt->pp.scaling_list_data_present_flag) {
410             addr = (dxva_cxt->pp.pps_id + 16) * 1360;
411         } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
412             addr = dxva_cxt->pp.sps_id * 1360;
413         } else {
414             addr = 80 * 1360;
415         }
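        /*
         * The scaling-list area holds 81 slots of 1360 bytes each (matching
         * SCALIST_ALIGNED_SIZE): slots 0..15 are selected by sps_id, slots
         * 16..79 by pps_id + 16, and slot 80 is the fallback entry.
         */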
416 
417         hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
418 
419         hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
420         hw_reg->common.reg012.scanlist_addr_valid_en = 1;
421 
422         /* need to config addr */
423         mpp_dev_set_reg_offset(reg_ctx->dev, 180, addr + reg_ctx->sclst_offset);
424     }
425 
426     for (i = 0; i < 64; i++)
427         memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
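    /*
     * The packed 112-byte SPS/PPS entry is replicated into all 64 hardware
     * PPS slots, presumably so that whichever pps_id the hardware indexes
     * resolves to the current frame's parameters.
     */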
428 #ifdef dump
429     fwrite(pps_ptr, 1, 80 * 64, fp);
430     RK_U32 *tmp = (RK_U32 *)pps_ptr;
431     for (i = 0; i < 112 / 4; i++) {
432         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
433     }
434 #endif
435     return 0;
436 }
437 
438 static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
439                                   Vdpu382H265dRegSet *hw_regs,
440                                   RK_S32 width, RK_S32 height, void *dxva)
441 {
442     RK_U32 rcb_bits = 0;
443     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
444     DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
445     RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400, 1 420, 2 422, 3 444
446     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
447     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
448     RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
449     RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;
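    /* each tile column cut adds 64 bytes (64 * 8 bits) of alignment slack
     * to the row buffers sized below */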
450 
451     width = MPP_ALIGN(width, ctu_size);
452     height = MPP_ALIGN(height, ctu_size);
453 
454     /* RCB_STRMD_ROW */
455     if (width >= 8192) {
456         RK_U32 factor = 64 / ctu_size;
457 
458         rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
459     } else
460         rcb_bits = 0;
461     rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
462 
463     /* RCB_TRANSD_ROW */
464     if (width >= 8192)
465         rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
466     else
467         rcb_bits = 0;
468     rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
469 
470     /* RCB_TRANSD_COL */
471     if (height >= 8192 && tile_col_cut_num)
472         rcb_bits = tile_col_cut_num ? (MPP_ALIGN(height - 8192, 4) << 1) : 0;
473     else
474         rcb_bits = 0;
475     rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
476 
477     /* RCB_INTER_ROW */
478     rcb_bits = width * 22 + ext_align_size;
479     rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
480 
481     /* RCB_INTER_COL */
482     rcb_bits = tile_col_cut_num ? (height * 22) : 0;
483     rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);
484 
485     /* RCB_INTRA_ROW */
486     rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
487     rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
488 
489     /* RCB_DBLK_ROW */
490     if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
491         if (ctu_size == 32)
492             rcb_bits = width * ( 4 + 6 * bit_depth);
493         else
494             rcb_bits = width * ( 2 + 6 * bit_depth);
495     } else {
496         if (ctu_size == 32)
497             rcb_bits = width * ( 4 + 8 * bit_depth);
498         else
499             rcb_bits = width * ( 2 + 8 * bit_depth);
500     }
501     rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
502     rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
503 
504     /* RCB_SAO_ROW */
505     if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
506         rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
507     } else {
508         rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
509     }
510     rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
511     rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
512 
513     /* RCB_FBC_ROW */
514     if (hw_regs->common.reg012.fbc_e) {
515         rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
516         rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
517     } else
518         rcb_bits = 0;
519     rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
520 
521     /* RCB_FILT_COL */
522     if (tile_col_cut_num) {
523         if (hw_regs->common.reg012.fbc_e) {
524             RK_U32 ctu_idx = ctu_size >> 5;
525             RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
526             RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;
527 
528             rcb_bits = height * (a * bit_depth + b);
529         } else {
530             RK_U32 ctu_idx = ctu_size >> 5;
531             RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
532             RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;
533 
534             rcb_bits = height * (a * bit_depth + b);
535         }
536     } else
537         rcb_bits = 0;
538     rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
539 }
540 
541 static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
542                                       Vdpu382H265dRegSet *hw_regs,
543                                       RK_S32 width, RK_S32 height)
544 {
545     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
546     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
547     DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
548     RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400, 1 420, 2 422, 3 444
549     RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
550     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
551     RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;
552 
553     if (reg_ctx->num_row_tiles != num_tiles ||
554         reg_ctx->bit_depth != bit_depth ||
555         reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
556         reg_ctx->ctu_size !=  ctu_size ||
557         reg_ctx->width != width ||
558         reg_ctx->height != height) {
559         RK_U32 i = 0;
560         RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
561 
562         reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, width, height);
563         h265d_refine_rcb_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);
564 
565         for (i = 0; i < loop; i++) {
566             MppBuffer rcb_buf;
567 
568             if (reg_ctx->rcb_buf[i]) {
569                 mpp_buffer_put(reg_ctx->rcb_buf[i]);
570                 reg_ctx->rcb_buf[i] = NULL;
571             }
572             mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size);
573             reg_ctx->rcb_buf[i] = rcb_buf;
574         }
575 
576         reg_ctx->num_row_tiles  = num_tiles;
577         reg_ctx->bit_depth      = bit_depth;
578         reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
579         reg_ctx->ctu_size       = ctu_size;
580         reg_ctx->width          = width;
581         reg_ctx->height         = height;
582     }
583 }
584 
585 #define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
586     do{ \
587         switch(index){\
588         case 0: regs.reg200.ref0_##field = value; break;\
589         case 1: regs.reg200.ref1_##field = value; break;\
590         case 2: regs.reg200.ref2_##field = value; break;\
591         case 3: regs.reg200.ref3_##field = value; break;\
592         case 4: regs.reg200.ref4_##field = value; break;\
593         case 5: regs.reg200.ref5_##field = value; break;\
594         case 6: regs.reg200.ref6_##field = value; break;\
595         case 7: regs.reg200.ref7_##field = value; break;\
596         case 8: regs.reg201.ref8_##field = value; break;\
597         case 9: regs.reg201.ref9_##field = value; break;\
598         case 10: regs.reg201.ref10_##field = value; break;\
599         case 11: regs.reg201.ref11_##field = value; break;\
600         case 12: regs.reg201.ref12_##field = value; break;\
601         case 13: regs.reg201.ref13_##field = value; break;\
602         case 14: regs.reg201.ref14_##field = value; break;\
603         case 15: regs.reg201.ref15_##field = value; break;\
604         default: break;}\
605     }while(0)
606 
607 #define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
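/*
 * pocdistance() is the absolute POC difference; gen_regs below uses it to
 * pick the error-free reference closest to the current POC as the error
 * concealment reference programmed into reg132_error_ref_base.
 */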
608 
609 static MPP_RET hal_h265d_vdpu382_setup_colmv_buf(void *hal, HalTaskInfo *syn)
610 {
611     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
612     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
613     DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
614     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size +
615                            pp->log2_min_luma_coding_block_size_minus3 + 3);
616     RK_U32 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
617 
618     RK_U32 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
619     RK_U32 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
620     RK_U32 mv_size = 0, colmv_size = 16, colmv_byte = 16;
621     RK_U32 compress = reg_ctx->hw_info ? reg_ctx->hw_info->cap_colmv_compress : 1;
622 
623 
624     mv_size = vdpu382_get_colmv_size(width, height, ctu_size, colmv_byte, colmv_size, compress);
625 
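    /*
     * Re-create the co-located MV buffer pool only when it does not exist
     * yet or the required per-frame size has grown; one buffer is kept per
     * frame slot so each reference has its own colmv storage.
     */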
626     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
627         size_t size = mv_size;
628 
629         if (reg_ctx->cmv_bufs) {
630             hal_bufs_deinit(reg_ctx->cmv_bufs);
631             reg_ctx->cmv_bufs = NULL;
632         }
633 
634         hal_bufs_init(&reg_ctx->cmv_bufs);
635         if (reg_ctx->cmv_bufs == NULL) {
636             mpp_err_f("colmv bufs init fail");
637             return MPP_ERR_NOMEM;
638         }
639 
640         reg_ctx->mv_size = mv_size;
641         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
642         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
643     }
644 
645     return MPP_OK;
646 }
647 
648 static MPP_RET hal_h265d_vdpu382_gen_regs(void *hal,  HalTaskInfo *syn)
649 {
650     RK_S32 i = 0;
651     RK_S32 log2_min_cb_size;
652     RK_S32 width, height;
653     RK_S32 stride_y, stride_uv, virstrid_y;
654     Vdpu382H265dRegSet *hw_regs;
655     RK_S32 ret = MPP_SUCCESS;
656     MppBuffer streambuf = NULL;
657     RK_S32 aglin_offset = 0;
658     RK_S32 valid_ref = -1;
659     MppBuffer framebuf = NULL;
660     HalBuf *mv_buf = NULL;
661     RK_S32 fd = -1;
662     RK_S32 distance = INT_MAX;
663     h265d_dxva2_picture_context_t *dxva_cxt =
664         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
665     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
666     void *rps_ptr = NULL;
667     RK_U32 stream_buf_size = 0;
668 
669     if (syn->dec.flags.parse_err ||
670         (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
671         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
672         return MPP_OK;
673     }
674 
675     if (reg_ctx->fast_mode) {
676         for (i = 0; i < MAX_GEN_REG; i++) {
677             if (!reg_ctx->g_buf[i].use_flag) {
678                 syn->dec.reg_index = i;
679 
680                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
681                 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
682                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
683 
684                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
685                 reg_ctx->g_buf[i].use_flag = 1;
686                 break;
687             }
688         }
689         if (i == MAX_GEN_REG) {
690             mpp_err("hevc rps buf all used");
691             return MPP_ERR_NOMEM;
692         }
693     } else {
694         syn->dec.reg_index = 0;
695     }
696     rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
697     if (NULL == rps_ptr) {
698 
699         mpp_err("rps_data get ptr error");
700         return MPP_ERR_NOMEM;
701     }
702 
703 
704     if (syn->dec.syntax.data == NULL) {
705         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
706         return MPP_ERR_NULL_PTR;
707     }
708 
709     /* output pps */
710     hw_regs = (Vdpu382H265dRegSet*)reg_ctx->hw_regs;
711     memset(hw_regs, 0, sizeof(Vdpu382H265dRegSet));
712 
713     if (NULL == reg_ctx->hw_regs) {
714         return MPP_ERR_NULL_PTR;
715     }
716 
717 
718     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
719 
720     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
721     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
722     ret = hal_h265d_vdpu382_setup_colmv_buf(hal, syn);
723     if (ret)
724         return MPP_ERR_NOMEM;
725 
726     {
727         MppFrame mframe = NULL;
728         RK_U32 ver_virstride;
729 
730         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
731                               SLOT_FRAME_PTR, &mframe);
732         stride_y = mpp_frame_get_hor_stride(mframe);
733         ver_virstride = mpp_frame_get_ver_stride(mframe);
734         stride_uv = stride_y;
735         virstrid_y = ver_virstride * stride_y;
736         hw_regs->common.reg013.h26x_error_mode = 1;
737         hw_regs->common.reg021.error_deb_en = 1;
738         hw_regs->common.reg021.inter_error_prc_mode = 0;
739         hw_regs->common.reg021.error_intra_mode = 1;
740 
741         hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
742         hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
743         hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
744         hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
745 
746         if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
747             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
748             RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
749 
750             hw_regs->common.reg012.fbc_e = 1;
751             hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
752             hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
753             hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
754         } else {
755             hw_regs->common.reg012.fbc_e = 0;
756             hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
757             hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
758             hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
759         }
760     }
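    /*
     * Horizontal/vertical stride registers are programmed in 16-byte units
     * (the >> 4). In the FBC case the payload offset is derived from the
     * FBC header stride and (ver_virstride + 64), rounded up to 4K.
     */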
761     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
762                           SLOT_BUFFER, &framebuf);
763     hw_regs->common_addr.reg130_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
764     /* if decout_base is zero this frame may be in error,
765        return directly (added by csy) */
766 
767     if (hw_regs->common_addr.reg130_decout_base == 0) {
768         return 0;
769     }
770     fd =  mpp_buffer_get_fd(framebuf);
771     hw_regs->common_addr.reg130_decout_base = fd;
772     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
773     hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
774 
775     hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
776 
777     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
778                           &streambuf);
779     if ( dxva_cxt->bitstream == NULL) {
780         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
781     }
782 #ifdef HW_RPS
783     hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
784     hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
785 #else
786     hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
787     hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
788 #endif
789 
790     /* cabac table */
791     hw_regs->h265d_addr.reg197_cabactbl_base    = reg_ctx->bufs_fd;
792     /* pps */
793     hw_regs->h265d_addr.reg161_pps_base         = reg_ctx->bufs_fd;
794     hw_regs->h265d_addr.reg163_rps_base         = reg_ctx->bufs_fd;
795 
796     hw_regs->common_addr.reg128_rlc_base        = mpp_buffer_get_fd(streambuf);
797     hw_regs->common_addr.reg129_rlcwrite_base   = mpp_buffer_get_fd(streambuf);
798     stream_buf_size                             = mpp_buffer_get_size(streambuf);
799     hw_regs->common.reg016_str_len              = ((dxva_cxt->bitstream_size + 15)
800                                                    & (~15)) + 64;
801     hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
802                                      hw_regs->common.reg016_str_len : stream_buf_size;
803 
804     aglin_offset =  hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
805     if (aglin_offset > 0) {
806         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
807                aglin_offset);
808     }
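    /*
     * reg016_str_len is the bitstream size rounded up to 16 bytes plus 64
     * padding bytes, clamped to the stream buffer size; the tail between
     * the real bitstream end and the padded length is zeroed just above.
     */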
809     hw_regs->common.reg010.dec_e                = 1;
810     hw_regs->common.reg012.colmv_compress_en = reg_ctx->hw_info ?
811                                                reg_ctx->hw_info->cap_colmv_compress : 0;
812 
813     hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
814     hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
815 
816     hw_regs->common.reg011.dec_clkgate_e    = 1;
817     hw_regs->common.reg011.err_head_fill_e  = 1;
818     hw_regs->common.reg011.err_colmv_fill_e = 1;
819 
820     hw_regs->common.reg026.inter_auto_gating_e = 1;
821     hw_regs->common.reg026.filterd_auto_gating_e = 1;
822     hw_regs->common.reg026.strmd_auto_gating_e = 1;
823     hw_regs->common.reg026.mcp_auto_gating_e = 1;
824     hw_regs->common.reg026.busifd_auto_gating_e = 1;
825     hw_regs->common.reg026.dec_ctrl_auto_gating_e = 1;
826     hw_regs->common.reg026.intra_auto_gating_e = 1;
827     hw_regs->common.reg026.mc_auto_gating_e = 1;
828     hw_regs->common.reg026.transd_auto_gating_e = 1;
829     hw_regs->common.reg026.sram_auto_gating_e = 1;
830     hw_regs->common.reg026.cru_auto_gating_e = 1;
831     hw_regs->common.reg026.reg_cfg_gating_en = 1;
832     hw_regs->common.reg032_timeout_threshold = 0x3ffff;
833 
834     valid_ref = hw_regs->common_addr.reg130_decout_base;
835     reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.CurrPic.Index7Bits;
836     hw_regs->common_addr.reg132_error_ref_base = valid_ref;
837 
838     memset(&hw_regs->highpoc.reg205, 0, sizeof(RK_U32));
839 
840     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
841         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
842             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
843 
844             MppFrame mframe = NULL;
845             hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
846             mpp_buf_slot_get_prop(reg_ctx->slots,
847                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
848                                   SLOT_BUFFER, &framebuf);
849             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
850                                   SLOT_FRAME_PTR, &mframe);
851             if (framebuf != NULL) {
852                 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
853                 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
854                 // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
855                 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
856                     && (!mpp_frame_get_errinfo(mframe))) {
857                     distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
858                     hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
859                     reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.RefPicList[i].Index7Bits;
860                     hw_regs->common.reg021.error_intra_mode = 0;
861 
862                 }
863             } else {
864                 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
865             }
866 
867             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
868             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
869 
870             SET_REF_VALID(hw_regs->h265d_param, i, 1);
871         }
872     }
873 
874     if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits) &&
875         !dxva_cxt->pp.IntraPicFlag) {
876         h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
877         syn->dec.flags.ref_err = 1;
878         return MPP_OK;
879     }
880 
881     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
882 
883         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
884             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
885             MppFrame mframe = NULL;
886 
887             mpp_buf_slot_get_prop(reg_ctx->slots,
888                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
889                                   SLOT_BUFFER, &framebuf);
890 
891             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
892                                   SLOT_FRAME_PTR, &mframe);
893 
894             if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
895                 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
896                 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
897                 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
898             }
899         } else {
900             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
901             hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
902             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
903             /* mark 3 to differ from current frame */
904             if (reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits)
905                 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
906         }
907     }
908     hal_h265d_v382_output_pps_packet(hal, syn->dec.syntax.data);
909 
910     mpp_dev_set_reg_offset(reg_ctx->dev, 161, reg_ctx->spspps_offset);
911     /* rps */
912     mpp_dev_set_reg_offset(reg_ctx->dev, 163, reg_ctx->rps_offset);
913 
914     hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
915 
916     hw_regs->common.reg011.buf_empty_en = 1;
917 
918     hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
919     vdpu382_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
920                       reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
921                       (Vdpu382RcbInfo*)reg_ctx->rcb_info);
922     {
923         MppFrame mframe = NULL;
924 
925         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
926                               SLOT_FRAME_PTR, &mframe);
927 
928         if (mpp_frame_get_thumbnail_en(mframe)) {
929             hw_regs->h265d_addr.reg198_scale_down_luma_base =
930                 hw_regs->common_addr.reg130_decout_base;
931             hw_regs->h265d_addr.reg199_scale_down_chorme_base =
932                 hw_regs->common_addr.reg130_decout_base;
933             vdpu382_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->common);
934         } else {
935             hw_regs->h265d_addr.reg198_scale_down_luma_base = 0;
936             hw_regs->h265d_addr.reg199_scale_down_chorme_base = 0;
937             hw_regs->common.reg012.scale_down_en = 0;
938         }
939     }
940     vdpu382_setup_statistic(&hw_regs->common, &hw_regs->statistic);
941     mpp_buffer_sync_end(reg_ctx->bufs);
942 
943     return ret;
944 }
945 
946 static MPP_RET hal_h265d_vdpu382_start(void *hal, HalTaskInfo *task)
947 {
948     MPP_RET ret = MPP_OK;
949     RK_U8* p = NULL;
950     Vdpu382H265dRegSet *hw_regs = NULL;
951     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
952     RK_S32 index =  task->dec.reg_index;
953 
954     RK_U32 i;
955 
956     if (task->dec.flags.parse_err ||
957         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
958         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
959         return MPP_OK;
960     }
961 
962     if (reg_ctx->fast_mode) {
963         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
964         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
965     } else {
966         p = (RK_U8*)reg_ctx->hw_regs;
967         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
968     }
969 
970     if (hw_regs == NULL) {
971         mpp_err("hal_h265d_start hw_regs is NULL");
972         return MPP_ERR_NULL_PTR;
973     }
974     for (i = 0; i < 68; i++) {
975         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
976                   i, *((RK_U32*)p));
977         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
978         p += 4;
979     }
980 
981     do {
982         MppDevRegWrCfg wr_cfg;
983         MppDevRegRdCfg rd_cfg;
984 
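        /*
         * Each register group is written with its own hardware offset
         * (common, codec params, common addresses, codec addresses,
         * statistic, high POC bits), the interrupt/status group is
         * registered for read-back, the per-task RCB info is pushed to
         * SRAM, and the task is finally kicked off with MPP_DEV_CMD_SEND.
         */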
985         wr_cfg.reg = &hw_regs->common;
986         wr_cfg.size = sizeof(hw_regs->common);
987         wr_cfg.offset = OFFSET_COMMON_REGS;
988 
989         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
990         if (ret) {
991             mpp_err_f("set register write failed %d\n", ret);
992             break;
993         }
994 
995         wr_cfg.reg = &hw_regs->h265d_param;
996         wr_cfg.size = sizeof(hw_regs->h265d_param);
997         wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
998 
999         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1000         if (ret) {
1001             mpp_err_f("set register write failed %d\n", ret);
1002             break;
1003         }
1004 
1005         wr_cfg.reg = &hw_regs->common_addr;
1006         wr_cfg.size = sizeof(hw_regs->common_addr);
1007         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1008 
1009         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1010         if (ret) {
1011             mpp_err_f("set register write failed %d\n", ret);
1012             break;
1013         }
1014 
1015         wr_cfg.reg = &hw_regs->h265d_addr;
1016         wr_cfg.size = sizeof(hw_regs->h265d_addr);
1017         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1018 
1019         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1020         if (ret) {
1021             mpp_err_f("set register write failed %d\n", ret);
1022             break;
1023         }
1024 
1025         wr_cfg.reg = &hw_regs->statistic;
1026         wr_cfg.size = sizeof(hw_regs->statistic);
1027         wr_cfg.offset = OFFSET_STATISTIC_REGS;
1028 
1029         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1030         if (ret) {
1031             mpp_err_f("set register write failed %d\n", ret);
1032             break;
1033         }
1034 
1035         wr_cfg.reg = &hw_regs->highpoc;
1036         wr_cfg.size = sizeof(hw_regs->highpoc);
1037         wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1038 
1039         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1040         if (ret) {
1041             mpp_err_f("set register write failed %d\n", ret);
1042             break;
1043         }
1044 
1045         rd_cfg.reg = &hw_regs->irq_status;
1046         rd_cfg.size = sizeof(hw_regs->irq_status);
1047         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1048 
1049         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1050         if (ret) {
1051             mpp_err_f("set register read failed %d\n", ret);
1052             break;
1053         }
1054         /* rcb info for sram */
1055         vdpu382_set_rcbinfo(reg_ctx->dev, (Vdpu382RcbInfo*)reg_ctx->rcb_info);
1056 
1057         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1058         if (ret) {
1059             mpp_err_f("send cmd failed %d\n", ret);
1060             break;
1061         }
1062     } while (0);
1063 
1064     return ret;
1065 }
1066 
1067 
1068 static MPP_RET hal_h265d_vdpu382_wait(void *hal, HalTaskInfo *task)
1069 {
1070     MPP_RET ret = MPP_OK;
1071     RK_S32 index =  task->dec.reg_index;
1072     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1073     RK_U8* p = NULL;
1074     Vdpu382H265dRegSet *hw_regs = NULL;
1075     RK_S32 i;
1076 
1077     if (reg_ctx->fast_mode) {
1078         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1079     } else {
1080         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
1081     }
1082 
1083     p = (RK_U8*)hw_regs;
1084 
1085     if (task->dec.flags.parse_err ||
1086         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1087         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1088         goto ERR_PROC;
1089     }
1090 
1091     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1092     if (ret)
1093         mpp_err_f("poll cmd failed %d\n", ret);
1094 
1095 ERR_PROC:
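    /*
     * If the task carried a parse/ref error or the status register reports
     * a decode error, bus error, buffer-empty condition or a missing ready
     * bit, either notify the decoder callback (normal mode) or mark the
     * output frame's errinfo (fast mode). On success in fast mode, a
     * previously recorded error is still propagated from any errinfo-marked
     * reference to the current output frame.
     */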
1096     if (task->dec.flags.parse_err ||
1097         task->dec.flags.ref_err ||
1098         hw_regs->irq_status.reg224.dec_error_sta ||
1099         hw_regs->irq_status.reg224.buf_empty_sta ||
1100         hw_regs->irq_status.reg224.dec_bus_sta ||
1101         !hw_regs->irq_status.reg224.dec_rdy_sta) {
1102         if (!reg_ctx->fast_mode) {
1103             if (reg_ctx->dec_cb)
1104                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1105         } else {
1106             MppFrame mframe = NULL;
1107             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1108                                   SLOT_FRAME_PTR, &mframe);
1109             if (mframe) {
1110                 reg_ctx->fast_mode_err_found = 1;
1111                 mpp_frame_set_errinfo(mframe, 1);
1112             }
1113         }
1114     } else {
1115         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1116             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1117                 if (task->dec.refer[i] >= 0) {
1118                     MppFrame frame_ref = NULL;
1119 
1120                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1121                                           SLOT_FRAME_PTR, &frame_ref);
1122                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1123                               i, task->dec.refer[i], frame_ref);
1124                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1125                         MppFrame frame_out = NULL;
1126                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1127                                               SLOT_FRAME_PTR, &frame_out);
1128                         mpp_frame_set_errinfo(frame_out, 1);
1129                         break;
1130                     }
1131                 }
1132             }
1133         }
1134     }
1135 
1136     for (i = 0; i < 68; i++) {
1137         if (i == 1) {
1138             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1139                       i, *((RK_U32*)p));
1140         }
1141 
1142         if (i == 45) {
1143             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1144                       i, *((RK_U32*)p));
1145         }
1146         p += 4;
1147     }
1148 
1149     if (reg_ctx->fast_mode) {
1150         reg_ctx->g_buf[index].use_flag = 0;
1151     }
1152 
1153     return ret;
1154 }
1155 
1156 static MPP_RET hal_h265d_vdpu382_reset(void *hal)
1157 {
1158     MPP_RET ret = MPP_OK;
1159     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1160     p_hal->fast_mode_err_found = 0;
1161     (void)hal;
1162     return ret;
1163 }
1164 
1165 static MPP_RET hal_h265d_vdpu382_flush(void *hal)
1166 {
1167     MPP_RET ret = MPP_OK;
1168 
1169     (void)hal;
1170     return ret;
1171 }
1172 
1173 static MPP_RET hal_h265d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
1174 {
1175     MPP_RET ret = MPP_OK;
1176     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1177 
1178     (void)hal;
1179     switch ((MpiCmd)cmd_type) {
1180     case MPP_DEC_SET_FRAME_INFO: {
1181         MppFrame frame = (MppFrame)param;
1182         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1183 
1184         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1185             vdpu382_afbc_align_calc(p_hal->slots, frame, 16);
1186         }
1187         break;
1188     }
1189     case MPP_DEC_SET_OUTPUT_FORMAT: {
1190     } break;
1191     default:
1192         break;
1193     }
1194     return  ret;
1195 }
1196 
1197 const MppHalApi hal_h265d_vdpu382 = {
1198     .name = "h265d_vdpu382",
1199     .type = MPP_CTX_DEC,
1200     .coding = MPP_VIDEO_CodingHEVC,
1201     .ctx_size = sizeof(HalH265dCtx),
1202     .flag = 0,
1203     .init = hal_h265d_vdpu382_init,
1204     .deinit = hal_h265d_vdpu382_deinit,
1205     .reg_gen = hal_h265d_vdpu382_gen_regs,
1206     .start = hal_h265d_vdpu382_start,
1207     .wait = hal_h265d_vdpu382_wait,
1208     .reset = hal_h265d_vdpu382_reset,
1209     .flush = hal_h265d_vdpu382_flush,
1210     .control = hal_h265d_vdpu382_control,
1211 };
1212