xref: /OK3568_Linux_fs/external/mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright 2022 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_h265d_vdpu382"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26 
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu382.h"
32 #include "vdpu382_h265d.h"
33 #include "rk_hdr_meta_com.h"
34 
35 /* #define dump */
36 #ifdef dump
37 static FILE *fp = NULL;
38 #endif
39 
40 #define HW_RPS
41 #define PPS_SIZE                (112 * 64)//(96x64)
42 
/*
 * Store `value` into the hevc_ref_valid_<index> member of `regs`.reg99.
 *
 * regs  - register-set object (an lvalue expression; it is parenthesized so
 *         that expressions such as `*ptr` expand correctly)
 * index - reference slot, 0..14; any other index is silently ignored
 * value - value to store
 */
#define SET_REF_VALID(regs, index, value)\
    do{ \
        switch(index){\
        case 0: (regs).reg99.hevc_ref_valid_0 = (value); break;\
        case 1: (regs).reg99.hevc_ref_valid_1 = (value); break;\
        case 2: (regs).reg99.hevc_ref_valid_2 = (value); break;\
        case 3: (regs).reg99.hevc_ref_valid_3 = (value); break;\
        case 4: (regs).reg99.hevc_ref_valid_4 = (value); break;\
        case 5: (regs).reg99.hevc_ref_valid_5 = (value); break;\
        case 6: (regs).reg99.hevc_ref_valid_6 = (value); break;\
        case 7: (regs).reg99.hevc_ref_valid_7 = (value); break;\
        case 8: (regs).reg99.hevc_ref_valid_8 = (value); break;\
        case 9: (regs).reg99.hevc_ref_valid_9 = (value); break;\
        case 10: (regs).reg99.hevc_ref_valid_10 = (value); break;\
        case 11: (regs).reg99.hevc_ref_valid_11 = (value); break;\
        case 12: (regs).reg99.hevc_ref_valid_12 = (value); break;\
        case 13: (regs).reg99.hevc_ref_valid_13 = (value); break;\
        case 14: (regs).reg99.hevc_ref_valid_14 = (value); break;\
        default: break;}\
    }while(0)
63 
64 #define FMT 4
65 #define CTU 3
66 
67 typedef struct {
68     RK_U32 a;
69     RK_U32 b;
70 } FilterdColBufRatio;
71 
72 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
73     /* 400    420      422       444 */
74     {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
75     {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
76     {{0, 0}, {27, 5},  {36, 5},  {52, 5}}  //ctu 64
77 };
78 
79 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
80     /* 400    420      422       444 */
81     {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
82     {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
83     {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
84 };
85 
/*
 * Layout of the single shared HAL buffer:
 *
 *   [ cabac table ][ info 0 ][ info 1 ] ... [ info cnt-1 ]
 *
 * where every "info" region is [ sps/pps ][ rps ][ scaling list ] and each
 * part is rounded up to a 4K boundary.
 */
#define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
#define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
#define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
#define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
#define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
/* Total size for `cnt` info regions; the argument is parenthesized so expressions are safe. */
#define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE * (cnt))

/* Byte offsets of each part inside the shared buffer for info region `pos`. */
#define CABAC_TAB_OFFSET                (0)
#define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * (pos)))
#define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
#define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
97 
hal_h265d_vdpu382_init(void * hal,MppHalCfg * cfg)98 static MPP_RET hal_h265d_vdpu382_init(void *hal, MppHalCfg *cfg)
99 {
100     RK_S32 ret = 0;
101     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
102 
103     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
104     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
105 
106     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
107     if (reg_ctx->scaling_qm == NULL) {
108         mpp_err("scaling_org alloc fail");
109         return MPP_ERR_MALLOC;
110     }
111 
112     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
113     reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
114     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
115 
116     if (reg_ctx->scaling_rk == NULL) {
117         mpp_err("scaling_rk alloc fail");
118         return MPP_ERR_MALLOC;
119     }
120 
121     if (reg_ctx->group == NULL) {
122         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
123         if (ret) {
124             mpp_err("h265d mpp_buffer_group_get failed\n");
125             return ret;
126         }
127     }
128 
129     {
130         RK_U32 i = 0;
131         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
132 
133         //!< malloc buffers
134         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
135         if (ret) {
136             mpp_err("h265d mpp_buffer_get failed\n");
137             return ret;
138         }
139 
140         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
141         reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
142         for (i = 0; i < max_cnt; i++) {
143             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382H265dRegSet));
144             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
145             reg_ctx->offset_rps[i] = RPS_OFFSET(i);
146             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
147         }
148     }
149 
150     if (!reg_ctx->fast_mode) {
151         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
152         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
153         reg_ctx->rps_offset = reg_ctx->offset_rps[0];
154         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
155     }
156 
157     ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
158     if (ret) {
159         mpp_err("h265d write cabac_table data failed\n");
160         return ret;
161     }
162 
163     {
164         // report hw_info to parser
165         const MppSocInfo *info = mpp_get_soc_info();
166         const void *hw_info = NULL;
167         RK_U32 i;
168 
169         for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
170             if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) {
171                 hw_info = info->dec_caps[i];
172                 break;
173             }
174         }
175 
176         mpp_assert(hw_info);
177         cfg->hw_info = hw_info;
178 
179         //save hw_info to context
180         reg_ctx->hw_info = hw_info;
181     }
182 
183 #ifdef dump
184     fp = fopen("/data/hal.bin", "wb");
185 #endif
186     (void) cfg;
187     return MPP_OK;
188 }
189 
hal_h265d_vdpu382_deinit(void * hal)190 static MPP_RET hal_h265d_vdpu382_deinit(void *hal)
191 {
192     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
193     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
194     RK_U32 i;
195 
196     if (reg_ctx->bufs) {
197         mpp_buffer_put(reg_ctx->bufs);
198         reg_ctx->bufs = NULL;
199     }
200 
201     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
202     for (i = 0; i < loop; i++) {
203         if (reg_ctx->rcb_buf[i]) {
204             mpp_buffer_put(reg_ctx->rcb_buf[i]);
205             reg_ctx->rcb_buf[i] = NULL;
206         }
207     }
208 
209     if (reg_ctx->group) {
210         mpp_buffer_group_put(reg_ctx->group);
211         reg_ctx->group = NULL;
212     }
213 
214     for (i = 0; i < loop; i++)
215         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
216 
217     MPP_FREE(reg_ctx->scaling_qm);
218     MPP_FREE(reg_ctx->scaling_rk);
219     MPP_FREE(reg_ctx->pps_buf);
220     MPP_FREE(reg_ctx->sw_rps_buf);
221 
222     if (reg_ctx->cmv_bufs) {
223         hal_bufs_deinit(reg_ctx->cmv_bufs);
224         reg_ctx->cmv_bufs = NULL;
225     }
226 
227     return MPP_OK;
228 }
229 
hal_h265d_v382_output_pps_packet(void * hal,void * dxva)230 static RK_S32 hal_h265d_v382_output_pps_packet(void *hal, void *dxva)
231 {
232     RK_S32 fifo_len = 14;//12
233     RK_S32 i, j;
234     RK_U32 addr;
235     RK_U32 log2_min_cb_size;
236     RK_S32 width, height;
237     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
238     Vdpu382H265dRegSet *hw_reg = (Vdpu382H265dRegSet*)(reg_ctx->hw_regs);
239     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
240     BitputCtx_t bp;
241 
242     if (NULL == reg_ctx || dxva_cxt == NULL) {
243         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
244                 __FILE__, __FUNCTION__, __LINE__);
245         return MPP_ERR_NULL_PTR;
246     }
247     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
248     if (dxva_cxt->pp.ps_update_flag) {
249         RK_U64 *pps_packet = reg_ctx->pps_buf;
250         if (NULL == pps_ptr) {
251             mpp_err("pps_data get ptr error");
252             return MPP_ERR_NOMEM;
253         }
254 
255         for (i = 0; i < 14; i++) pps_packet[i] = 0;
256 
257         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
258 
259         // SPS
260         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
261         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
262         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
263 
264         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
265         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
266         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
267 
268         mpp_put_bits(&bp, width                                          , 16);
269         mpp_put_bits(&bp, height                                         , 16);
270         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
271         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
272         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
273         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
274         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
275         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
276         ///<-zrh comment ^  63 bit above
277         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
278         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
279         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
280         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
281         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
282         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
283         ///<-zrh comment ^  68 bit above
284         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
285         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
286         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
287         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
288         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
289         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
290 
291         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
292         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
293         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
294         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
295         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
296         ///<-zrh comment ^ 100 bit above
297 
298         mpp_put_bits(&bp, 0                                                    , 7 ); //49bits
299         //yandong change
300         mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
301         mpp_put_bits(&bp, 0, 3);
302         mpp_put_align(&bp                                                        , 32, 0xf); //128
303         // PPS
304         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
305         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
306         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
307         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
308         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
309         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
310         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
311         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);//31 bits
312         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
313         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
314         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
315         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
316         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
317         mpp_put_bits(&bp, log2_min_cb_size +
318                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
319                      dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
320 
321         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
322                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
323 
324         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
325         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
326         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
327         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
328         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
329         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
330         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
331         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
332         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
333         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
334         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
335         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
336         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
337         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
338         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
339         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
340         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
341         mpp_put_bits(&bp, 0                                                        , 3);
342         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
343         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
344         mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
345         mpp_put_align(&bp, 64, 0xf);
346         {
347             /// tiles info begin
348             RK_U16 column_width[20];
349             RK_U16 row_height[22];
350 
351             memset(column_width, 0, sizeof(column_width));
352             memset(row_height, 0, sizeof(row_height));
353 
354             if (dxva_cxt->pp.tiles_enabled_flag) {
355 
356                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
357                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
358                     RK_S32 ctu_width_in_pic = (width +
359                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
360                     RK_S32 ctu_height_in_pic = (height +
361                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
362                     RK_S32 sum = 0;
363                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
364                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
365                         sum += column_width[i]  ;
366                     }
367                     column_width[i] = ctu_width_in_pic - sum;
368 
369                     sum = 0;
370                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
371                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
372                         sum += row_height[i];
373                     }
374                     row_height[i] = ctu_height_in_pic - sum;
375                 } // end of (pps->uniform_spacing_flag == 0)
376                 else {
377 
378                     RK_S32    pic_in_cts_width = (width +
379                                                   (1 << (log2_min_cb_size +
380                                                          dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
381                                                  / (1 << (log2_min_cb_size +
382                                                           dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
383                     RK_S32 pic_in_cts_height = (height +
384                                                 (1 << (log2_min_cb_size +
385                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
386                                                / (1 << (log2_min_cb_size +
387                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
388 
389                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
390                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
391                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
392 
393                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
394                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
395                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
396                 }
397             } // pps->tiles_enabled_flag
398             else {
399                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
400                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
401                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
402             }
403 
404             for (j = 0; j < 20; j++) {
405                 if (column_width[j] > 0)
406                     column_width[j]--;
407                 mpp_put_bits(&bp, column_width[j], 12);
408             }
409 
410             for (j = 0; j < 22; j++) {
411                 if (row_height[j] > 0)
412                     row_height[j]--;
413                 mpp_put_bits(&bp, row_height[j], 12);
414             }
415         }
416 
417         mpp_put_bits(&bp, 0, 32);
418         mpp_put_bits(&bp, 0, 70);
419         mpp_put_align(&bp, 64, 0xf);//128
420     }
421 
422     if (dxva_cxt->pp.scaling_list_enabled_flag) {
423         MppDevRegOffsetCfg trans_cfg;
424         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
425 
426         if (dxva_cxt->pp.scaling_list_data_present_flag) {
427             addr = (dxva_cxt->pp.pps_id + 16) * 1360;
428         } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
429             addr = dxva_cxt->pp.sps_id * 1360;
430         } else {
431             addr = 80 * 1360;
432         }
433 
434         hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
435 
436         hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
437         hw_reg->common.reg012.scanlist_addr_valid_en = 1;
438 
439         /* need to config addr */
440         trans_cfg.reg_idx = 180;
441         trans_cfg.offset = addr + reg_ctx->sclst_offset;
442         mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
443     }
444 
445     for (i = 0; i < 64; i++)
446         memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
447 #ifdef dump
448     fwrite(pps_ptr, 1, 80 * 64, fp);
449     RK_U32 *tmp = (RK_U32 *)pps_ptr;
450     for (i = 0; i < 112 / 4; i++) {
451         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
452     }
453 #endif
454     return 0;
455 }
456 
/*
 * Recompute the per-type RCB buffer sizes in rcb_info from the stream
 * parameters of the current picture.
 *
 * rcb_info - table indexed by RCB_* type; only the .size members are written
 * hw_regs  - register set; common.reg012.fbc_e selects the FBC variants
 * width    - picture width in pixels (rounded up to the CTU size here)
 * height   - picture height in pixels (rounded up to the CTU size here)
 * dxva     - h265d_dxva2_picture_context_t of the current picture
 *
 * Every size is first computed in bits and converted with MPP_RCB_BYTES.
 */
static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
                                  Vdpu382H265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    RK_U32 rcb_bits = 0;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
    /* extra bits added to row buffers for every tile column boundary */
    RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;

    width = MPP_ALIGN(width, ctu_size);
    height = MPP_ALIGN(height, ctu_size);

    /* RCB_STRMD_ROW */
    if (width >= 8192) {
        RK_U32 factor = 64 / ctu_size;

        rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_ROW */
    if (width >= 8192)
        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_COL */
    if (height >= 8192 && tile_col_cut_num)
        rcb_bits = MPP_ALIGN(height - 8192, 4) << 1;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_ROW */
    rcb_bits = width * 22 + ext_align_size;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_COL */
    rcb_bits = tile_col_cut_num ? (height * 22) : 0;
    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTRA_ROW */
    rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_DBLK_ROW */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 6 * bit_depth);
        else
            rcb_bits = width * ( 2 + 6 * bit_depth);
    } else {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 8 * bit_depth);
        else
            rcb_bits = width * ( 2 + 8 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_SAO_ROW */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
    } else {
        rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FBC_ROW */
    if (hw_regs->common.reg012.fbc_e) {
        /*
         * chroma_fmt_idc is unsigned, so (chroma_fmt_idc - 1) wraps to a huge
         * value for monochrome (idc 0). Use a zero factor in that case.
         * NOTE(review): zero matches the 4:2:0 result of this formula -
         * confirm against the hardware requirements for 4:0:0.
         */
        RK_U32 chroma_factor = chroma_fmt_idc ? (chroma_fmt_idc - 1) : 0;

        rcb_bits = width * chroma_factor * 2 * bit_depth;
        rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FILT_COL */
    if (tile_col_cut_num) {
        /* ctu 16/32/64 map to table rows 0/1/2 */
        RK_U32 ctu_idx = ctu_size >> 5;

        if (hw_regs->common.reg012.fbc_e) {
            RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b);
        } else {
            RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
        }
    } else
        rcb_bits = 0;
    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}
559 
/*
 * Re-create the RCB buffers when a stream parameter that affects their size
 * has changed since the last call.
 *
 * The row-tile count, bit depth, chroma format, CTU size, width and height
 * are compared with the values cached in the context. On any difference the
 * RCB sizes are recomputed, every RCB buffer is released and allocated again
 * with the new total size, and the cache is updated.
 *
 * hal     - HalH265dCtx
 * dxva    - h265d_dxva2_picture_context_t of the current picture
 * hw_regs - register set handed to h265d_refine_rcb_size
 * width   - picture width in pixels
 * height  - picture height in pixels
 *
 * A failed allocation is logged and leaves the corresponding rcb_buf entry
 * NULL instead of storing an indeterminate handle.
 */
static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
                                      Vdpu382H265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
{
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;

    if (reg_ctx->num_row_tiles != num_tiles ||
        reg_ctx->bit_depth != bit_depth ||
        reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
        reg_ctx->ctu_size !=  ctu_size ||
        reg_ctx->width != width ||
        reg_ctx->height != height) {
        RK_U32 i = 0;
        RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;

        /* the total size is taken before the per-type sizes are refined */
        reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, width, height);
        h265d_refine_rcb_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);

        for (i = 0; i < loop; i++) {
            MppBuffer rcb_buf = NULL;

            if (reg_ctx->rcb_buf[i]) {
                mpp_buffer_put(reg_ctx->rcb_buf[i]);
                reg_ctx->rcb_buf[i] = NULL;
            }
            if (mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size)) {
                mpp_err_f("rcb buffer %u get failed\n", i);
                rcb_buf = NULL;
            }
            reg_ctx->rcb_buf[i] = rcb_buf;
        }

        reg_ctx->num_row_tiles  = num_tiles;
        reg_ctx->bit_depth      = bit_depth;
        reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
        reg_ctx->ctu_size       = ctu_size;
        reg_ctx->width          = width;
        reg_ctx->height         = height;
    }
}
603 
/*
 * Store `value` into the ref<index>_<field> member of `regs`: slots 0..7
 * live in reg200 and slots 8..15 in reg201. Any other index is ignored.
 *
 * regs  - register-set object (parenthesized, so `*ptr` style expressions
 *         expand correctly)
 * index - reference slot, 0..15
 * field - member name suffix pasted after "ref<index>_"
 * value - value to store
 */
#define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
    do{ \
        switch(index){\
        case 0: (regs).reg200.ref0_##field = (value); break;\
        case 1: (regs).reg200.ref1_##field = (value); break;\
        case 2: (regs).reg200.ref2_##field = (value); break;\
        case 3: (regs).reg200.ref3_##field = (value); break;\
        case 4: (regs).reg200.ref4_##field = (value); break;\
        case 5: (regs).reg200.ref5_##field = (value); break;\
        case 6: (regs).reg200.ref6_##field = (value); break;\
        case 7: (regs).reg200.ref7_##field = (value); break;\
        case 8: (regs).reg201.ref8_##field = (value); break;\
        case 9: (regs).reg201.ref9_##field = (value); break;\
        case 10: (regs).reg201.ref10_##field = (value); break;\
        case 11: (regs).reg201.ref11_##field = (value); break;\
        case 12: (regs).reg201.ref12_##field = (value); break;\
        case 13: (regs).reg201.ref13_##field = (value); break;\
        case 14: (regs).reg201.ref14_##field = (value); break;\
        case 15: (regs).reg201.ref15_##field = (value); break;\
        default: break;}\
    }while(0)

/* Absolute difference of two POC values; each argument is evaluated twice. */
#define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
/* Largest 32-bit signed value, used as the initial "no distance yet" marker. */
#define MAX_INT           2147483647
628 
hal_h265d_vdpu382_gen_regs(void * hal,HalTaskInfo * syn)629 static MPP_RET hal_h265d_vdpu382_gen_regs(void *hal,  HalTaskInfo *syn)
630 {
631     RK_S32 i = 0;
632     RK_S32 log2_min_cb_size;
633     RK_S32 width, height;
634     RK_S32 stride_y, stride_uv, virstrid_y;
635     Vdpu382H265dRegSet *hw_regs;
636     RK_S32 ret = MPP_SUCCESS;
637     MppBuffer streambuf = NULL;
638     RK_S32 aglin_offset = 0;
639     RK_S32 valid_ref = -1;
640     MppBuffer framebuf = NULL;
641     HalBuf *mv_buf = NULL;
642     RK_S32 fd = -1;
643     RK_U32 mv_size = 0;
644     RK_S32 distance = MAX_INT;
645     h265d_dxva2_picture_context_t *dxva_cxt =
646         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
647     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
648     void *rps_ptr = NULL;
649     RK_U32 stream_buf_size = 0;
650 
651     if (syn->dec.flags.parse_err ||
652         syn->dec.flags.ref_err) {
653         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
654         return MPP_OK;
655     }
656 
657     if (reg_ctx ->fast_mode) {
658         for (i = 0; i < MAX_GEN_REG; i++) {
659             if (!reg_ctx->g_buf[i].use_flag) {
660                 syn->dec.reg_index = i;
661 
662                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
663                 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
664                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
665 
666                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
667                 reg_ctx->g_buf[i].use_flag = 1;
668                 break;
669             }
670         }
671         if (i == MAX_GEN_REG) {
672             mpp_err("hevc rps buf all used");
673             return MPP_ERR_NOMEM;
674         }
675     }
676     rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
677     if (NULL == rps_ptr) {
678 
679         mpp_err("rps_data get ptr error");
680         return MPP_ERR_NOMEM;
681     }
682 
683 
684     if (syn->dec.syntax.data == NULL) {
685         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
686         return MPP_ERR_NULL_PTR;
687     }
688 
689     /* output pps */
690     hw_regs = (Vdpu382H265dRegSet*)reg_ctx->hw_regs;
691     memset(hw_regs, 0, sizeof(Vdpu382H265dRegSet));
692 
693     hal_h265d_v382_output_pps_packet(hal, syn->dec.syntax.data);
694 
695     if (NULL == reg_ctx->hw_regs) {
696         return MPP_ERR_NULL_PTR;
697     }
698 
699 
700     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
701 
702     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
703     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
704     mv_size = (MPP_ALIGN(width, 64) * MPP_ALIGN(height, 64)) >> 3;
705     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
706         size_t size = mv_size;
707 
708         if (reg_ctx->cmv_bufs) {
709             hal_bufs_deinit(reg_ctx->cmv_bufs);
710             reg_ctx->cmv_bufs = NULL;
711         }
712 
713         hal_bufs_init(&reg_ctx->cmv_bufs);
714         if (reg_ctx->cmv_bufs == NULL) {
715             mpp_err_f("colmv bufs init fail");
716             return MPP_ERR_NULL_PTR;
717         }
718 
719         reg_ctx->mv_size = mv_size;
720         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
721         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
722     }
723 
724     {
725         MppFrame mframe = NULL;
726         RK_U32 ver_virstride;
727 
728         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
729                               SLOT_FRAME_PTR, &mframe);
730         stride_y = mpp_frame_get_hor_stride(mframe);
731         ver_virstride = mpp_frame_get_ver_stride(mframe);
732         stride_uv = stride_y;
733         virstrid_y = ver_virstride * stride_y;
734         hw_regs->common.reg013.h26x_error_mode = 1;
735         hw_regs->common.reg021.error_deb_en = 1;
736         hw_regs->common.reg021.inter_error_prc_mode = 0;
737         hw_regs->common.reg021.error_intra_mode = 1;
738 
739         hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
740         hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
741         hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
742         hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
743 
744         if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
745             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
746             RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
747 
748             hw_regs->common.reg012.fbc_e = 1;
749             hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
750             hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
751             hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
752         } else {
753             hw_regs->common.reg012.fbc_e = 0;
754             hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
755             hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
756             hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
757         }
758 
759         if (MPP_FRAME_FMT_IS_HDR(mpp_frame_get_fmt(mframe)) && reg_ctx->cfg->base.enable_hdr_meta)
760             fill_hdr_meta_to_frame(mframe, HDR_HEVC);
761     }
762     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
763                           SLOT_BUFFER, &framebuf);
764     hw_regs->common_addr.reg130_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
765     /*if out_base is equal to zero it means this frame may error
766     we return directly add by csy*/
767 
768     if (hw_regs->common_addr.reg130_decout_base == 0) {
769         return 0;
770     }
771     fd =  mpp_buffer_get_fd(framebuf);
772     hw_regs->common_addr.reg130_decout_base = fd;
773     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
774     hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
775 
776     hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
777 
778     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
779                           &streambuf);
780     if ( dxva_cxt->bitstream == NULL) {
781         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
782     }
783 #ifdef HW_RPS
784     hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
785     hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
786 #else
787     hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
788     hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
789 #endif
790 
791     MppDevRegOffsetCfg trans_cfg;
792     /* cabac table */
793     hw_regs->h265d_addr.reg197_cabactbl_base    = reg_ctx->bufs_fd;
794     /* pps */
795     hw_regs->h265d_addr.reg161_pps_base         = reg_ctx->bufs_fd;
796     hw_regs->h265d_addr.reg163_rps_base         = reg_ctx->bufs_fd;
797 
798     hw_regs->common_addr.reg128_rlc_base        = mpp_buffer_get_fd(streambuf);
799     hw_regs->common_addr.reg129_rlcwrite_base   = mpp_buffer_get_fd(streambuf);
800     stream_buf_size                             = mpp_buffer_get_size(streambuf);
801     hw_regs->common.reg016_str_len              = ((dxva_cxt->bitstream_size + 15)
802                                                    & (~15)) + 64;
803     hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
804                                      hw_regs->common.reg016_str_len : stream_buf_size;
805 
806     aglin_offset =  hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
807     if (aglin_offset > 0) {
808         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
809                aglin_offset);
810     }
811     hw_regs->common.reg010.dec_e                = 1;
812     hw_regs->common.reg012.colmv_compress_en = reg_ctx->hw_info ?
813                                                reg_ctx->hw_info->cap_colmv_compress : 0;
814 
815     hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
816     hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
817 
818     hw_regs->common.reg011.dec_clkgate_e    = 1;
819     hw_regs->common.reg011.err_head_fill_e  = 1;
820     hw_regs->common.reg011.err_colmv_fill_e = 1;
821 
822     hw_regs->common.reg026.inter_auto_gating_e = 1;
823     hw_regs->common.reg026.filterd_auto_gating_e = 1;
824     hw_regs->common.reg026.strmd_auto_gating_e = 1;
825     hw_regs->common.reg026.mcp_auto_gating_e = 1;
826     hw_regs->common.reg026.busifd_auto_gating_e = 1;
827     hw_regs->common.reg026.dec_ctrl_auto_gating_e = 1;
828     hw_regs->common.reg026.intra_auto_gating_e = 1;
829     hw_regs->common.reg026.mc_auto_gating_e = 1;
830     hw_regs->common.reg026.transd_auto_gating_e = 1;
831     hw_regs->common.reg026.sram_auto_gating_e = 1;
832     hw_regs->common.reg026.cru_auto_gating_e = 1;
833     hw_regs->common.reg026.reg_cfg_gating_en = 1;
834     hw_regs->common.reg032_timeout_threshold = 0x3ffff;
835 
836     valid_ref = hw_regs->common_addr.reg130_decout_base;
837     reg_ctx->error_index = dxva_cxt->pp.CurrPic.Index7Bits;
838     hw_regs->common_addr.reg132_error_ref_base = valid_ref;
839 
840     memset(&hw_regs->highpoc.reg205, 0, sizeof(RK_U32));
841 
842     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
843         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
844             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
845 
846             MppFrame mframe = NULL;
847             hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
848             mpp_buf_slot_get_prop(reg_ctx->slots,
849                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
850                                   SLOT_BUFFER, &framebuf);
851             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
852                                   SLOT_FRAME_PTR, &mframe);
853             if (framebuf != NULL) {
854                 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
855                 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
856                 // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
857                 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
858                     && (!mpp_frame_get_errinfo(mframe))) {
859                     distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
860                     hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
861                     reg_ctx->error_index = dxva_cxt->pp.RefPicList[i].Index7Bits;
862                     hw_regs->common.reg021.error_intra_mode = 0;
863 
864                 }
865             } else {
866                 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
867             }
868 
869             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
870             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
871 
872             SET_REF_VALID(hw_regs->h265d_param, i, 1);
873         }
874     }
875 
876     if ((reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits) && !dxva_cxt->pp.IntraPicFlag) {
877         // mpp_err("current frm may be err, should skip process");
878         syn->dec.flags.ref_err = 1;
879         return MPP_OK;
880     }
881 
882     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
883 
884         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
885             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
886             MppFrame mframe = NULL;
887 
888             mpp_buf_slot_get_prop(reg_ctx->slots,
889                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
890                                   SLOT_BUFFER, &framebuf);
891 
892             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
893                                   SLOT_FRAME_PTR, &mframe);
894 
895             if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
896                 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
897                 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
898                 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
899             }
900         } else {
901             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
902             hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
903             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
904             /* mark 3 to differ from current frame */
905             if (reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits)
906                 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
907         }
908     }
909 
910     trans_cfg.reg_idx = 161;
911     trans_cfg.offset = reg_ctx->spspps_offset;
912     mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
913     /* rps */
914     trans_cfg.reg_idx = 163;
915     trans_cfg.offset = reg_ctx->rps_offset;
916     mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
917 
918     hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
919 
920     hw_regs->common.reg011.buf_empty_en = 1;
921 
922     hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
923     vdpu382_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
924                       reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
925                       (Vdpu382RcbInfo*)reg_ctx->rcb_info);
926     {
927         MppFrame mframe = NULL;
928 
929         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
930                               SLOT_FRAME_PTR, &mframe);
931 
932         if (mpp_frame_get_thumbnail_en(mframe)) {
933             hw_regs->h265d_addr.reg198_scale_down_luma_base =
934                 hw_regs->common_addr.reg130_decout_base;
935             hw_regs->h265d_addr.reg199_scale_down_chorme_base =
936                 hw_regs->common_addr.reg130_decout_base;
937             vdpu382_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->common);
938         } else {
939             hw_regs->h265d_addr.reg198_scale_down_luma_base = 0;
940             hw_regs->h265d_addr.reg199_scale_down_chorme_base = 0;
941             hw_regs->common.reg012.scale_down_en = 0;
942         }
943     }
944     vdpu382_setup_statistic(&hw_regs->common, &hw_regs->statistic);
945 
946     return ret;
947 }
948 
hal_h265d_vdpu382_start(void * hal,HalTaskInfo * task)949 static MPP_RET hal_h265d_vdpu382_start(void *hal, HalTaskInfo *task)
950 {
951     MPP_RET ret = MPP_OK;
952     RK_U8* p = NULL;
953     Vdpu382H265dRegSet *hw_regs = NULL;
954     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
955     RK_S32 index =  task->dec.reg_index;
956 
957     RK_U32 i;
958 
959     if (task->dec.flags.parse_err ||
960         task->dec.flags.ref_err) {
961         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
962         return MPP_OK;
963     }
964 
965     if (reg_ctx->fast_mode) {
966         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
967         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
968     } else {
969         p = (RK_U8*)reg_ctx->hw_regs;
970         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
971     }
972 
973     if (hw_regs == NULL) {
974         mpp_err("hal_h265d_start hw_regs is NULL");
975         return MPP_ERR_NULL_PTR;
976     }
977     for (i = 0; i < 68; i++) {
978         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
979                   i, *((RK_U32*)p));
980         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
981         p += 4;
982     }
983 
984     do {
985         MppDevRegWrCfg wr_cfg;
986         MppDevRegRdCfg rd_cfg;
987 
988         wr_cfg.reg = &hw_regs->common;
989         wr_cfg.size = sizeof(hw_regs->common);
990         wr_cfg.offset = OFFSET_COMMON_REGS;
991 
992         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
993         if (ret) {
994             mpp_err_f("set register write failed %d\n", ret);
995             break;
996         }
997 
998         wr_cfg.reg = &hw_regs->h265d_param;
999         wr_cfg.size = sizeof(hw_regs->h265d_param);
1000         wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
1001 
1002         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1003         if (ret) {
1004             mpp_err_f("set register write failed %d\n", ret);
1005             break;
1006         }
1007 
1008         wr_cfg.reg = &hw_regs->common_addr;
1009         wr_cfg.size = sizeof(hw_regs->common_addr);
1010         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1011 
1012         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1013         if (ret) {
1014             mpp_err_f("set register write failed %d\n", ret);
1015             break;
1016         }
1017 
1018         wr_cfg.reg = &hw_regs->h265d_addr;
1019         wr_cfg.size = sizeof(hw_regs->h265d_addr);
1020         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1021 
1022         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1023         if (ret) {
1024             mpp_err_f("set register write failed %d\n", ret);
1025             break;
1026         }
1027 
1028         wr_cfg.reg = &hw_regs->statistic;
1029         wr_cfg.size = sizeof(hw_regs->statistic);
1030         wr_cfg.offset = OFFSET_STATISTIC_REGS;
1031 
1032         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1033         if (ret) {
1034             mpp_err_f("set register write failed %d\n", ret);
1035             break;
1036         }
1037 
1038         wr_cfg.reg = &hw_regs->highpoc;
1039         wr_cfg.size = sizeof(hw_regs->highpoc);
1040         wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1041 
1042         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1043         if (ret) {
1044             mpp_err_f("set register write failed %d\n", ret);
1045             break;
1046         }
1047 
1048         rd_cfg.reg = &hw_regs->irq_status;
1049         rd_cfg.size = sizeof(hw_regs->irq_status);
1050         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1051 
1052         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1053         if (ret) {
1054             mpp_err_f("set register read failed %d\n", ret);
1055             break;
1056         }
1057         /* rcb info for sram */
1058         vdpu382_set_rcbinfo(reg_ctx->dev, (Vdpu382RcbInfo*)reg_ctx->rcb_info);
1059 
1060         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1061         if (ret) {
1062             mpp_err_f("send cmd failed %d\n", ret);
1063             break;
1064         }
1065     } while (0);
1066 
1067     return ret;
1068 }
1069 
1070 
hal_h265d_vdpu382_wait(void * hal,HalTaskInfo * task)1071 static MPP_RET hal_h265d_vdpu382_wait(void *hal, HalTaskInfo *task)
1072 {
1073     MPP_RET ret = MPP_OK;
1074     RK_S32 index =  task->dec.reg_index;
1075     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1076     RK_U8* p = NULL;
1077     Vdpu382H265dRegSet *hw_regs = NULL;
1078     RK_S32 i;
1079 
1080     if (task->dec.flags.parse_err ||
1081         task->dec.flags.ref_err) {
1082         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1083         goto ERR_PROC;
1084     }
1085 
1086     if (reg_ctx->fast_mode) {
1087         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1088     } else {
1089         hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
1090     }
1091 
1092     p = (RK_U8*)hw_regs;
1093 
1094     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1095     if (ret)
1096         mpp_err_f("poll cmd failed %d\n", ret);
1097 
1098 ERR_PROC:
1099     if (task->dec.flags.parse_err ||
1100         task->dec.flags.ref_err ||
1101         hw_regs->irq_status.reg224.dec_error_sta ||
1102         hw_regs->irq_status.reg224.buf_empty_sta ||
1103         hw_regs->irq_status.reg224.dec_bus_sta ||
1104         !hw_regs->irq_status.reg224.dec_rdy_sta) {
1105         if (!reg_ctx->fast_mode) {
1106             if (reg_ctx->dec_cb)
1107                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1108         } else {
1109             MppFrame mframe = NULL;
1110             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1111                                   SLOT_FRAME_PTR, &mframe);
1112             if (mframe) {
1113                 reg_ctx->fast_mode_err_found = 1;
1114                 mpp_frame_set_errinfo(mframe, 1);
1115             }
1116         }
1117     } else {
1118         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1119             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1120                 if (task->dec.refer[i] >= 0) {
1121                     MppFrame frame_ref = NULL;
1122 
1123                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1124                                           SLOT_FRAME_PTR, &frame_ref);
1125                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1126                               i, task->dec.refer[i], frame_ref);
1127                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1128                         MppFrame frame_out = NULL;
1129                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1130                                               SLOT_FRAME_PTR, &frame_out);
1131                         mpp_frame_set_errinfo(frame_out, 1);
1132                         break;
1133                     }
1134                 }
1135             }
1136         }
1137     }
1138 
1139     for (i = 0; i < 68; i++) {
1140         if (i == 1) {
1141             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1142                       i, *((RK_U32*)p));
1143         }
1144 
1145         if (i == 45) {
1146             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1147                       i, *((RK_U32*)p));
1148         }
1149         p += 4;
1150     }
1151 
1152     if (reg_ctx->fast_mode) {
1153         reg_ctx->g_buf[index].use_flag = 0;
1154     }
1155 
1156     return ret;
1157 }
1158 
hal_h265d_vdpu382_reset(void * hal)1159 static MPP_RET hal_h265d_vdpu382_reset(void *hal)
1160 {
1161     MPP_RET ret = MPP_OK;
1162     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1163     p_hal->fast_mode_err_found = 0;
1164     (void)hal;
1165     return ret;
1166 }
1167 
hal_h265d_vdpu382_flush(void * hal)1168 static MPP_RET hal_h265d_vdpu382_flush(void *hal)
1169 {
1170     MPP_RET ret = MPP_OK;
1171 
1172     (void)hal;
1173     return ret;
1174 }
1175 
hal_h265d_vdpu382_control(void * hal,MpiCmd cmd_type,void * param)1176 static MPP_RET hal_h265d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
1177 {
1178     MPP_RET ret = MPP_OK;
1179     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1180 
1181     (void)hal;
1182     switch ((MpiCmd)cmd_type) {
1183     case MPP_DEC_SET_FRAME_INFO: {
1184         MppFrame frame = (MppFrame)param;
1185         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1186 
1187         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1188             vdpu382_afbc_align_calc(p_hal->slots, frame, 16);
1189         }
1190         break;
1191     }
1192     case MPP_DEC_SET_OUTPUT_FORMAT: {
1193     } break;
1194     default:
1195         break;
1196     }
1197     return  ret;
1198 }
1199 
1200 const MppHalApi hal_h265d_vdpu382 = {
1201     .name = "h265d_vdpu382",
1202     .type = MPP_CTX_DEC,
1203     .coding = MPP_VIDEO_CodingHEVC,
1204     .ctx_size = sizeof(HalH265dCtx),
1205     .flag = 0,
1206     .init = hal_h265d_vdpu382_init,
1207     .deinit = hal_h265d_vdpu382_deinit,
1208     .reg_gen = hal_h265d_vdpu382_gen_regs,
1209     .start = hal_h265d_vdpu382_start,
1210     .wait = hal_h265d_vdpu382_wait,
1211     .reset = hal_h265d_vdpu382_reset,
1212     .flush = hal_h265d_vdpu382_flush,
1213     .control = hal_h265d_vdpu382_control,
1214 };
1215