xref: /OK3568_Linux_fs/external/mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu34x.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright 2020 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_h265d_vdpu34x"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26 
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu34x.h"
32 #include "vdpu34x_h265d.h"
33 #include "rk_hdr_meta_com.h"
34 
35 /* #define dump */
36 #ifdef dump
37 static FILE *fp = NULL;
38 #endif
39 
40 #define HW_RPS
41 #define PPS_SIZE                (112 * 64)//(96x64)
42 
/*
 * Set the hevc_ref_valid_<index> field of regs.reg99 to value.
 *
 * regs  : register-set lvalue (any expression, e.g. *ptr) that has a reg99 member
 * index : reference slot, 0..14; any other value is silently ignored
 * value : value stored into the selected field
 *
 * The fields are separately named members, so the slot has to be selected
 * with a switch instead of array indexing. Arguments are parenthesised so
 * that expressions such as *ptr or a ? b : c expand correctly.
 */
#define SET_REF_VALID(regs, index, value)\
    do { \
        switch (index) {\
        case 0:  (regs).reg99.hevc_ref_valid_0  = (value); break;\
        case 1:  (regs).reg99.hevc_ref_valid_1  = (value); break;\
        case 2:  (regs).reg99.hevc_ref_valid_2  = (value); break;\
        case 3:  (regs).reg99.hevc_ref_valid_3  = (value); break;\
        case 4:  (regs).reg99.hevc_ref_valid_4  = (value); break;\
        case 5:  (regs).reg99.hevc_ref_valid_5  = (value); break;\
        case 6:  (regs).reg99.hevc_ref_valid_6  = (value); break;\
        case 7:  (regs).reg99.hevc_ref_valid_7  = (value); break;\
        case 8:  (regs).reg99.hevc_ref_valid_8  = (value); break;\
        case 9:  (regs).reg99.hevc_ref_valid_9  = (value); break;\
        case 10: (regs).reg99.hevc_ref_valid_10 = (value); break;\
        case 11: (regs).reg99.hevc_ref_valid_11 = (value); break;\
        case 12: (regs).reg99.hevc_ref_valid_12 = (value); break;\
        case 13: (regs).reg99.hevc_ref_valid_13 = (value); break;\
        case 14: (regs).reg99.hevc_ref_valid_14 = (value); break;\
        default: break;\
        }\
    } while (0)
63 
64 #define FMT 4
65 #define CTU 3
66 
67 typedef struct {
68     RK_U32 a;
69     RK_U32 b;
70 } FilterdColBufRatio;
71 
72 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
73     /* 400    420      422       444 */
74     {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
75     {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
76     {{0, 0}, {27, 5},  {36, 5},  {52, 5}}  //ctu 64
77 };
78 
79 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
80     /* 400    420      422       444 */
81     {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
82     {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
83     {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
84 };
85 
/*
 * Layout of the single buffer shared with the hardware:
 *
 *   [ cabac table ][ info region 0 ][ info region 1 ] ...
 *
 * Each info region holds, in order, the SPS/PPS packets, the RPS data and
 * the scaling lists. Every area is rounded up to SZ_4K.
 *
 * Macro arguments are parenthesised so that expressions such as (i + 1)
 * give the expected size or offset.
 */
#define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
#define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
#define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
#define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
#define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
/* Total size for the cabac table plus cnt info regions. */
#define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE * (cnt))

/* Byte offsets of each area; pos selects the info region. */
#define CABAC_TAB_OFFSET                (0)
#define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * (pos)))
#define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
#define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
97 
hal_h265d_vdpu34x_init(void * hal,MppHalCfg * cfg)98 static MPP_RET hal_h265d_vdpu34x_init(void *hal, MppHalCfg *cfg)
99 {
100     RK_S32 ret = 0;
101     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
102 
103     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
104     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
105 
106     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
107     if (reg_ctx->scaling_qm == NULL) {
108         mpp_err("scaling_org alloc fail");
109         return MPP_ERR_MALLOC;
110     }
111 
112     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
113     reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
114     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
115 
116     if (reg_ctx->scaling_rk == NULL) {
117         mpp_err("scaling_rk alloc fail");
118         return MPP_ERR_MALLOC;
119     }
120 
121     if (reg_ctx->group == NULL) {
122         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
123         if (ret) {
124             mpp_err("h265d mpp_buffer_group_get failed\n");
125             return ret;
126         }
127     }
128 
129     {
130         RK_U32 i = 0;
131         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
132 
133         //!< malloc buffers
134         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
135         if (ret) {
136             mpp_err("h265d mpp_buffer_get failed\n");
137             return ret;
138         }
139 
140         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
141         reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
142         for (i = 0; i < max_cnt; i++) {
143             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu34xH265dRegSet));
144             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
145             reg_ctx->offset_rps[i] = RPS_OFFSET(i);
146             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
147         }
148     }
149 
150     if (!reg_ctx->fast_mode) {
151         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
152         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
153         reg_ctx->rps_offset = reg_ctx->offset_rps[0];
154         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
155     }
156 
157     ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
158     if (ret) {
159         mpp_err("h265d write cabac_table data failed\n");
160         return ret;
161     }
162 
163     {
164         // report hw_info to parser
165         const MppSocInfo *info = mpp_get_soc_info();
166         const void *hw_info = NULL;
167         RK_U32 i;
168 
169         for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
170             if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) {
171                 hw_info = info->dec_caps[i];
172                 break;
173             }
174         }
175 
176         mpp_assert(hw_info);
177         cfg->hw_info = hw_info;
178     }
179     (void)cfg;
180 #ifdef dump
181     fp = fopen("/data/hal.bin", "wb");
182 #endif
183     return MPP_OK;
184 }
185 
hal_h265d_vdpu34x_deinit(void * hal)186 static MPP_RET hal_h265d_vdpu34x_deinit(void *hal)
187 {
188     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
189     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
190     RK_U32 i;
191 
192     if (reg_ctx->bufs) {
193         mpp_buffer_put(reg_ctx->bufs);
194         reg_ctx->bufs = NULL;
195     }
196 
197     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
198     for (i = 0; i < loop; i++) {
199         if (reg_ctx->rcb_buf[i]) {
200             mpp_buffer_put(reg_ctx->rcb_buf[i]);
201             reg_ctx->rcb_buf[i] = NULL;
202         }
203     }
204 
205     if (reg_ctx->group) {
206         mpp_buffer_group_put(reg_ctx->group);
207         reg_ctx->group = NULL;
208     }
209 
210     for (i = 0; i < loop; i++)
211         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
212 
213     MPP_FREE(reg_ctx->scaling_qm);
214     MPP_FREE(reg_ctx->scaling_rk);
215     MPP_FREE(reg_ctx->pps_buf);
216     MPP_FREE(reg_ctx->sw_rps_buf);
217 
218     if (reg_ctx->cmv_bufs) {
219         hal_bufs_deinit(reg_ctx->cmv_bufs);
220         reg_ctx->cmv_bufs = NULL;
221     }
222 
223     return MPP_OK;
224 }
225 
hal_h265d_v345_output_pps_packet(void * hal,void * dxva)226 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
227 {
228     RK_S32 fifo_len = 14;//12
229     RK_S32 i, j;
230     RK_U32 addr;
231     RK_U32 log2_min_cb_size;
232     RK_S32 width, height;
233     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
234     Vdpu34xH265dRegSet *hw_reg = (Vdpu34xH265dRegSet*)(reg_ctx->hw_regs);
235     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
236     BitputCtx_t bp;
237 
238     if (NULL == reg_ctx || dxva_cxt == NULL) {
239         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
240                 __FILE__, __FUNCTION__, __LINE__);
241         return MPP_ERR_NULL_PTR;
242     }
243     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
244     if (dxva_cxt->pp.ps_update_flag) {
245         RK_U64 *pps_packet = reg_ctx->pps_buf;
246         if (NULL == pps_ptr) {
247             mpp_err("pps_data get ptr error");
248             return MPP_ERR_NOMEM;
249         }
250 
251         for (i = 0; i < 14; i++) pps_packet[i] = 0;
252 
253         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
254 
255         // SPS
256         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
257         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
258         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
259 
260         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
261         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
262         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
263 
264         mpp_put_bits(&bp, width                                          , 16);
265         mpp_put_bits(&bp, height                                         , 16);
266         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
267         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
268         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
269         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
270         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
271         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
272         ///<-zrh comment ^  63 bit above
273         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
274         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
275         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
276         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
277         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
278         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
279         ///<-zrh comment ^  68 bit above
280         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
281         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
282         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
283         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
284         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
285         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
286 
287         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
288         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
289         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
290         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
291         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
292         ///<-zrh comment ^ 100 bit above
293 
294         mpp_put_bits(&bp, 0                                                    , 7 ); //49bits
295         //yandong change
296         mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
297         mpp_put_bits(&bp, 0, 3);
298         mpp_put_align(&bp                                                        , 32, 0xf); //128
299         // PPS
300         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
301         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
302         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
303         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
304         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
305         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
306         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
307         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);//31 bits
308         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
309         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
310         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
311         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
312         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
313         mpp_put_bits(&bp, log2_min_cb_size +
314                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
315                      dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
316 
317         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
318                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
319 
320         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
321         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
322         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
323         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
324         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
325         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
326         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
327         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
328         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
329         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
330         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
331         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
332         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
333         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
334         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
335         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
336         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
337         mpp_put_bits(&bp, 0                                                        , 3);
338         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
339         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
340         mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
341         mpp_put_align(&bp, 64, 0xf);
342         {
343             /// tiles info begin
344             RK_U16 column_width[20];
345             RK_U16 row_height[22];
346 
347             memset(column_width, 0, sizeof(column_width));
348             memset(row_height, 0, sizeof(row_height));
349 
350             if (dxva_cxt->pp.tiles_enabled_flag) {
351 
352                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
353                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
354                     RK_S32 ctu_width_in_pic = (width +
355                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
356                     RK_S32 ctu_height_in_pic = (height +
357                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
358                     RK_S32 sum = 0;
359                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
360                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
361                         sum += column_width[i]  ;
362                     }
363                     column_width[i] = ctu_width_in_pic - sum;
364 
365                     sum = 0;
366                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
367                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
368                         sum += row_height[i];
369                     }
370                     row_height[i] = ctu_height_in_pic - sum;
371                 } // end of (pps->uniform_spacing_flag == 0)
372                 else {
373 
374                     RK_S32    pic_in_cts_width = (width +
375                                                   (1 << (log2_min_cb_size +
376                                                          dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
377                                                  / (1 << (log2_min_cb_size +
378                                                           dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
379                     RK_S32 pic_in_cts_height = (height +
380                                                 (1 << (log2_min_cb_size +
381                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
382                                                / (1 << (log2_min_cb_size +
383                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
384 
385                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
386                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
387                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
388 
389                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
390                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
391                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
392                 }
393             } // pps->tiles_enabled_flag
394             else {
395                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
396                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
397                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
398             }
399 
400             for (j = 0; j < 20; j++) {
401                 if (column_width[j] > 0)
402                     column_width[j]--;
403                 mpp_put_bits(&bp, column_width[j], 12);
404             }
405 
406             for (j = 0; j < 22; j++) {
407                 if (row_height[j] > 0)
408                     row_height[j]--;
409                 mpp_put_bits(&bp, row_height[j], 12);
410             }
411         }
412 
413         mpp_put_bits(&bp, 0, 32);
414         mpp_put_bits(&bp, 0, 70);
415         mpp_put_align(&bp, 64, 0xf);//128
416     }
417 
418     if (dxva_cxt->pp.scaling_list_enabled_flag) {
419         MppDevRegOffsetCfg trans_cfg;
420         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
421 
422         if (dxva_cxt->pp.scaling_list_data_present_flag) {
423             addr = (dxva_cxt->pp.pps_id + 16) * 1360;
424         } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
425             addr = dxva_cxt->pp.sps_id * 1360;
426         } else {
427             addr = 80 * 1360;
428         }
429 
430         hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
431 
432         hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
433         hw_reg->common.reg012.scanlist_addr_valid_en = 1;
434 
435         /* need to config addr */
436         trans_cfg.reg_idx = 180;
437         trans_cfg.offset = addr + reg_ctx->sclst_offset;
438         mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
439     }
440 
441     for (i = 0; i < 64; i++)
442         memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
443 #ifdef dump
444     fwrite(pps_ptr, 1, 80 * 64, fp);
445     RK_U32 *tmp = (RK_U32 *)pps_ptr;
446     for (i = 0; i < 112 / 4; i++) {
447         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
448     }
449 #endif
450     return 0;
451 }
452 
hal_h265d_output_pps_packet(void * hal,void * dxva)453 static RK_S32 hal_h265d_output_pps_packet(void *hal, void *dxva)
454 {
455     RK_S32 fifo_len = 10;
456     RK_S32 i, j;
457     RK_U32 addr;
458     RK_U32 log2_min_cb_size;
459     RK_S32 width, height;
460     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
461     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
462     BitputCtx_t bp;
463 
464     if (NULL == reg_ctx || dxva_cxt == NULL) {
465         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
466                 __FILE__, __FUNCTION__, __LINE__);
467         return MPP_ERR_NULL_PTR;
468     }
469 
470     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
471 
472     if (dxva_cxt->pp.ps_update_flag || dxva_cxt->pp.scaling_list_enabled_flag) {
473         RK_U64 *pps_packet = reg_ctx->pps_buf;
474 
475         if (NULL == pps_ptr) {
476             mpp_err("pps_data get ptr error");
477             return MPP_ERR_NOMEM;
478         }
479 
480         for (i = 0; i < 10; i++) pps_packet[i] = 0;
481 
482         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
483 
484         // SPS
485         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
486         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
487         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
488 
489         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
490         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
491         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
492 
493         mpp_put_bits(&bp, width                                          , 16);//yandong
494         mpp_put_bits(&bp, height                                         , 16);//yandong
495         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
496         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
497         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
498         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
499         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
500         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
501         ///<-zrh comment ^  57 bit above
502         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
503         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
504         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
505         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
506         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
507         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
508         ///<-zrh comment ^  68 bit above
509         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
510         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
511         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
512         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
513         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
514         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
515 
516         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
517         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
518         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
519         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
520         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
521         ///<-zrh comment ^ 100 bit above
522 
523         mpp_put_bits(&bp, 0                                                    , 7 );
524         mpp_put_align(&bp                                                      , 32, 0xf);
525 
526         // PPS
527         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
528         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
529         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
530         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
531         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
532         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
533         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
534         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
535         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
536         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
537         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
538         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
539         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1);
540 
541         mpp_put_bits(&bp, log2_min_cb_size +
542                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
543                      dxva_cxt->pp.diff_cu_qp_delta_depth                           , 3);
544 
545         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
546                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
547 
548         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
549         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
550         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
551         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
552         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
553         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1);
554         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1);
555         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
556         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
557         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1);
558 
559         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
560         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
561         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
562         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
563         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
564         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
565         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
566         mpp_put_bits(&bp, 0                                                        , 3);
567         mpp_put_bits(&bp, dxva_cxt->pp.num_tile_columns_minus1 + 1, 5);
568         mpp_put_bits(&bp, dxva_cxt->pp.num_tile_rows_minus1 + 1 , 5 );
569         mpp_put_bits(&bp, 3, 2); //mSps_Pps[i]->mMode
570         mpp_put_align(&bp, 64, 0xf);
571 
572         {
573             /// tiles info begin
574             RK_U16 column_width[20];
575             RK_U16 row_height[22];
576 
577             memset(column_width, 0, sizeof(column_width));
578             memset(row_height, 0, sizeof(row_height));
579 
580             if (dxva_cxt->pp.tiles_enabled_flag) {
581                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
582                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
583                     RK_S32 ctu_width_in_pic = (width +
584                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
585                     RK_S32 ctu_height_in_pic = (height +
586                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
587                     RK_S32 sum = 0;
588                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
589                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
590                         sum += column_width[i]  ;
591                     }
592                     column_width[i] = ctu_width_in_pic - sum;
593 
594                     sum = 0;
595                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
596                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
597                         sum += row_height[i];
598                     }
599                     row_height[i] = ctu_height_in_pic - sum;
600                 } // end of (pps->uniform_spacing_flag == 0)
601                 else {
602 
603                     RK_S32 pic_in_cts_width = (width +
604                                                (1 << (log2_min_cb_size +
605                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
606                                               / (1 << (log2_min_cb_size +
607                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
608                     RK_S32 pic_in_cts_height = (height +
609                                                 (1 << (log2_min_cb_size +
610                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
611                                                / (1 << (log2_min_cb_size +
612                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
613 
614                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
615                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
616                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
617 
618                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
619                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
620                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
621                 }
622             } // pps->tiles_enabled_flag
623             else {
624                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
625                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
626                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
627             }
628 
629             for (j = 0; j < 20; j++) {
630                 if (column_width[j] > 0)
631                     column_width[j]--;
632                 mpp_put_bits(&bp, column_width[j], 12);// yandong 8bit -> 12bit
633             }
634 
635             for (j = 0; j < 22; j++) {
636                 if (row_height[j] > 0)
637                     row_height[j]--;
638                 mpp_put_bits(&bp, row_height[j], 12);// yandong 8bit -> 12bit
639             }
640         }
641 
642         {
643             RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
644             if (dxva_cxt->pp.scaling_list_data_present_flag) {
645                 addr = (dxva_cxt->pp.pps_id + 16) * 1360;
646             } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
647                 addr = dxva_cxt->pp.sps_id * 1360;
648             } else {
649                 addr = 80 * 1360;
650             }
651 
652             hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
653 
654             RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
655             /* need to config addr */
656             addr = fd | (addr << 10);
657 
658             mpp_put_bits(&bp, addr, 32);
659             mpp_put_align(&bp, 64, 0xf);
660         }
661         for (i = 0; i < 64; i++)
662             memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
663     } else if (reg_ctx->fast_mode) {
664         for (i = 0; i < 64; i++)
665             memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
666     }
667 
668 #ifdef dump
669     fwrite(pps_ptr, 1, 80 * 64, fp);
670     fflush(fp);
671 #endif
672     return 0;
673 }
674 
/*
 * Recompute the size of each RCB entry in rcb_info for the current picture.
 *
 * rcb_info - table indexed by the RCB_* ids; only the .size members are
 *            written here.
 * hw_regs  - register set; only common.reg012.fbc_e is read, to select the
 *            FBC specific formulas.
 * width    - picture width in pixels, rounded up to the CTU size below.
 * height   - picture height in pixels, rounded up to the CTU size below.
 * dxva     - h265d_dxva2_picture_context_t of the picture.
 *
 * Every size is first computed in bits and then converted with
 * MPP_RCB_BYTES().
 */
static void h265d_refine_rcb_size(Vdpu34xRcbInfo *rcb_info,
                                  Vdpu34xH265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    h265d_dxva2_picture_context_t *pic_ctx = (h265d_dxva2_picture_context_t *)dxva;
    DXVA_PicParams_HEVC *pic = &pic_ctx->pp;
    /* chroma_format_idc: 0 = 4:0:0, 1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4 */
    RK_U32 chroma_idc = pic->chroma_format_idc;
    RK_U8 depth = MPP_MAX(pic->bit_depth_luma_minus8, pic->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu = 1 << (pic->log2_diff_max_min_luma_coding_block_size +
                      pic->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 tile_cuts = pic->num_tile_columns_minus1;
    /* extra bits added to most entries, proportional to the tile column cuts */
    RK_U32 ext_bits = tile_cuts * 64 * 8;
    RK_U32 is_420_or_422 = (chroma_idc == 1 || chroma_idc == 2);
    RK_U32 bits = 0;

    width = MPP_ALIGN(width, ctu);
    height = MPP_ALIGN(height, ctu);

    /* RCB_STRMD_ROW: only needed for pictures wider than 8192 */
    bits = 0;
    if (width > 8192) {
        RK_U32 factor = ctu / 16;

        bits = (MPP_ALIGN(width, ctu) + factor - 1) * factor * 24 + ext_bits;
    }
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_TRANSD_ROW: only the part of the width beyond 8192 counts */
    bits = 0;
    if (width > 8192)
        bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_bits;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_TRANSD_COL: only the part of the height beyond 8192 counts */
    bits = 0;
    if (height > 8192)
        bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_bits;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(bits);

    /* RCB_INTER_ROW */
    bits = width * 22 + ext_bits;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_INTER_COL */
    bits = height * 22 + ext_bits;
    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(bits);

    /* RCB_INTRA_ROW */
    bits = width * 48 + ext_bits;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_DBLK_ROW: constant term depends on CTU size, scale on chroma format */
    {
        RK_S32 dblk_base = (ctu == 32) ? 4 : 2;
        RK_S32 dblk_scale = is_420_or_422 ? 6 : 8;

        bits = width * (dblk_base + dblk_scale * depth);
    }
    bits += (tile_cuts * (depth == 8 ? 256 : 192)) + ext_bits;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_SAO_ROW */
    {
        RK_S32 sao_scale = is_420_or_422 ? 2 : 3;

        bits = width * (128 / ctu + sao_scale * depth);
    }
    bits += (tile_cuts * (depth == 8 ? 160 : 128)) + ext_bits;
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_FBC_ROW: unused (zero sized) when FBC output is disabled */
    bits = 0;
    if (hw_regs->common.reg012.fbc_e) {
        bits = width * (chroma_idc - 1) * 2 * depth;
        bits += (tile_cuts * (depth == 8 ? 128 : 64)) + ext_bits;
    }
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_FILT_COL: coefficients come from a table picked by the FBC state */
    {
        RK_U32 ctu_idx = ctu >> 5;
        RK_U32 coef = 0;
        RK_U32 bias = 0;

        if (hw_regs->common.reg012.fbc_e) {
            coef = filterd_fbc_on[chroma_idc][ctu_idx].a;
            bias = filterd_fbc_on[chroma_idc][ctu_idx].b;
            bits = height * (coef * depth + bias);
        } else {
            coef = filterd_fbc_off[chroma_idc][ctu_idx].a;
            bias = filterd_fbc_off[chroma_idc][ctu_idx].b;
            bits = height * (coef * depth + bias + (depth == 10 ? 192 * ctu >> 4 : 0));
        }
    }
    bits += ext_bits;
    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(bits);
}
764 
/*
 * Re-create the RCB buffers when the stream geometry changes.
 *
 * hal     - HalH265dCtx pointer; its cached num_row_tiles, bit_depth,
 *           chroma_fmt_idc, ctu_size, width and height are compared with the
 *           values of the current picture.
 * dxva    - h265d_dxva2_picture_context_t of the current picture.
 * hw_regs - register set, forwarded to h265d_refine_rcb_size().
 * width   - picture width in pixels.
 * height  - picture height in pixels.
 *
 * On any mismatch the RCB layout is recomputed and one buffer is allocated
 * (one per g_buf entry in fast mode), replacing buffers held before.  The
 * cached values are only committed when every allocation succeeded, so a
 * failed allocation is retried on the next call instead of being remembered
 * as done.
 */
static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
                                      Vdpu34xH265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
{
    HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    /* chroma_format_idc: 0 = 4:0:0, 1 = 4:2:0, 2 = 4:2:2, 3 = 4:4:4 */
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;

    if (reg_ctx->num_row_tiles != num_tiles ||
        reg_ctx->bit_depth != bit_depth ||
        reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
        reg_ctx->ctu_size !=  ctu_size ||
        reg_ctx->width != width ||
        reg_ctx->height != height) {
        RK_U32 i = 0;
        RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
        RK_U32 alloc_failed = 0;

        reg_ctx->rcb_buf_size = vdpu34x_get_rcb_buf_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, width, height);
        h265d_refine_rcb_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);

        for (i = 0; i < loop; i++) {
            /* start from NULL so a failed get can never leave garbage here */
            MppBuffer rcb_buf = NULL;

            if (reg_ctx->rcb_buf[i]) {
                mpp_buffer_put(reg_ctx->rcb_buf[i]);
                reg_ctx->rcb_buf[i] = NULL;
            }
            if (mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size)) {
                mpp_err_f("rcb buffer %u alloc failed\n", i);
                rcb_buf = NULL;
                alloc_failed = 1;
            }
            reg_ctx->rcb_buf[i] = rcb_buf;
        }

        /* keep the stale cache so the next picture triggers another attempt */
        if (alloc_failed)
            return;

        reg_ctx->num_row_tiles  = num_tiles;
        reg_ctx->bit_depth      = bit_depth;
        reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
        reg_ctx->ctu_size       = ctu_size;
        reg_ctx->width          = width;
        reg_ctx->height         = height;
    }
}
808 
/*
 * Store `value` into the ref<index>_<field> member of the high-POC register
 * group `regs`.  Indexes 0-7 live in reg200, indexes 8-15 in reg201; any other
 * index is ignored.  `regs` and `value` are parenthesized so that expressions
 * such as `*ptr` or `a + b` expand correctly.
 */
#define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
    do{ \
        switch(index){\
        case 0: (regs).reg200.ref0_##field = (value); break;\
        case 1: (regs).reg200.ref1_##field = (value); break;\
        case 2: (regs).reg200.ref2_##field = (value); break;\
        case 3: (regs).reg200.ref3_##field = (value); break;\
        case 4: (regs).reg200.ref4_##field = (value); break;\
        case 5: (regs).reg200.ref5_##field = (value); break;\
        case 6: (regs).reg200.ref6_##field = (value); break;\
        case 7: (regs).reg200.ref7_##field = (value); break;\
        case 8: (regs).reg201.ref8_##field = (value); break;\
        case 9: (regs).reg201.ref9_##field = (value); break;\
        case 10: (regs).reg201.ref10_##field = (value); break;\
        case 11: (regs).reg201.ref11_##field = (value); break;\
        case 12: (regs).reg201.ref12_##field = (value); break;\
        case 13: (regs).reg201.ref13_##field = (value); break;\
        case 14: (regs).reg201.ref14_##field = (value); break;\
        case 15: (regs).reg201.ref15_##field = (value); break;\
        default: break;}\
    }while(0)

/* Absolute difference of two POC values; each argument is evaluated twice. */
#define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
/* Largest 32-bit signed value, used as the initial "no distance yet" mark. */
#define MAX_INT           2147483647
833 
hal_h265d_vdpu34x_gen_regs(void * hal,HalTaskInfo * syn)834 static MPP_RET hal_h265d_vdpu34x_gen_regs(void *hal,  HalTaskInfo *syn)
835 {
836     RK_S32 i = 0;
837     RK_S32 log2_min_cb_size;
838     RK_S32 width, height;
839     RK_S32 stride_y, stride_uv, virstrid_y;
840     Vdpu34xH265dRegSet *hw_regs;
841     RK_S32 ret = MPP_SUCCESS;
842     MppBuffer streambuf = NULL;
843     RK_S32 aglin_offset = 0;
844     RK_S32 valid_ref = -1;
845     MppBuffer framebuf = NULL;
846     HalBuf *mv_buf = NULL;
847     RK_S32 fd = -1;
848     RK_U32 mv_size = 0;
849     RK_S32 distance = MAX_INT;
850     h265d_dxva2_picture_context_t *dxva_cxt =
851         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
852     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
853     void *rps_ptr = NULL;
854     RK_U32 stream_buf_size = 0;
855 
856     if (syn->dec.flags.parse_err ||
857         syn->dec.flags.ref_err) {
858         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
859         return MPP_OK;
860     }
861 
862     if (reg_ctx ->fast_mode) {
863         for (i = 0; i < MAX_GEN_REG; i++) {
864             if (!reg_ctx->g_buf[i].use_flag) {
865                 syn->dec.reg_index = i;
866 
867                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
868                 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
869                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
870 
871                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
872                 reg_ctx->g_buf[i].use_flag = 1;
873                 break;
874             }
875         }
876         if (i == MAX_GEN_REG) {
877             mpp_err("hevc rps buf all used");
878             return MPP_ERR_NOMEM;
879         }
880     }
881     rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
882     if (NULL == rps_ptr) {
883 
884         mpp_err("rps_data get ptr error");
885         return MPP_ERR_NOMEM;
886     }
887 
888 
889     if (syn->dec.syntax.data == NULL) {
890         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
891         return MPP_ERR_NULL_PTR;
892     }
893 
894     /* output pps */
895     hw_regs = (Vdpu34xH265dRegSet*)reg_ctx->hw_regs;
896     memset(hw_regs, 0, sizeof(Vdpu34xH265dRegSet));
897 
898     if (reg_ctx->is_v34x) {
899         hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
900     } else {
901         hal_h265d_output_pps_packet(hal, syn->dec.syntax.data);
902     }
903 
904     if (NULL == reg_ctx->hw_regs) {
905         return MPP_ERR_NULL_PTR;
906     }
907 
908 
909     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
910 
911     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
912     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
913     mv_size = (MPP_ALIGN(width, 64) * MPP_ALIGN(height, 64)) >> 3;
914     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
915         size_t size = mv_size;
916 
917         if (reg_ctx->cmv_bufs) {
918             hal_bufs_deinit(reg_ctx->cmv_bufs);
919             reg_ctx->cmv_bufs = NULL;
920         }
921 
922         hal_bufs_init(&reg_ctx->cmv_bufs);
923         if (reg_ctx->cmv_bufs == NULL) {
924             mpp_err_f("colmv bufs init fail");
925             return MPP_ERR_NULL_PTR;
926         }
927 
928         reg_ctx->mv_size = mv_size;
929         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
930         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
931     }
932 
933     {
934         MppFrame mframe = NULL;
935         RK_U32 ver_virstride;
936 
937         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
938                               SLOT_FRAME_PTR, &mframe);
939         stride_y = mpp_frame_get_hor_stride(mframe);
940         ver_virstride = mpp_frame_get_ver_stride(mframe);
941         stride_uv = stride_y;
942         virstrid_y = ver_virstride * stride_y;
943         hw_regs->common.reg013.h26x_error_mode = 1;
944         hw_regs->common.reg013.h26x_streamd_error_mode = 1;
945         hw_regs->common.reg013.colmv_error_mode = 1;
946         hw_regs->common.reg021.error_deb_en = 1;
947         hw_regs->common.reg021.inter_error_prc_mode = 0;
948         hw_regs->common.reg021.error_intra_mode = 1;
949 
950         hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
951         hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
952         hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
953         hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
954 
955         if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
956             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
957             RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
958 
959             hw_regs->common.reg012.fbc_e = 1;
960             hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
961             hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
962             hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
963         } else {
964             hw_regs->common.reg012.fbc_e = 0;
965             hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
966             hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
967             hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
968         }
969 
970         if (MPP_FRAME_FMT_IS_HDR(mpp_frame_get_fmt(mframe)) && reg_ctx->cfg->base.enable_hdr_meta)
971             fill_hdr_meta_to_frame(mframe, HDR_HEVC);
972     }
973     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
974                           SLOT_BUFFER, &framebuf);
975     hw_regs->common_addr.reg130_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
976     /*if out_base is equal to zero it means this frame may error
977     we return directly add by csy*/
978 
979     if (hw_regs->common_addr.reg130_decout_base == 0) {
980         return 0;
981     }
982     fd =  mpp_buffer_get_fd(framebuf);
983     hw_regs->common_addr.reg130_decout_base = fd;
984     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
985     hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
986 
987     hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
988 
989     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
990                           &streambuf);
991     if ( dxva_cxt->bitstream == NULL) {
992         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
993     }
994     if (reg_ctx->is_v34x) {
995 #ifdef HW_RPS
996         hw_regs->common.reg012.wait_reset_en = 1;
997         hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
998         hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
999 #else
1000         hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
1001         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1002 #endif
1003     } else {
1004         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1005     }
1006 
1007     MppDevRegOffsetCfg trans_cfg;
1008     /* cabac table */
1009     hw_regs->h265d_addr.reg197_cabactbl_base    = reg_ctx->bufs_fd;
1010     /* pps */
1011     hw_regs->h265d_addr.reg161_pps_base         = reg_ctx->bufs_fd;
1012     hw_regs->h265d_addr.reg163_rps_base         = reg_ctx->bufs_fd;
1013 
1014     hw_regs->common_addr.reg128_rlc_base        = mpp_buffer_get_fd(streambuf);
1015     hw_regs->common_addr.reg129_rlcwrite_base   = mpp_buffer_get_fd(streambuf);
1016     stream_buf_size                             = mpp_buffer_get_size(streambuf);
1017     hw_regs->common.reg016_str_len              = ((dxva_cxt->bitstream_size + 15)
1018                                                    & (~15)) + 64;
1019     hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
1020                                      hw_regs->common.reg016_str_len : stream_buf_size;
1021 
1022     aglin_offset =  hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
1023     if (aglin_offset > 0) {
1024         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
1025                aglin_offset);
1026     }
1027     hw_regs->common.reg010.dec_e                = 1;
1028     hw_regs->common.reg011.dec_timeout_e        = 1;
1029     hw_regs->common.reg012.wr_ddr_align_en      = dxva_cxt->pp.tiles_enabled_flag
1030                                                   ? 0 : 1;
1031     hw_regs->common.reg012.colmv_compress_en    = 1;
1032 
1033     hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
1034     hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
1035 
1036     hw_regs->common.reg011.dec_clkgate_e    = 1;
1037     hw_regs->common.reg011.dec_e_strmd_clkgate_dis = 0;
1038     hw_regs->common.reg026.swreg_block_gating_e =
1039         (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) ? 0xfffef : 0xfffff;
1040     hw_regs->common.reg026.reg_cfg_gating_en = 1;
1041     hw_regs->common.reg032_timeout_threshold = 0x3ffff;
1042 
1043     valid_ref = hw_regs->common_addr.reg130_decout_base;
1044     reg_ctx->error_index = dxva_cxt->pp.CurrPic.Index7Bits;
1045     hw_regs->common_addr.reg132_error_ref_base = valid_ref;
1046 
1047     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
1048         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
1049             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
1050 
1051             MppFrame mframe = NULL;
1052             hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
1053             mpp_buf_slot_get_prop(reg_ctx->slots,
1054                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
1055                                   SLOT_BUFFER, &framebuf);
1056             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
1057                                   SLOT_FRAME_PTR, &mframe);
1058             if (framebuf != NULL) {
1059                 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
1060                 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
1061                 // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
1062                 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
1063                     && (!mpp_frame_get_errinfo(mframe))) {
1064                     distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
1065                     hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
1066                     reg_ctx->error_index = dxva_cxt->pp.RefPicList[i].Index7Bits;
1067                     hw_regs->common.reg021.error_intra_mode = 0;
1068                 }
1069             } else {
1070                 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
1071             }
1072 
1073             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
1074             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1075 
1076             SET_REF_VALID(hw_regs->h265d_param, i, 1);
1077         }
1078     }
1079 
1080     if ((reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits) && !dxva_cxt->pp.IntraPicFlag) {
1081         // mpp_err("current frm may be err, should skip process");
1082         syn->dec.flags.ref_err = 1;
1083         return MPP_OK;
1084     }
1085 
1086     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
1087 
1088         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
1089             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
1090             if (!hw_regs->common.reg021.error_intra_mode) {
1091                 MppFrame mframe = NULL;
1092 
1093                 mpp_buf_slot_get_prop(reg_ctx->slots,
1094                                       dxva_cxt->pp.RefPicList[i].Index7Bits,
1095                                       SLOT_BUFFER, &framebuf);
1096 
1097                 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
1098                                       SLOT_FRAME_PTR, &mframe);
1099 
1100                 if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
1101                     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
1102                     hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1103                     hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1104                 }
1105             }
1106         } else {
1107             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
1108             hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1109             hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1110             /* mark 3 to differ from current frame */
1111             if (reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits)
1112                 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
1113         }
1114     }
1115 
1116     trans_cfg.reg_idx = 161;
1117     trans_cfg.offset = reg_ctx->spspps_offset;
1118     mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
1119     /* rps */
1120     trans_cfg.reg_idx = 163;
1121     trans_cfg.offset = reg_ctx->rps_offset;
1122     mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
1123 
1124     hw_regs->common.reg013.timeout_mode = 1;
1125     hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
1126 
1127     hw_regs->common.reg011.buf_empty_en = 1;
1128 
1129     hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
1130     vdpu34x_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
1131                       reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
1132                       (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1133     vdpu34x_setup_statistic(&hw_regs->common, &hw_regs->statistic);
1134 
1135     return ret;
1136 }
1137 
hal_h265d_vdpu34x_start(void * hal,HalTaskInfo * task)1138 static MPP_RET hal_h265d_vdpu34x_start(void *hal, HalTaskInfo *task)
1139 {
1140     MPP_RET ret = MPP_OK;
1141     RK_U8* p = NULL;
1142     Vdpu34xH265dRegSet *hw_regs = NULL;
1143     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1144     RK_S32 index =  task->dec.reg_index;
1145 
1146     RK_U32 i;
1147 
1148     if (task->dec.flags.parse_err ||
1149         task->dec.flags.ref_err) {
1150         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1151         return MPP_OK;
1152     }
1153 
1154     if (reg_ctx->fast_mode) {
1155         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
1156         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1157     } else {
1158         p = (RK_U8*)reg_ctx->hw_regs;
1159         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1160     }
1161 
1162     if (hw_regs == NULL) {
1163         mpp_err("hal_h265d_start hw_regs is NULL");
1164         return MPP_ERR_NULL_PTR;
1165     }
1166     for (i = 0; i < 68; i++) {
1167         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1168                   i, *((RK_U32*)p));
1169         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
1170         p += 4;
1171     }
1172 
1173     do {
1174         MppDevRegWrCfg wr_cfg;
1175         MppDevRegRdCfg rd_cfg;
1176 
1177         wr_cfg.reg = &hw_regs->common;
1178         wr_cfg.size = sizeof(hw_regs->common);
1179         wr_cfg.offset = OFFSET_COMMON_REGS;
1180 
1181         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1182         if (ret) {
1183             mpp_err_f("set register write failed %d\n", ret);
1184             break;
1185         }
1186 
1187         wr_cfg.reg = &hw_regs->h265d_param;
1188         wr_cfg.size = sizeof(hw_regs->h265d_param);
1189         wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
1190 
1191         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1192         if (ret) {
1193             mpp_err_f("set register write failed %d\n", ret);
1194             break;
1195         }
1196 
1197         wr_cfg.reg = &hw_regs->common_addr;
1198         wr_cfg.size = sizeof(hw_regs->common_addr);
1199         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1200 
1201         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1202         if (ret) {
1203             mpp_err_f("set register write failed %d\n", ret);
1204             break;
1205         }
1206 
1207         wr_cfg.reg = &hw_regs->h265d_addr;
1208         wr_cfg.size = sizeof(hw_regs->h265d_addr);
1209         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1210 
1211         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1212         if (ret) {
1213             mpp_err_f("set register write failed %d\n", ret);
1214             break;
1215         }
1216 
1217         wr_cfg.reg = &hw_regs->statistic;
1218         wr_cfg.size = sizeof(hw_regs->statistic);
1219         wr_cfg.offset = OFFSET_STATISTIC_REGS;
1220 
1221         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1222         if (ret) {
1223             mpp_err_f("set register write failed %d\n", ret);
1224             break;
1225         }
1226 
1227         if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) {
1228             wr_cfg.reg = &hw_regs->highpoc;
1229             wr_cfg.size = sizeof(hw_regs->highpoc);
1230             wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1231 
1232             ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1233             if (ret) {
1234                 mpp_err_f("set register write failed %d\n", ret);
1235                 break;
1236             }
1237         }
1238 
1239         rd_cfg.reg = &hw_regs->irq_status;
1240         rd_cfg.size = sizeof(hw_regs->irq_status);
1241         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1242 
1243         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1244         if (ret) {
1245             mpp_err_f("set register read failed %d\n", ret);
1246             break;
1247         }
1248 
1249         /* rcb info for sram */
1250         vdpu34x_set_rcbinfo(reg_ctx->dev, (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1251 
1252         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1253         if (ret) {
1254             mpp_err_f("send cmd failed %d\n", ret);
1255             break;
1256         }
1257     } while (0);
1258 
1259     return ret;
1260 }
1261 
1262 
hal_h265d_vdpu34x_wait(void * hal,HalTaskInfo * task)1263 static MPP_RET hal_h265d_vdpu34x_wait(void *hal, HalTaskInfo *task)
1264 {
1265     MPP_RET ret = MPP_OK;
1266     RK_S32 index =  task->dec.reg_index;
1267     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1268     RK_U8* p = NULL;
1269     Vdpu34xH265dRegSet *hw_regs = NULL;
1270     RK_S32 i;
1271 
1272     if (task->dec.flags.parse_err ||
1273         task->dec.flags.ref_err) {
1274         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1275         goto ERR_PROC;
1276     }
1277 
1278     if (reg_ctx->fast_mode) {
1279         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1280     } else {
1281         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1282     }
1283 
1284     p = (RK_U8*)hw_regs;
1285 
1286     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1287     if (ret)
1288         mpp_err_f("poll cmd failed %d\n", ret);
1289 
1290 ERR_PROC:
1291     if (task->dec.flags.parse_err ||
1292         task->dec.flags.ref_err ||
1293         hw_regs->irq_status.reg224.dec_error_sta ||
1294         hw_regs->irq_status.reg224.buf_empty_sta ||
1295         hw_regs->irq_status.reg224.dec_bus_sta ||
1296         !hw_regs->irq_status.reg224.dec_rdy_sta) {
1297         if (!reg_ctx->fast_mode) {
1298             if (reg_ctx->dec_cb)
1299                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1300         } else {
1301             MppFrame mframe = NULL;
1302             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1303                                   SLOT_FRAME_PTR, &mframe);
1304             if (mframe) {
1305                 reg_ctx->fast_mode_err_found = 1;
1306                 mpp_frame_set_errinfo(mframe, 1);
1307             }
1308         }
1309     } else {
1310         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1311             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1312                 if (task->dec.refer[i] >= 0) {
1313                     MppFrame frame_ref = NULL;
1314 
1315                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1316                                           SLOT_FRAME_PTR, &frame_ref);
1317                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1318                               i, task->dec.refer[i], frame_ref);
1319                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1320                         MppFrame frame_out = NULL;
1321                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1322                                               SLOT_FRAME_PTR, &frame_out);
1323                         mpp_frame_set_errinfo(frame_out, 1);
1324                         break;
1325                     }
1326                 }
1327             }
1328         }
1329     }
1330 
1331     for (i = 0; i < 68; i++) {
1332         if (i == 1) {
1333             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1334                       i, *((RK_U32*)p));
1335         }
1336 
1337         if (i == 45) {
1338             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1339                       i, *((RK_U32*)p));
1340         }
1341         p += 4;
1342     }
1343 
1344     if (reg_ctx->fast_mode) {
1345         reg_ctx->g_buf[index].use_flag = 0;
1346     }
1347 
1348     return ret;
1349 }
1350 
hal_h265d_vdpu34x_reset(void * hal)1351 static MPP_RET hal_h265d_vdpu34x_reset(void *hal)
1352 {
1353     MPP_RET ret = MPP_OK;
1354     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1355     p_hal->fast_mode_err_found = 0;
1356     (void)hal;
1357     return ret;
1358 }
1359 
hal_h265d_vdpu34x_flush(void * hal)1360 static MPP_RET hal_h265d_vdpu34x_flush(void *hal)
1361 {
1362     MPP_RET ret = MPP_OK;
1363 
1364     (void)hal;
1365     return ret;
1366 }
1367 
hal_h265d_vdpu34x_control(void * hal,MpiCmd cmd_type,void * param)1368 static MPP_RET hal_h265d_vdpu34x_control(void *hal, MpiCmd cmd_type, void *param)
1369 {
1370     MPP_RET ret = MPP_OK;
1371     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1372 
1373     (void)hal;
1374     switch ((MpiCmd)cmd_type) {
1375     case MPP_DEC_SET_FRAME_INFO: {
1376         MppFrame frame = (MppFrame)param;
1377         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1378 
1379         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1380             vdpu34x_afbc_align_calc(p_hal->slots, frame, 16);
1381         }
1382         break;
1383     }
1384     case MPP_DEC_SET_OUTPUT_FORMAT: {
1385     } break;
1386     default:
1387         break;
1388     }
1389     return  ret;
1390 }
1391 
1392 const MppHalApi hal_h265d_vdpu34x = {
1393     .name = "h265d_vdpu34x",
1394     .type = MPP_CTX_DEC,
1395     .coding = MPP_VIDEO_CodingHEVC,
1396     .ctx_size = sizeof(HalH265dCtx),
1397     .flag = 0,
1398     .init = hal_h265d_vdpu34x_init,
1399     .deinit = hal_h265d_vdpu34x_deinit,
1400     .reg_gen = hal_h265d_vdpu34x_gen_regs,
1401     .start = hal_h265d_vdpu34x_start,
1402     .wait = hal_h265d_vdpu34x_wait,
1403     .reset = hal_h265d_vdpu34x_reset,
1404     .flush = hal_h265d_vdpu34x_flush,
1405     .control = hal_h265d_vdpu34x_control,
1406 };
1407