/* xref: /rockchip-linux_mpp/mpp/hal/rkdec/h265d/hal_h265d_vdpu34x.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18) */
/*
 * Copyright 2020 Rockchip Electronics Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #define MODULE_TAG "hal_h265d_vdpu34x"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26 
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu34x.h"
32 #include "vdpu34x_h265d.h"
33 
34 /* #define dump */
35 #ifdef dump
36 static FILE *fp = NULL;
37 #endif
38 
39 #define HW_RPS
40 #define PPS_SIZE                (112 * 64)//(96x64)
41 
42 #define COLMV_COMPRESS_EN       1
43 
/*
 * Store `value` into the bit-field regs.reg99.hevc_ref_valid_<index>.
 *
 * regs  - register-set lvalue (any expression yielding the struct, e.g. *ptr)
 * index - reference slot, 0..14; any other index is silently ignored
 * value - value assigned to the selected field
 *
 * The field name depends on `index`, hence the switch instead of an array
 * access. `regs` and `value` are parenthesized so that non-trivial
 * expressions can be passed without precedence surprises.
 */
#define SET_REF_VALID(regs, index, value) \
    do { \
        switch (index) { \
        case 0:  (regs).reg99.hevc_ref_valid_0  = (value); break; \
        case 1:  (regs).reg99.hevc_ref_valid_1  = (value); break; \
        case 2:  (regs).reg99.hevc_ref_valid_2  = (value); break; \
        case 3:  (regs).reg99.hevc_ref_valid_3  = (value); break; \
        case 4:  (regs).reg99.hevc_ref_valid_4  = (value); break; \
        case 5:  (regs).reg99.hevc_ref_valid_5  = (value); break; \
        case 6:  (regs).reg99.hevc_ref_valid_6  = (value); break; \
        case 7:  (regs).reg99.hevc_ref_valid_7  = (value); break; \
        case 8:  (regs).reg99.hevc_ref_valid_8  = (value); break; \
        case 9:  (regs).reg99.hevc_ref_valid_9  = (value); break; \
        case 10: (regs).reg99.hevc_ref_valid_10 = (value); break; \
        case 11: (regs).reg99.hevc_ref_valid_11 = (value); break; \
        case 12: (regs).reg99.hevc_ref_valid_12 = (value); break; \
        case 13: (regs).reg99.hevc_ref_valid_13 = (value); break; \
        case 14: (regs).reg99.hevc_ref_valid_14 = (value); break; \
        default: break; \
        } \
    } while (0)
64 
65 #define FMT 4
66 #define CTU 3
67 
68 typedef struct {
69     RK_U32 a;
70     RK_U32 b;
71 } FilterdColBufRatio;
72 
73 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
74     /* 400    420      422       444 */
75     {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
76     {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
77     {{0, 0}, {27, 5},  {36, 5},  {52, 5}}  //ctu 64
78 };
79 
80 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
81     /* 400    420      422       444 */
82     {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
83     {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
84     {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
85 };
86 
/*
 * Layout of the single HAL buffer (all region sizes rounded up to 4K):
 *
 *   [ CABAC table ][ info slot 0 ][ info slot 1 ] ...
 *
 * where every info slot is [ SPS/PPS ][ RPS ][ scaling list ].
 * Macro arguments are parenthesized so that expressions such as `n + 1`
 * can be passed safely.
 */
#define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
#define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
#define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
#define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
#define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
/* Total buffer size for `cnt` info slots plus the shared CABAC table. */
#define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE * (cnt))

/* Byte offsets of each region; `pos` is the info slot index. */
#define CABAC_TAB_OFFSET                (0)
#define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * (pos)))
#define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
#define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
98 
hal_h265d_vdpu34x_init(void * hal,MppHalCfg * cfg)99 static MPP_RET hal_h265d_vdpu34x_init(void *hal, MppHalCfg *cfg)
100 {
101     RK_S32 ret = 0;
102     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
103 
104     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
105     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
106 
107     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
108     if (reg_ctx->scaling_qm == NULL) {
109         mpp_err("scaling_org alloc fail");
110         return MPP_ERR_MALLOC;
111     }
112 
113     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
114     reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
115     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
116 
117     if (reg_ctx->scaling_rk == NULL) {
118         mpp_err("scaling_rk alloc fail");
119         return MPP_ERR_MALLOC;
120     }
121 
122     if (reg_ctx->group == NULL) {
123         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
124         if (ret) {
125             mpp_err("h265d mpp_buffer_group_get failed\n");
126             return ret;
127         }
128     }
129 
130     {
131         RK_U32 i = 0;
132         RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
133 
134         //!< malloc buffers
135         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
136         if (ret) {
137             mpp_err("h265d mpp_buffer_get failed\n");
138             return ret;
139         }
140 
141         reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
142         reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
143         for (i = 0; i < max_cnt; i++) {
144             reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu34xH265dRegSet));
145             reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
146             reg_ctx->offset_rps[i] = RPS_OFFSET(i);
147             reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
148         }
149     }
150 
151     if (!reg_ctx->fast_mode) {
152         reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
153         reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
154         reg_ctx->rps_offset = reg_ctx->offset_rps[0];
155         reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
156     }
157 
158     ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
159     if (ret) {
160         mpp_err("h265d write cabac_table data failed\n");
161         return ret;
162     }
163 
164     if (cfg->hal_fbc_adj_cfg) {
165         cfg->hal_fbc_adj_cfg->func = vdpu34x_afbc_align_calc;
166         cfg->hal_fbc_adj_cfg->expand = 16;
167     }
168 
169     (void)cfg;
170 #ifdef dump
171     fp = fopen("/data/hal.bin", "wb");
172 #endif
173     return MPP_OK;
174 }
175 
hal_h265d_vdpu34x_deinit(void * hal)176 static MPP_RET hal_h265d_vdpu34x_deinit(void *hal)
177 {
178     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
179     RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
180     RK_U32 i;
181 
182     if (reg_ctx->bufs) {
183         mpp_buffer_put(reg_ctx->bufs);
184         reg_ctx->bufs = NULL;
185     }
186 
187     loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
188     for (i = 0; i < loop; i++) {
189         if (reg_ctx->rcb_buf[i]) {
190             mpp_buffer_put(reg_ctx->rcb_buf[i]);
191             reg_ctx->rcb_buf[i] = NULL;
192         }
193     }
194 
195     if (reg_ctx->missing_ref_buf) {
196         mpp_buffer_put(reg_ctx->missing_ref_buf);
197         reg_ctx->missing_ref_buf = NULL;
198     }
199 
200     if (reg_ctx->group) {
201         mpp_buffer_group_put(reg_ctx->group);
202         reg_ctx->group = NULL;
203     }
204 
205     for (i = 0; i < loop; i++)
206         MPP_FREE(reg_ctx->g_buf[i].hw_regs);
207 
208     MPP_FREE(reg_ctx->scaling_qm);
209     MPP_FREE(reg_ctx->scaling_rk);
210     MPP_FREE(reg_ctx->pps_buf);
211     MPP_FREE(reg_ctx->sw_rps_buf);
212 
213     if (reg_ctx->cmv_bufs) {
214         hal_bufs_deinit(reg_ctx->cmv_bufs);
215         reg_ctx->cmv_bufs = NULL;
216     }
217 
218     return MPP_OK;
219 }
220 
hal_h265d_v345_output_pps_packet(void * hal,void * dxva)221 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
222 {
223     RK_S32 fifo_len = 14;//12
224     RK_S32 i, j;
225     RK_U32 addr;
226     RK_U32 log2_min_cb_size;
227     RK_S32 width, height;
228     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
229     Vdpu34xH265dRegSet *hw_reg = (Vdpu34xH265dRegSet*)(reg_ctx->hw_regs);
230     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
231     BitputCtx_t bp;
232 
233     if (NULL == reg_ctx || dxva_cxt == NULL) {
234         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
235                 __FILE__, __FUNCTION__, __LINE__);
236         return MPP_ERR_NULL_PTR;
237     }
238     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
239     if (dxva_cxt->pp.ps_update_flag) {
240         RK_U64 *pps_packet = reg_ctx->pps_buf;
241         if (NULL == pps_ptr) {
242             mpp_err("pps_data get ptr error");
243             return MPP_ERR_NOMEM;
244         }
245 
246         for (i = 0; i < 14; i++) pps_packet[i] = 0;
247 
248         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
249 
250         // SPS
251         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
252         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
253         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
254 
255         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
256         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
257         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
258 
259         mpp_put_bits(&bp, width                                          , 16);
260         mpp_put_bits(&bp, height                                         , 16);
261         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
262         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
263         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
264         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
265         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
266         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
267         ///<-zrh comment ^  63 bit above
268         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
269         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
270         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
271         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
272         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
273         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
274         ///<-zrh comment ^  68 bit above
275         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
276         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
277         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
278         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
279         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
280         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
281 
282         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
283         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
284         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
285         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
286         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
287         ///<-zrh comment ^ 100 bit above
288 
289         mpp_put_bits(&bp, 0                                                    , 7 ); //49bits
290         //yandong change
291         mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
292         mpp_put_bits(&bp, 0, 3);
293         mpp_put_align(&bp                                                        , 32, 0xf); //128
294         // PPS
295         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
296         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
297         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
298         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
299         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
300         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
301         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
302         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);//31 bits
303         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
304         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
305         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
306         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
307         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
308         mpp_put_bits(&bp, log2_min_cb_size +
309                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
310                      dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
311 
312         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
313                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
314 
315         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
316         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
317         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
318         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
319         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
320         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
321         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
322         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
323         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
324         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
325         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
326         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
327         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
328         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
329         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
330         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
331         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
332         mpp_put_bits(&bp, 0                                                        , 3);
333         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
334         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
335         mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
336         mpp_put_align(&bp, 64, 0xf);
337         {
338             /// tiles info begin
339             RK_U16 column_width[20];
340             RK_U16 row_height[22];
341 
342             memset(column_width, 0, sizeof(column_width));
343             memset(row_height, 0, sizeof(row_height));
344 
345             if (dxva_cxt->pp.tiles_enabled_flag) {
346 
347                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
348                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
349                     RK_S32 ctu_width_in_pic = (width +
350                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
351                     RK_S32 ctu_height_in_pic = (height +
352                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
353                     RK_S32 sum = 0;
354                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
355                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
356                         sum += column_width[i]  ;
357                     }
358                     column_width[i] = ctu_width_in_pic - sum;
359 
360                     sum = 0;
361                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
362                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
363                         sum += row_height[i];
364                     }
365                     row_height[i] = ctu_height_in_pic - sum;
366                 } // end of (pps->uniform_spacing_flag == 0)
367                 else {
368 
369                     RK_S32    pic_in_cts_width = (width +
370                                                   (1 << (log2_min_cb_size +
371                                                          dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
372                                                  / (1 << (log2_min_cb_size +
373                                                           dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
374                     RK_S32 pic_in_cts_height = (height +
375                                                 (1 << (log2_min_cb_size +
376                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
377                                                / (1 << (log2_min_cb_size +
378                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
379 
380                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
381                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
382                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
383 
384                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
385                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
386                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
387                 }
388             } // pps->tiles_enabled_flag
389             else {
390                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
391                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
392                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
393             }
394 
395             for (j = 0; j < 20; j++) {
396                 if (column_width[j] > 0)
397                     column_width[j]--;
398                 mpp_put_bits(&bp, column_width[j], 12);
399             }
400 
401             for (j = 0; j < 22; j++) {
402                 if (row_height[j] > 0)
403                     row_height[j]--;
404                 mpp_put_bits(&bp, row_height[j], 12);
405             }
406         }
407 
408         mpp_put_bits(&bp, 0, 32);
409         mpp_put_bits(&bp, 0, 70);
410         mpp_put_align(&bp, 64, 0xf);//128
411     }
412 
413     if (dxva_cxt->pp.scaling_list_enabled_flag) {
414         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
415 
416         if (dxva_cxt->pp.scaling_list_data_present_flag) {
417             addr = (dxva_cxt->pp.pps_id + 16) * 1360;
418         } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
419             addr = dxva_cxt->pp.sps_id * 1360;
420         } else {
421             addr = 80 * 1360;
422         }
423 
424         hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
425 
426         hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
427         hw_reg->common.reg012.scanlist_addr_valid_en = 1;
428 
429         /* need to config addr */
430         mpp_dev_set_reg_offset(reg_ctx->dev, 180, addr + reg_ctx->sclst_offset);
431     }
432 
433     for (i = 0; i < 64; i++)
434         memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
435 #ifdef dump
436     fwrite(pps_ptr, 1, 80 * 64, fp);
437     RK_U32 *tmp = (RK_U32 *)pps_ptr;
438     for (i = 0; i < 112 / 4; i++) {
439         mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
440     }
441 #endif
442     return 0;
443 }
444 
hal_h265d_output_pps_packet(void * hal,void * dxva)445 static RK_S32 hal_h265d_output_pps_packet(void *hal, void *dxva)
446 {
447     RK_S32 fifo_len = 10;
448     RK_S32 i, j;
449     RK_U32 addr;
450     RK_U32 log2_min_cb_size;
451     RK_S32 width, height;
452     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
453     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
454     BitputCtx_t bp;
455 
456     if (NULL == reg_ctx || dxva_cxt == NULL) {
457         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
458                 __FILE__, __FUNCTION__, __LINE__);
459         return MPP_ERR_NULL_PTR;
460     }
461 
462     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
463 
464     if (dxva_cxt->pp.ps_update_flag || dxva_cxt->pp.scaling_list_enabled_flag) {
465         RK_U64 *pps_packet = reg_ctx->pps_buf;
466 
467         if (NULL == pps_ptr) {
468             mpp_err("pps_data get ptr error");
469             return MPP_ERR_NOMEM;
470         }
471 
472         for (i = 0; i < 10; i++) pps_packet[i] = 0;
473 
474         mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
475 
476         // SPS
477         mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
478         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
479         mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
480 
481         log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
482         width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
483         height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
484 
485         mpp_put_bits(&bp, width                                          , 16);//yandong
486         mpp_put_bits(&bp, height                                         , 16);//yandong
487         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
488         mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
489         mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
490         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
491         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
492         mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
493         ///<-zrh comment ^  57 bit above
494         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
495         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
496         mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
497         mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
498         mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
499         mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
500         ///<-zrh comment ^  68 bit above
501         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
502         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
503         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
504         mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
505         mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
506         mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
507 
508         mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
509         mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
510         mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
511         mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
512         mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
513         ///<-zrh comment ^ 100 bit above
514 
515         mpp_put_bits(&bp, 0                                                    , 7 );
516         mpp_put_align(&bp                                                      , 32, 0xf);
517 
518         // PPS
519         mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
520         mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
521         mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
522         mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
523         mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
524         mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
525         mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
526         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
527         mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
528         mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
529         mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
530         mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
531         mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1);
532 
533         mpp_put_bits(&bp, log2_min_cb_size +
534                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
535                      dxva_cxt->pp.diff_cu_qp_delta_depth                           , 3);
536 
537         h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
538                   dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
539 
540         mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
541         mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
542         mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
543         mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
544         mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
545         mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1);
546         mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1);
547         mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
548         mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
549         mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1);
550 
551         mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
552         mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
553         mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
554         mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
555         mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
556         mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
557         mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
558         mpp_put_bits(&bp, 0                                                        , 3);
559         mpp_put_bits(&bp, dxva_cxt->pp.num_tile_columns_minus1 + 1, 5);
560         mpp_put_bits(&bp, dxva_cxt->pp.num_tile_rows_minus1 + 1 , 5 );
561         mpp_put_bits(&bp, 3, 2); //mSps_Pps[i]->mMode
562         mpp_put_align(&bp, 64, 0xf);
563 
564         {
565             /// tiles info begin
566             RK_U16 column_width[20];
567             RK_U16 row_height[22];
568 
569             memset(column_width, 0, sizeof(column_width));
570             memset(row_height, 0, sizeof(row_height));
571 
572             if (dxva_cxt->pp.tiles_enabled_flag) {
573                 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
574                     RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
575                     RK_S32 ctu_width_in_pic = (width +
576                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
577                     RK_S32 ctu_height_in_pic = (height +
578                                                 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
579                     RK_S32 sum = 0;
580                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
581                         column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
582                         sum += column_width[i]  ;
583                     }
584                     column_width[i] = ctu_width_in_pic - sum;
585 
586                     sum = 0;
587                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
588                         row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
589                         sum += row_height[i];
590                     }
591                     row_height[i] = ctu_height_in_pic - sum;
592                 } // end of (pps->uniform_spacing_flag == 0)
593                 else {
594 
595                     RK_S32 pic_in_cts_width = (width +
596                                                (1 << (log2_min_cb_size +
597                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
598                                               / (1 << (log2_min_cb_size +
599                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
600                     RK_S32 pic_in_cts_height = (height +
601                                                 (1 << (log2_min_cb_size +
602                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
603                                                / (1 << (log2_min_cb_size +
604                                                         dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
605 
606                     for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
607                         column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
608                                           (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
609 
610                     for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
611                         row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
612                                         (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
613                 }
614             } // pps->tiles_enabled_flag
615             else {
616                 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
617                 column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
618                 row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
619             }
620 
621             for (j = 0; j < 20; j++) {
622                 if (column_width[j] > 0)
623                     column_width[j]--;
624                 mpp_put_bits(&bp, column_width[j], 12);// yandong 8bit -> 12bit
625             }
626 
627             for (j = 0; j < 22; j++) {
628                 if (row_height[j] > 0)
629                     row_height[j]--;
630                 mpp_put_bits(&bp, row_height[j], 12);// yandong 8bit -> 12bit
631             }
632         }
633 
634         {
635             RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
636             if (dxva_cxt->pp.scaling_list_data_present_flag) {
637                 addr = (dxva_cxt->pp.pps_id + 16) * 1360;
638             } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
639                 addr = dxva_cxt->pp.sps_id * 1360;
640             } else {
641                 addr = 80 * 1360;
642             }
643 
644             hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
645 
646             RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
647             /* need to config addr */
648             addr = fd | (addr << 10);
649 
650             mpp_put_bits(&bp, addr, 32);
651             mpp_put_align(&bp, 64, 0xf);
652         }
653         for (i = 0; i < 64; i++)
654             memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
655     } else if (reg_ctx->fast_mode) {
656         for (i = 0; i < 64; i++)
657             memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
658     }
659 
660 #ifdef dump
661     fwrite(pps_ptr, 1, 80 * 64, fp);
662     fflush(fp);
663 #endif
664     return 0;
665 }
666 
/*
 * Refine the per-entry sizes stored in rcb_info for the current picture.
 *
 * rcb_info : table indexed by the RCB_* ids; only the .size member of the
 *            entries handled below is written.
 * hw_regs  : register set; only common.reg012.fbc_e is read here.
 * width    : picture width, aligned up to the CTU size before use.
 * height   : picture height, aligned up to the CTU size before use.
 * dxva     : h265d_dxva2_picture_context_t carrying the picture parameters.
 *
 * Every section computes a bit count from the picture geometry, chroma
 * format, bit depth and number of tile column cuts, then stores
 * MPP_RCB_BYTES(bits) as the entry size.
 */
static void h265d_refine_rcb_size(Vdpu34xRcbInfo *rcb_info,
                                  Vdpu34xH265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    RK_U32 rcb_bits = 0;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;// 0: 400, 1: 420, 2: 422, 3: 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
    /* extra bits added to most row entries, proportional to the tile column cuts */
    RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;

    width = MPP_ALIGN(width, ctu_size);
    height = MPP_ALIGN(height, ctu_size);

    /* RCB_STRMD_ROW: only non-zero for pictures wider than 8192 */
    if (width > 8192) {
        RK_U32 factor = ctu_size / 16;
        rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) * factor * 24 + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_ROW: only non-zero for pictures wider than 8192 */
    if (width > 8192)
        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_COL: needs both a tall picture and at least one tile column cut */
    if (height > 8192 && tile_col_cut_num)
        rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1);
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_ROW */
    rcb_bits = width * 22 + ext_align_size;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_COL: column entries are only sized when tile columns exist */
    rcb_bits = tile_col_cut_num ? height * 22 : 0;
    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTRA_ROW */
    rcb_bits = width * 48 + ext_align_size;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_DBLK_ROW */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 6 * bit_depth);
        else
            rcb_bits = width * ( 2 + 6 * bit_depth);
    } else {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 8 * bit_depth);
        else
            rcb_bits = width * ( 2 + 8 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_SAO_ROW */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
    } else {
        rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FBC_ROW */
    if (hw_regs->common.reg012.fbc_e) {
        /*
         * chroma_fmt_idc is unsigned, so "chroma_fmt_idc - 1" would wrap to
         * 0xFFFFFFFF for 4:0:0 and yield a bogus, huge size. Clamp the
         * factor to 0 in that case; idc 1..3 give the same result as before.
         */
        RK_U32 chroma_factor = (chroma_fmt_idc > 1) ? (chroma_fmt_idc - 1) : 0;

        rcb_bits = width * chroma_factor * 2 * bit_depth;
        rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FILT_COL: ratio tables are indexed by [ctu_size >> 5][chroma_fmt_idc] */
    if (tile_col_cut_num) {
        if (hw_regs->common.reg012.fbc_e) {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b);
        } else {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 16 : 0));
        }
    } else
        rcb_bits = 0;

    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}
769 
/*
 * Re-size and re-allocate the RCB buffers when the stream geometry changes.
 *
 * hal      : HalH265dCtx holding the cached geometry and the rcb buffers.
 * dxva     : h265d_dxva2_picture_context_t carrying the picture parameters.
 * hw_regs  : register set, forwarded to h265d_refine_rcb_size().
 * width    : picture width passed to the sizing helpers.
 * height   : picture height passed to the sizing helpers.
 *
 * The tile-row count, bit depth, chroma format, CTU size, width and height
 * are compared against the values cached in the context. On any mismatch
 * the rcb info is recomputed and one buffer (or one per g_buf entry in fast
 * mode) is released and allocated again. The cached values are refreshed
 * only when every allocation succeeded, so a failed allocation is retried
 * on the next call.
 */
static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
                                      Vdpu34xH265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
{
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;// 0: 400, 1: 420, 2: 422, 3: 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;

    if (reg_ctx->num_row_tiles != num_tiles ||
        reg_ctx->bit_depth != bit_depth ||
        reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
        reg_ctx->ctu_size !=  ctu_size ||
        reg_ctx->width != width ||
        reg_ctx->height != height) {
        RK_U32 i = 0;
        RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
        RK_U32 alloc_failed = 0;

        /* total size is taken from the generic layout, then entry sizes are refined */
        reg_ctx->rcb_buf_size = vdpu34x_get_rcb_buf_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, width, height);
        h265d_refine_rcb_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);

        for (i = 0; i < loop; i++) {
            /* start from NULL so a failed get never stores an indeterminate handle */
            MppBuffer rcb_buf = NULL;

            if (reg_ctx->rcb_buf[i]) {
                mpp_buffer_put(reg_ctx->rcb_buf[i]);
                reg_ctx->rcb_buf[i] = NULL;
            }
            if (mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size) ||
                rcb_buf == NULL) {
                mpp_err_f("rcb buffer %d alloc failed size %d\n",
                          (RK_S32)i, (RK_S32)reg_ctx->rcb_buf_size);
                rcb_buf = NULL;
                alloc_failed = 1;
            }
            reg_ctx->rcb_buf[i] = rcb_buf;
        }

        /* keep the cached geometry stale so the next call retries the allocation */
        if (alloc_failed)
            return;

        reg_ctx->num_row_tiles  = num_tiles;
        reg_ctx->bit_depth      = bit_depth;
        reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
        reg_ctx->ctu_size       = ctu_size;
        reg_ctx->width          = width;
        reg_ctx->height         = height;
    }
}
813 
/*
 * Assign `value` to the ref<index>_<field> member selected by `index`:
 * indexes 0..7 are members of regs.reg200, indexes 8..15 are members of
 * regs.reg201, and any other index leaves regs untouched.
 */
#define SET_POC_HIGNBIT_INFO(regs, index, field, value) \
    do { \
        switch (index) { \
        case 0:  regs.reg200.ref0_##field  = value; break; \
        case 1:  regs.reg200.ref1_##field  = value; break; \
        case 2:  regs.reg200.ref2_##field  = value; break; \
        case 3:  regs.reg200.ref3_##field  = value; break; \
        case 4:  regs.reg200.ref4_##field  = value; break; \
        case 5:  regs.reg200.ref5_##field  = value; break; \
        case 6:  regs.reg200.ref6_##field  = value; break; \
        case 7:  regs.reg200.ref7_##field  = value; break; \
        case 8:  regs.reg201.ref8_##field  = value; break; \
        case 9:  regs.reg201.ref9_##field  = value; break; \
        case 10: regs.reg201.ref10_##field = value; break; \
        case 11: regs.reg201.ref11_##field = value; break; \
        case 12: regs.reg201.ref12_##field = value; break; \
        case 13: regs.reg201.ref13_##field = value; break; \
        case 14: regs.reg201.ref14_##field = value; break; \
        case 15: regs.reg201.ref15_##field = value; break; \
        default: break; \
        } \
    } while (0)
835 
/* Absolute difference between two picture order counts. */
#define pocdistance(a, b) (((a) < (b)) ? ((b) - (a)) : ((a) - (b)))
837 
hal_h265d_vdpu34x_gen_regs(void * hal,HalTaskInfo * syn)838 static MPP_RET hal_h265d_vdpu34x_gen_regs(void *hal,  HalTaskInfo *syn)
839 {
840     RK_S32 i = 0;
841     RK_S32 log2_min_cb_size;
842     RK_S32 width, height;
843     RK_S32 stride_y, stride_uv, virstrid_y;
844     Vdpu34xH265dRegSet *hw_regs;
845     RK_S32 ret = MPP_SUCCESS;
846     MppBuffer streambuf = NULL;
847     RK_S32 aglin_offset = 0;
848     RK_S32 valid_ref = -1;
849     MppBuffer framebuf = NULL;
850     HalBuf *mv_buf = NULL;
851     RK_S32 fd = -1;
852     RK_U32 mv_size = 0;
853     RK_S32 distance = INT_MAX;
854     h265d_dxva2_picture_context_t *dxva_cxt =
855         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
856     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
857     void *rps_ptr = NULL;
858     RK_U32 stream_buf_size = 0;
859     DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
860     RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size +
861                            pp->log2_min_luma_coding_block_size_minus3 + 3);
862 
863     if (syn->dec.flags.parse_err ||
864         (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
865         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
866         return MPP_OK;
867     }
868 
869     if (reg_ctx ->fast_mode) {
870         for (i = 0; i < MAX_GEN_REG; i++) {
871             if (!reg_ctx->g_buf[i].use_flag) {
872                 syn->dec.reg_index = i;
873 
874                 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
875                 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
876                 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
877 
878                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
879                 reg_ctx->g_buf[i].use_flag = 1;
880                 break;
881             }
882         }
883         if (i == MAX_GEN_REG) {
884             mpp_err("hevc rps buf all used");
885             return MPP_ERR_NOMEM;
886         }
887     } else {
888         syn->dec.reg_index = 0;
889     }
890     rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
891     if (NULL == rps_ptr) {
892 
893         mpp_err("rps_data get ptr error");
894         return MPP_ERR_NOMEM;
895     }
896 
897 
898     if (syn->dec.syntax.data == NULL) {
899         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
900         return MPP_ERR_NULL_PTR;
901     }
902 
903     /* output pps */
904     hw_regs = (Vdpu34xH265dRegSet*)reg_ctx->hw_regs;
905     memset(hw_regs, 0, sizeof(Vdpu34xH265dRegSet));
906 
907     if (NULL == reg_ctx->hw_regs) {
908         return MPP_ERR_NULL_PTR;
909     }
910 
911     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
912 
913     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
914     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
915     mv_size = vdpu34x_get_colmv_size(width, height, ctu_size, 16, 16, COLMV_COMPRESS_EN);
916     if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
917         size_t size = mv_size;
918 
919         if (reg_ctx->cmv_bufs) {
920             hal_bufs_deinit(reg_ctx->cmv_bufs);
921             reg_ctx->cmv_bufs = NULL;
922         }
923 
924         hal_bufs_init(&reg_ctx->cmv_bufs);
925         if (reg_ctx->cmv_bufs == NULL) {
926             mpp_err_f("colmv bufs init fail");
927             return MPP_ERR_NULL_PTR;
928         }
929 
930         reg_ctx->mv_size = mv_size;
931         reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
932         hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
933     }
934 
935     {
936         MppFrame mframe = NULL;
937         RK_U32 ver_virstride;
938 
939         mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
940                               SLOT_FRAME_PTR, &mframe);
941         stride_y = mpp_frame_get_hor_stride(mframe);
942         ver_virstride = mpp_frame_get_ver_stride(mframe);
943         stride_uv = stride_y;
944         virstrid_y = ver_virstride * stride_y;
945         hw_regs->common.reg013.h26x_error_mode = 1;
946         hw_regs->common.reg013.h26x_streamd_error_mode = 1;
947         hw_regs->common.reg013.colmv_error_mode = 1;
948         hw_regs->common.reg021.error_deb_en = 1;
949         hw_regs->common.reg021.inter_error_prc_mode = 0;
950         hw_regs->common.reg021.error_intra_mode = 1;
951 
952         hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
953         hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
954         hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
955         hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
956 
957         if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
958             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
959             RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
960 
961             hw_regs->common.reg012.fbc_e = 1;
962             hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
963             hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
964             hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
965         } else {
966             hw_regs->common.reg012.fbc_e = 0;
967             hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
968             hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
969             hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
970         }
971     }
972     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
973                           SLOT_BUFFER, &framebuf);
974     hw_regs->common_addr.reg130_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
975     /*if out_base is equal to zero it means this frame may error
976     we return directly add by csy*/
977 
978     if (hw_regs->common_addr.reg130_decout_base == 0) {
979         return 0;
980     }
981     fd =  mpp_buffer_get_fd(framebuf);
982     hw_regs->common_addr.reg130_decout_base = fd;
983     mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
984     hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
985 
986     hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
987 
988     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
989                           &streambuf);
990     if ( dxva_cxt->bitstream == NULL) {
991         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
992     }
993     if (reg_ctx->is_v34x) {
994 #ifdef HW_RPS
995         hw_regs->common.reg012.wait_reset_en = 1;
996         hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
997         hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
998 #else
999         hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
1000         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1001 #endif
1002     } else {
1003         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1004     }
1005 
1006     /* cabac table */
1007     hw_regs->h265d_addr.reg197_cabactbl_base    = reg_ctx->bufs_fd;
1008     /* pps */
1009     hw_regs->h265d_addr.reg161_pps_base         = reg_ctx->bufs_fd;
1010     hw_regs->h265d_addr.reg163_rps_base         = reg_ctx->bufs_fd;
1011 
1012     hw_regs->common_addr.reg128_rlc_base        = mpp_buffer_get_fd(streambuf);
1013     hw_regs->common_addr.reg129_rlcwrite_base   = mpp_buffer_get_fd(streambuf);
1014     stream_buf_size                             = mpp_buffer_get_size(streambuf);
1015     hw_regs->common.reg016_str_len              = ((dxva_cxt->bitstream_size + 15)
1016                                                    & (~15)) + 64;
1017     hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
1018                                      hw_regs->common.reg016_str_len : stream_buf_size;
1019 
1020     aglin_offset =  hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
1021     if (aglin_offset > 0) {
1022         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
1023                aglin_offset);
1024     }
1025     hw_regs->common.reg010.dec_e                = 1;
1026     hw_regs->common.reg011.dec_timeout_e        = 1;
1027     hw_regs->common.reg012.wr_ddr_align_en      = dxva_cxt->pp.tiles_enabled_flag
1028                                                   ? 0 : 1;
1029     hw_regs->common.reg012.colmv_compress_en    = COLMV_COMPRESS_EN;
1030 
1031     if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) {
1032         hw_regs->common.reg026.swreg_block_gating_e = 0xfffef;
1033         hw_regs->common.reg024.cabac_err_en_lowbits = 0;
1034         hw_regs->common.reg025.cabac_err_en_highbits = 0;
1035     } else {
1036         hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
1037         hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
1038         hw_regs->common.reg026.swreg_block_gating_e = 0xfffff;
1039     }
1040 
1041     hw_regs->common.reg011.dec_clkgate_e    = 1;
1042     hw_regs->common.reg011.dec_e_strmd_clkgate_dis = 0;
1043     hw_regs->common.reg026.reg_cfg_gating_en = 1;
1044     hw_regs->common.reg032_timeout_threshold = 0x3ffff;
1045 
1046     valid_ref = hw_regs->common_addr.reg130_decout_base;
1047     reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.CurrPic.Index7Bits;
1048     hw_regs->common_addr.reg132_error_ref_base = valid_ref;
1049 
1050     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
1051         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
1052             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
1053 
1054             MppFrame mframe = NULL;
1055             MppBuffer ref_buf = NULL;
1056             hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
1057             mpp_buf_slot_get_prop(reg_ctx->slots,
1058                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
1059                                   SLOT_BUFFER, &ref_buf);
1060             mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
1061                                   SLOT_FRAME_PTR, &mframe);
1062             if (!ref_buf && mpp_get_soc_type() == ROCKCHIP_SOC_RK3588 &&
1063                 reg_ctx->cfg->base.disable_error) {
1064                 if (reg_ctx->missing_ref_buf && reg_ctx->missing_ref_buf_size < mpp_buffer_get_size(framebuf)) {
1065                     mpp_buffer_put(reg_ctx->missing_ref_buf);
1066                     reg_ctx->missing_ref_buf = NULL;
1067                 }
1068 
1069                 if (!reg_ctx->missing_ref_buf) {
1070                     reg_ctx->missing_ref_buf_size = mpp_buffer_get_size(framebuf);
1071                     mpp_buffer_get(reg_ctx->group, &reg_ctx->missing_ref_buf, reg_ctx->missing_ref_buf_size);
1072                     if (!reg_ctx->missing_ref_buf) {
1073                         syn->dec.flags.ref_err = 1;
1074                         h265h_dbg(H265H_DBG_TASK_ERR, "Failed to generate missing ref buf\n");
1075                         return MPP_ERR_NOMEM;
1076                     }
1077                 }
1078                 ref_buf = reg_ctx->missing_ref_buf;
1079             }
1080             if (ref_buf) {
1081                 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(ref_buf);
1082                 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
1083                 h265h_dbg(H265H_DBG_TASK_ERR, "cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
1084                 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
1085                     && (!mpp_frame_get_errinfo(mframe))) {
1086                     distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
1087                     hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
1088                     reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.RefPicList[i].Index7Bits;
1089                     hw_regs->common.reg021.error_intra_mode = 0;
1090                     h265h_dbg(H265H_DBG_TASK_ERR, "update error ref to ref[%d] to poc %d, slot_idx %d, fd %d\n",
1091                               i, dxva_cxt->pp.PicOrderCntValList[i],
1092                               dxva_cxt->pp.RefPicList[i].Index7Bits,
1093                               hw_regs->common_addr.reg132_error_ref_base);
1094                 }
1095             } else {
1096                 h265h_dbg(H265H_DBG_TASK_ERR, "ref[%d] buffer is empty, replace with fd %d\n", i, valid_ref);
1097                 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
1098             }
1099 
1100             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
1101 
1102             SET_REF_VALID(hw_regs->h265d_param, i, 1);
1103 
1104             if (hw_regs->common.reg013.h26x_error_mode &&
1105                 !hw_regs->common.reg021.error_intra_mode &&
1106                 (!ref_buf || mpp_frame_get_errinfo(mframe))) {
1107 
1108                 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1109                 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1110             }
1111         } else {
1112             mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1113             hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1114             /* mark 3 to differ from current frame */
1115             if (reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits)
1116                 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
1117         }
1118         hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1119     }
1120 
1121     if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits) &&
1122         !dxva_cxt->pp.IntraPicFlag) {
1123         h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
1124         syn->dec.flags.ref_err = 1;
1125         return MPP_OK;
1126     }
1127 
1128     if (reg_ctx->is_v34x) {
1129         hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
1130     } else {
1131         hal_h265d_output_pps_packet(hal, syn->dec.syntax.data);
1132     }
1133 
1134     mpp_dev_set_reg_offset(reg_ctx->dev, 161, reg_ctx->spspps_offset);
1135     /* rps */
1136     mpp_dev_set_reg_offset(reg_ctx->dev, 163, reg_ctx->rps_offset);
1137 
1138     hw_regs->common.reg013.timeout_mode = 1;
1139     hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
1140 
1141     hw_regs->common.reg011.buf_empty_en = 1;
1142 
1143     hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
1144     vdpu34x_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
1145                       reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
1146                       (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1147     vdpu34x_setup_statistic(&hw_regs->common, &hw_regs->statistic);
1148     mpp_buffer_sync_end(reg_ctx->bufs);
1149 
1150     return ret;
1151 }
1152 
hal_h265d_vdpu34x_start(void * hal,HalTaskInfo * task)1153 static MPP_RET hal_h265d_vdpu34x_start(void *hal, HalTaskInfo *task)
1154 {
1155     MPP_RET ret = MPP_OK;
1156     RK_U8* p = NULL;
1157     Vdpu34xH265dRegSet *hw_regs = NULL;
1158     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1159     RK_S32 index =  task->dec.reg_index;
1160 
1161     RK_U32 i;
1162 
1163     if (task->dec.flags.parse_err ||
1164         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1165         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1166         return MPP_OK;
1167     }
1168 
1169     if (reg_ctx->fast_mode) {
1170         p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
1171         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1172     } else {
1173         p = (RK_U8*)reg_ctx->hw_regs;
1174         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1175     }
1176 
1177     if (hw_regs == NULL) {
1178         mpp_err("hal_h265d_start hw_regs is NULL");
1179         return MPP_ERR_NULL_PTR;
1180     }
1181     for (i = 0; i < 68; i++) {
1182         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1183                   i, *((RK_U32*)p));
1184         //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
1185         p += 4;
1186     }
1187 
1188     do {
1189         MppDevRegWrCfg wr_cfg;
1190         MppDevRegRdCfg rd_cfg;
1191 
1192         wr_cfg.reg = &hw_regs->common;
1193         wr_cfg.size = sizeof(hw_regs->common);
1194         wr_cfg.offset = OFFSET_COMMON_REGS;
1195 
1196         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1197         if (ret) {
1198             mpp_err_f("set register write failed %d\n", ret);
1199             break;
1200         }
1201 
1202         wr_cfg.reg = &hw_regs->h265d_param;
1203         wr_cfg.size = sizeof(hw_regs->h265d_param);
1204         wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
1205 
1206         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1207         if (ret) {
1208             mpp_err_f("set register write failed %d\n", ret);
1209             break;
1210         }
1211 
1212         wr_cfg.reg = &hw_regs->common_addr;
1213         wr_cfg.size = sizeof(hw_regs->common_addr);
1214         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1215 
1216         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1217         if (ret) {
1218             mpp_err_f("set register write failed %d\n", ret);
1219             break;
1220         }
1221 
1222         wr_cfg.reg = &hw_regs->h265d_addr;
1223         wr_cfg.size = sizeof(hw_regs->h265d_addr);
1224         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1225 
1226         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1227         if (ret) {
1228             mpp_err_f("set register write failed %d\n", ret);
1229             break;
1230         }
1231 
1232         wr_cfg.reg = &hw_regs->statistic;
1233         wr_cfg.size = sizeof(hw_regs->statistic);
1234         wr_cfg.offset = OFFSET_STATISTIC_REGS;
1235 
1236         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1237         if (ret) {
1238             mpp_err_f("set register write failed %d\n", ret);
1239             break;
1240         }
1241 
1242         if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) {
1243             wr_cfg.reg = &hw_regs->highpoc;
1244             wr_cfg.size = sizeof(hw_regs->highpoc);
1245             wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1246 
1247             ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1248             if (ret) {
1249                 mpp_err_f("set register write failed %d\n", ret);
1250                 break;
1251             }
1252         }
1253 
1254         rd_cfg.reg = &hw_regs->irq_status;
1255         rd_cfg.size = sizeof(hw_regs->irq_status);
1256         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1257 
1258         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1259         if (ret) {
1260             mpp_err_f("set register read failed %d\n", ret);
1261             break;
1262         }
1263 
1264         /* rcb info for sram */
1265         vdpu34x_set_rcbinfo(reg_ctx->dev, (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1266 
1267         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1268         if (ret) {
1269             mpp_err_f("send cmd failed %d\n", ret);
1270             break;
1271         }
1272     } while (0);
1273 
1274     return ret;
1275 }
1276 
1277 
hal_h265d_vdpu34x_wait(void * hal,HalTaskInfo * task)1278 static MPP_RET hal_h265d_vdpu34x_wait(void *hal, HalTaskInfo *task)
1279 {
1280     MPP_RET ret = MPP_OK;
1281     RK_S32 index =  task->dec.reg_index;
1282     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1283     RK_U8* p = NULL;
1284     Vdpu34xH265dRegSet *hw_regs = NULL;
1285     RK_S32 i;
1286 
1287     if (reg_ctx->fast_mode) {
1288         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1289     } else {
1290         hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1291     }
1292 
1293     p = (RK_U8*)hw_regs;
1294 
1295     if (task->dec.flags.parse_err ||
1296         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1297         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1298         goto ERR_PROC;
1299     }
1300 
1301     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1302     if (ret)
1303         mpp_err_f("poll cmd failed %d\n", ret);
1304 
1305 ERR_PROC:
1306     if (task->dec.flags.parse_err ||
1307         task->dec.flags.ref_err ||
1308         hw_regs->irq_status.reg224.dec_error_sta ||
1309         hw_regs->irq_status.reg224.buf_empty_sta ||
1310         hw_regs->irq_status.reg224.dec_bus_sta ||
1311         !hw_regs->irq_status.reg224.dec_rdy_sta) {
1312         if (!reg_ctx->fast_mode) {
1313             if (reg_ctx->dec_cb)
1314                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1315         } else {
1316             MppFrame mframe = NULL;
1317             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1318                                   SLOT_FRAME_PTR, &mframe);
1319             if (mframe) {
1320                 reg_ctx->fast_mode_err_found = 1;
1321                 mpp_frame_set_errinfo(mframe, 1);
1322             }
1323         }
1324     } else {
1325         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1326             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1327                 if (task->dec.refer[i] >= 0) {
1328                     MppFrame frame_ref = NULL;
1329 
1330                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1331                                           SLOT_FRAME_PTR, &frame_ref);
1332                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1333                               i, task->dec.refer[i], frame_ref);
1334                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1335                         MppFrame frame_out = NULL;
1336                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1337                                               SLOT_FRAME_PTR, &frame_out);
1338                         mpp_frame_set_errinfo(frame_out, 1);
1339                         break;
1340                     }
1341                 }
1342             }
1343         }
1344     }
1345 
1346     for (i = 0; i < 68; i++) {
1347         if (i == 1) {
1348             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1349                       i, *((RK_U32*)p));
1350         }
1351 
1352         if (i == 45) {
1353             h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1354                       i, *((RK_U32*)p));
1355         }
1356         p += 4;
1357     }
1358 
1359     if (reg_ctx->fast_mode) {
1360         reg_ctx->g_buf[index].use_flag = 0;
1361     }
1362 
1363     return ret;
1364 }
1365 
hal_h265d_vdpu34x_reset(void * hal)1366 static MPP_RET hal_h265d_vdpu34x_reset(void *hal)
1367 {
1368     MPP_RET ret = MPP_OK;
1369     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1370     p_hal->fast_mode_err_found = 0;
1371     (void)hal;
1372     return ret;
1373 }
1374 
hal_h265d_vdpu34x_flush(void * hal)1375 static MPP_RET hal_h265d_vdpu34x_flush(void *hal)
1376 {
1377     MPP_RET ret = MPP_OK;
1378 
1379     (void)hal;
1380     return ret;
1381 }
1382 
hal_h265d_vdpu34x_control(void * hal,MpiCmd cmd_type,void * param)1383 static MPP_RET hal_h265d_vdpu34x_control(void *hal, MpiCmd cmd_type, void *param)
1384 {
1385     MPP_RET ret = MPP_OK;
1386     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1387 
1388     (void)hal;
1389     switch ((MpiCmd)cmd_type) {
1390     case MPP_DEC_SET_FRAME_INFO: {
1391         MppFrame frame = (MppFrame)param;
1392         MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1393 
1394         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1395             vdpu34x_afbc_align_calc(p_hal->slots, frame, 16);
1396         }
1397         break;
1398     }
1399     case MPP_DEC_SET_OUTPUT_FORMAT: {
1400     } break;
1401     default:
1402         break;
1403     }
1404     return  ret;
1405 }
1406 
1407 const MppHalApi hal_h265d_vdpu34x = {
1408     .name = "h265d_vdpu34x",
1409     .type = MPP_CTX_DEC,
1410     .coding = MPP_VIDEO_CodingHEVC,
1411     .ctx_size = sizeof(HalH265dCtx),
1412     .flag = 0,
1413     .init = hal_h265d_vdpu34x_init,
1414     .deinit = hal_h265d_vdpu34x_deinit,
1415     .reg_gen = hal_h265d_vdpu34x_gen_regs,
1416     .start = hal_h265d_vdpu34x_start,
1417     .wait = hal_h265d_vdpu34x_wait,
1418     .reset = hal_h265d_vdpu34x_reset,
1419     .flush = hal_h265d_vdpu34x_flush,
1420     .control = hal_h265d_vdpu34x_control,
1421 };
1422