xref: /OK3568_Linux_fs/external/mpp/mpp/hal/rkdec/h265d/hal_h265d_rkv.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright 2020 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_h265d_rkv"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_mem.h"
23 #include "mpp_bitread.h"
24 #include "mpp_bitput.h"
25 
26 #include "hal_h265d_debug.h"
27 #include "hal_h265d_ctx.h"
28 #include "hal_h265d_com.h"
29 #include "hal_h265d_rkv.h"
30 #include "hal_h265d_rkv_reg.h"
31 #include "h265d_syntax.h"
32 
33 /* #define dump */
34 #ifdef dump
35 static FILE *fp = NULL;
36 #endif
37 #define HW_RPS
38 
39 #define PPS_SIZE                (96 * 64)
40 
hal_h265d_alloc_res(void * hal)41 static MPP_RET hal_h265d_alloc_res(void *hal)
42 {
43     RK_S32 i = 0;
44     RK_S32 ret = 0;
45     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
46     if (reg_ctx->fast_mode) {
47         for (i = 0; i < MAX_GEN_REG; i++) {
48             reg_ctx->g_buf[i].hw_regs =
49                 mpp_calloc_size(void, sizeof(H265d_REGS_t));
50             ret = mpp_buffer_get(reg_ctx->group,
51                                  &reg_ctx->g_buf[i].scaling_list_data,
52                                  SCALING_LIST_SIZE);
53             if (ret) {
54                 mpp_err("h265d scaling_list_data get buffer failed\n");
55                 return ret;
56             }
57 
58             ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->g_buf[i].pps_data,
59                                  PPS_SIZE);
60             if (ret) {
61                 mpp_err("h265d pps_data get buffer failed\n");
62                 return ret;
63             }
64 
65             ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->g_buf[i].rps_data,
66                                  RPS_SIZE);
67             if (ret) {
68                 mpp_err("h265d rps_data get buffer failed\n");
69                 return ret;
70             }
71         }
72     } else {
73         reg_ctx->hw_regs = mpp_calloc_size(void, sizeof(H265d_REGS_t));
74         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->scaling_list_data,
75                              SCALING_LIST_SIZE);
76         if (ret) {
77             mpp_err("h265d scaling_list_data get buffer failed\n");
78             return ret;
79         }
80 
81         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->pps_data, PPS_SIZE);
82         if (ret) {
83             mpp_err("h265d pps_data get buffer failed\n");
84             return ret;
85         }
86 
87         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->rps_data, RPS_SIZE);
88         if (ret) {
89             mpp_err("h265d rps_data get buffer failed\n");
90             return ret;
91         }
92 
93     }
94     return MPP_OK;
95 }
96 
hal_h265d_release_res(void * hal)97 static MPP_RET hal_h265d_release_res(void *hal)
98 {
99     RK_S32 ret = 0;
100     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
101     RK_S32 i = 0;
102     if (reg_ctx->fast_mode) {
103         for (i = 0; i < MAX_GEN_REG; i++) {
104             if (reg_ctx->g_buf[i].scaling_list_data) {
105                 ret = mpp_buffer_put(reg_ctx->g_buf[i].scaling_list_data);
106                 if (ret) {
107                     mpp_err("h265d scaling_list_data free buffer failed\n");
108                     return ret;
109                 }
110             }
111             if (reg_ctx->g_buf[i].pps_data) {
112                 ret = mpp_buffer_put(reg_ctx->g_buf[i].pps_data);
113                 if (ret) {
114                     mpp_err("h265d pps_data free buffer failed\n");
115                     return ret;
116                 }
117             }
118 
119             if (reg_ctx->g_buf[i].rps_data) {
120                 ret = mpp_buffer_put(reg_ctx->g_buf[i].rps_data);
121                 if (ret) {
122                     mpp_err("h265d rps_data free buffer failed\n");
123                     return ret;
124                 }
125             }
126 
127             if (reg_ctx->g_buf[i].hw_regs) {
128                 mpp_free(reg_ctx->g_buf[i].hw_regs);
129                 reg_ctx->g_buf[i].hw_regs = NULL;
130             }
131         }
132     } else {
133         if (reg_ctx->scaling_list_data) {
134             ret = mpp_buffer_put(reg_ctx->scaling_list_data);
135             if (ret) {
136                 mpp_err("h265d scaling_list_data free buffer failed\n");
137                 return ret;
138             }
139         }
140         if (reg_ctx->pps_data) {
141             ret = mpp_buffer_put(reg_ctx->pps_data);
142             if (ret) {
143                 mpp_err("h265d pps_data free buffer failed\n");
144                 return ret;
145             }
146         }
147 
148         if (reg_ctx->rps_data) {
149             ret = mpp_buffer_put(reg_ctx->rps_data);
150             if (ret) {
151                 mpp_err("h265d rps_data free buffer failed\n");
152                 return ret;
153             }
154         }
155 
156         if (reg_ctx->hw_regs) {
157             mpp_free(reg_ctx->hw_regs);
158             reg_ctx->hw_regs = NULL;
159         }
160     }
161     return MPP_OK;
162 }
163 
hal_h265d_rkv_init(void * hal,MppHalCfg * cfg)164 MPP_RET hal_h265d_rkv_init(void *hal, MppHalCfg *cfg)
165 {
166     MPP_RET ret = MPP_NOK;
167     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
168 
169     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
170     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
171 
172     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
173     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
174 
175     if (reg_ctx->scaling_qm == NULL) {
176         mpp_err("scaling_org alloc fail");
177         return MPP_ERR_MALLOC;
178     }
179 
180     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
181     if (reg_ctx->scaling_rk == NULL) {
182         mpp_err("scaling_rk alloc fail");
183         return MPP_ERR_MALLOC;
184     }
185 
186     if (reg_ctx->group == NULL) {
187         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
188         if (ret) {
189             mpp_err("h265d mpp_buffer_group_get failed\n");
190             return ret;
191         }
192     }
193 
194     ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->cabac_table_data, sizeof(cabac_table));
195     if (ret) {
196         mpp_err("h265d cabac_table get buffer failed\n");
197         return ret;
198     }
199 
200     ret = mpp_buffer_write(reg_ctx->cabac_table_data, 0, (void*)cabac_table, sizeof(cabac_table));
201     if (ret) {
202         mpp_err("h265d write cabac_table data failed\n");
203         return ret;
204     }
205 
206     ret = hal_h265d_alloc_res(hal);
207     if (ret) {
208         mpp_err("hal_h265d_alloc_res failed\n");
209         return ret;
210     }
211 
212 
213     {
214         // report hw_info to parser
215         const MppSocInfo *info = mpp_get_soc_info();
216         const void *hw_info = NULL;
217         RK_U32 i = 0;
218 
219         for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
220             if (info->dec_caps[i] && ( info->dec_caps[i]->type == VPU_CLIENT_RKVDEC ||
221                                        info->dec_caps[i]->type == VPU_CLIENT_HEVC_DEC)) {
222                 hw_info = info->dec_caps[i];
223                 break;
224             }
225         }
226 
227         mpp_assert(hw_info);
228         cfg->hw_info = hw_info;
229     }
230 
231 #ifdef dump
232     fp = fopen("/data/hal.bin", "wb");
233 #endif
234     (void) cfg;
235     return MPP_OK;
236 }
237 
hal_h265d_rkv_deinit(void * hal)238 MPP_RET hal_h265d_rkv_deinit(void *hal)
239 {
240     RK_S32 ret = 0;
241     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
242 
243     ret = mpp_buffer_put(reg_ctx->cabac_table_data);
244     if (ret) {
245         mpp_err("h265d cabac_table free buffer failed\n");
246         return ret;
247     }
248 
249     if (reg_ctx->scaling_qm) {
250         mpp_free(reg_ctx->scaling_qm);
251     }
252 
253     if (reg_ctx->sw_rps_buf) {
254         mpp_free(reg_ctx->sw_rps_buf);
255     }
256 
257     if (reg_ctx->scaling_rk) {
258         mpp_free(reg_ctx->scaling_rk);
259     }
260 
261     hal_h265d_release_res(hal);
262 
263     if (reg_ctx->group) {
264         ret = mpp_buffer_group_put(reg_ctx->group);
265         if (ret) {
266             mpp_err("h265d group free buffer failed\n");
267             return ret;
268         }
269     }
270     return MPP_OK;
271 }
272 
hal_h265d_v345_output_pps_packet(void * hal,void * dxva)273 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
274 {
275     RK_S32 fifo_len = 12;
276     RK_S32 i, j;
277     RK_U32 log2_min_cb_size;
278     RK_S32 width, height;
279     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
280     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
281     BitputCtx_t bp;
282     RK_U64 *pps_packet = mpp_calloc(RK_U64, fifo_len + 1);
283 
284     if (NULL == reg_ctx || dxva_cxt == NULL) {
285         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
286                 __FILE__, __FUNCTION__, __LINE__);
287         MPP_FREE(pps_packet);
288         return MPP_ERR_NULL_PTR;
289     }
290 
291     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->pps_data);
292     if (NULL == pps_ptr) {
293         mpp_err("pps_data get ptr error");
294         return MPP_ERR_NOMEM;
295     }
296     memset(pps_ptr, 0, 96 * 64);
297     // pps_packet = (RK_U64 *)(pps_ptr + dxva_cxt->pp.pps_id * 80);
298 
299     for (i = 0; i < 12; i++) pps_packet[i] = 0;
300 
301     mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
302 
303     // SPS
304     mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
305     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
306     mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
307 
308     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
309     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
310     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
311 
312     mpp_put_bits(&bp, width                                          , 13);
313     mpp_put_bits(&bp, height                                         , 13);
314     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
315     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
316     mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
317     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
318     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
319     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
320     ///<-zrh comment ^  57 bit above
321     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
322     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
323     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
324     mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
325     mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
326     mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
327     ///<-zrh comment ^  68 bit above
328     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
329     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
330     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
331     mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
332     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
333     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
334 
335     mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
336     mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
337     mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
338     mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
339     mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
340     ///<-zrh comment ^ 100 bit above
341 
342     mpp_put_bits(&bp, 0                                                    , 7 );
343 
344     mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
345     mpp_put_bits(&bp, 0, 3);
346     mpp_put_align(&bp                                                        , 32, 0xf); //128
347 
348     // PPS
349     mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
350     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
351     mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
352     mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
353     mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
354     mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
355     mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
356     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
357     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
358     mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
359     mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
360     mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
361     mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
362 
363     mpp_put_bits(&bp, log2_min_cb_size +
364                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
365                  dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
366 
367     h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
368               dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
369 
370     mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
371     mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
372     mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
373     mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
374     mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
375     mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
376     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
377     mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
378     mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
379     mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
380     mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
381     mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
382     mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
383     mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
384     mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
385     mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
386     mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
387     mpp_put_bits(&bp, 0                                                        , 3);
388     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
389     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
390     mpp_put_bits(&bp, 0, 4); //mSps_Pps[i]->mMode
391     mpp_put_align(&bp, 64, 0xf);
392     {
393         /// tiles info begin
394         RK_U16 column_width[20];
395         RK_U16 row_height[22];
396 
397         memset(column_width, 0, sizeof(column_width));
398         memset(row_height, 0, sizeof(row_height));
399 
400         if (dxva_cxt->pp.tiles_enabled_flag) {
401 
402             if (dxva_cxt->pp.uniform_spacing_flag == 0) {
403                 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
404                 RK_S32 ctu_width_in_pic = (width +
405                                            (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
406                 RK_S32 ctu_height_in_pic = (height +
407                                             (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
408                 RK_S32 sum = 0;
409                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
410                     column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
411                     sum += column_width[i]  ;
412                 }
413                 column_width[i] = ctu_width_in_pic - sum;
414 
415                 sum = 0;
416                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
417                     row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
418                     sum += row_height[i];
419                 }
420                 row_height[i] = ctu_height_in_pic - sum;
421             } // end of (pps->uniform_spacing_flag == 0)
422             else {
423 
424                 RK_S32    pic_in_cts_width = (width +
425                                               (1 << (log2_min_cb_size +
426                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
427                                              / (1 << (log2_min_cb_size +
428                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
429                 RK_S32 pic_in_cts_height = (height +
430                                             (1 << (log2_min_cb_size +
431                                                    dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
432                                            / (1 << (log2_min_cb_size +
433                                                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
434 
435                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
436                     column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
437                                       (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
438 
439                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
440                     row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
441                                     (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
442             }
443         } // pps->tiles_enabled_flag
444         else {
445             RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
446             column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
447             row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
448         }
449 
450         for (j = 0; j < 20; j++) {
451             if (column_width[j] > 0)
452                 column_width[j]--;
453             mpp_put_bits(&bp, column_width[j], 8);
454         }
455 
456         for (j = 0; j < 22; j++) {
457             if (row_height[j] > 0)
458                 row_height[j]--;
459             mpp_put_bits(&bp, row_height[j], 8);
460         }
461     }
462 
463     {
464         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
465         RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
466 
467         hal_h265d_output_scalinglist_packet(hal, ptr_scaling, dxva);
468         mpp_put_bits(&bp, fd, 32);
469         mpp_put_bits(&bp, 0, 60);
470         mpp_put_align(&bp, 128, 0xf);
471     }
472 
473     for (i = 0; i < 64; i++)
474         memcpy(pps_ptr + i * 96, pps_packet, 96);
475 
476 #ifdef dump
477     fwrite(pps_ptr, 1, 80 * 64, fp);
478     RK_U32 *tmp = (RK_U32 *)pps_ptr;
479     for (i = 0; i < 96 / 4; i++) {
480         h265h_dbg(H265H_DBG_PPS, "pps[%3d] = 0x%08x\n", i, tmp[i]);
481     }
482 #endif
483     MPP_FREE(pps_packet);
484     return 0;
485 }
486 
hal_h265d_output_pps_packet(void * hal,void * dxva)487 static RK_S32 hal_h265d_output_pps_packet(void *hal, void *dxva)
488 {
489     RK_S32 fifo_len = 10;
490     RK_S32 i, j;
491     RK_U32 log2_min_cb_size;
492     RK_S32 width, height;
493     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
494     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
495     BitputCtx_t bp;
496     RK_U64 *pps_packet = mpp_calloc(RK_U64, fifo_len + 1);
497 
498     if (NULL == reg_ctx || dxva_cxt == NULL) {
499         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
500                 __FILE__, __FUNCTION__, __LINE__);
501         MPP_FREE(pps_packet);
502         return MPP_ERR_NULL_PTR;
503     }
504 
505     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->pps_data);
506     if (NULL == pps_ptr) {
507         mpp_err("pps_data get ptr error");
508         return MPP_ERR_NOMEM;
509     }
510     memset(pps_ptr, 0, 80 * 64);
511     // pps_packet = (RK_U64 *)(pps_ptr + dxva_cxt->pp.pps_id * 80);
512 
513     for (i = 0; i < 10; i++) pps_packet[i] = 0;
514 
515     mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
516 
517     // SPS
518     mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
519     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
520     mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
521 
522     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
523     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
524     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
525 
526     mpp_put_bits(&bp, width                                          , 13);
527     mpp_put_bits(&bp, height                                         , 13);
528     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
529     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
530     mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
531     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
532     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
533     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
534     ///<-zrh comment ^  57 bit above
535     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
536     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
537     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
538     mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
539     mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
540     mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
541     ///<-zrh comment ^  68 bit above
542     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
543     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
544     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
545     mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
546     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
547     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
548 
549     mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
550     mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
551     mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
552     mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
553     mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
554     ///<-zrh comment ^ 100 bit above
555 
556     mpp_put_bits(&bp, 0                                                    , 7 );
557     mpp_put_align(&bp                                                         , 32, 0xf);
558 
559     // PPS
560     mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
561     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
562     mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
563     mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
564     mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
565     mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
566     mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
567     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
568     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
569     mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
570     mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
571     mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
572     mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1);
573 
574     mpp_put_bits(&bp, log2_min_cb_size +
575                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
576                  dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
577 
578     h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
579               dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
580 
581     mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
582     mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
583     mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
584     mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
585     mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
586     mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
587     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
588     mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
589     mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
590     mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1);
591 
592     mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
593     mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
594     mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
595     mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
596     mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
597     mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
598     /*slice_segment_header_extension_present_flag need set 0 */
599     mpp_put_bits(&bp, 0                                                        , 1);
600     mpp_put_bits(&bp, 0                                                        , 3);
601     mpp_put_bits(&bp, dxva_cxt->pp.num_tile_columns_minus1 + 1, 5);
602     mpp_put_bits(&bp, dxva_cxt->pp.num_tile_rows_minus1 + 1 , 5 );
603     mpp_put_bits(&bp, 3, 2); //mSps_Pps[i]->mMode
604     mpp_put_align(&bp, 64, 0xf);
605 
606     {
607         /// tiles info begin
608         RK_U16 column_width[20];
609         RK_U16 row_height[22];
610 
611         memset(column_width, 0, sizeof(column_width));
612         memset(row_height, 0, sizeof(row_height));
613 
614         if (dxva_cxt->pp.tiles_enabled_flag) {
615             if (dxva_cxt->pp.uniform_spacing_flag == 0) {
616                 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
617                 RK_S32 ctu_width_in_pic = (width +
618                                            (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
619                 RK_S32 ctu_height_in_pic = (height +
620                                             (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
621                 RK_S32 sum = 0;
622                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
623                     column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
624                     sum += column_width[i]  ;
625                 }
626                 column_width[i] = ctu_width_in_pic - sum;
627 
628                 sum = 0;
629                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
630                     row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
631                     sum += row_height[i];
632                 }
633                 row_height[i] = ctu_height_in_pic - sum;
634             } // end of (pps->uniform_spacing_flag == 0)
635             else {
636 
637                 RK_S32    pic_in_cts_width = (width +
638                                               (1 << (log2_min_cb_size +
639                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
640                                              / (1 << (log2_min_cb_size +
641                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
642                 RK_S32 pic_in_cts_height = (height +
643                                             (1 << (log2_min_cb_size +
644                                                    dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
645                                            / (1 << (log2_min_cb_size +
646                                                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
647 
648                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
649                     column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
650                                       (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
651 
652                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
653                     row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
654                                     (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
655             }
656         } // pps->tiles_enabled_flag
657         else {
658             RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
659             column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
660             row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
661         }
662 
663         for (j = 0; j < 20; j++) {
664             if (column_width[j] > 0)
665                 column_width[j]--;
666             mpp_put_bits(&bp, column_width[j], 8);
667         }
668 
669         for (j = 0; j < 22; j++) {
670             if (row_height[j] > 0)
671                 row_height[j]--;
672             mpp_put_bits(&bp, row_height[j], 8);
673         }
674     }
675 
676     {
677         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
678         RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
679 
680         hal_h265d_output_scalinglist_packet(hal, ptr_scaling, dxva);
681         mpp_put_bits(&bp, fd, 32);
682         mpp_put_align(&bp, 64, 0xf);
683     }
684 
685     for (i = 0; i < 64; i++)
686         memcpy(pps_ptr + i * 80, pps_packet, 80);
687 
688 #ifdef dump
689     fwrite(pps_ptr, 1, 80 * 64, fp);
690     fflush(fp);
691 #endif
692 
693     MPP_FREE(pps_packet);
694     return 0;
695 }
696 
/*
 * Cut the bit range marked by slice_cut_param[] out of each slice, editing
 * the stream buffer in place.
 *
 * For every slice whose cut entry is enabled:
 *   - the byte holding start_bit keeps its bits above start_bit, gets a
 *     single 1 bit at the start_bit position and zeros below it;
 *   - everything from the byte after the cut range up to the end of the
 *     stream is moved down so it directly follows that patched byte;
 *   - SliceBytesInBuffer of the slice and bitstream_size of the picture are
 *     reduced by the number of bytes removed.
 *
 * streambuf - buffer that holds the input bitstream
 * syn       - task whose syntax data is the h265d_dxva2_picture_context_t
 *
 * BSNALunitDataLocation values are not rewritten, so the bytes already
 * removed from earlier slices are subtracted when locating a later slice.
 */
static void update_stream_buffer(MppBuffer streambuf, HalTaskInfo *syn)
{
    h265d_dxva2_picture_context_t *pic =
        (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
    RK_U8 *stream = (RK_U8 *)mpp_buffer_get_ptr(streambuf);
    /* stream length before any cut; used together with the uncorrected
     * BSNALunitDataLocation to size each move */
    const RK_U32 orig_size = pic->bitstream_size;
    RK_U32 removed_total = 0;
    RK_U16 n;

    for (n = 0; n < pic->slice_count; n++) {
        RK_U8 *slice;
        RK_U8 *head;
        RK_U16 first_byte;
        RK_U16 last_byte;
        RK_U8 tail_bits;
        RK_U32 removed;

        if (!pic->slice_cut_param[n].is_enable)
            continue;

        /* number of bits from start_bit to the end of its byte: 1..8 */
        tail_bits = 8 - (pic->slice_cut_param[n].start_bit & 0x7);
        first_byte = pic->slice_cut_param[n].start_bit >> 3;
        last_byte = (pic->slice_cut_param[n].end_bit + 7) >> 3;
        slice = stream + (pic->slice_short[n].BSNALunitDataLocation - removed_total);
        head = slice + first_byte;

        h265h_dbg(H265H_DBG_FUNCTION, "start bit %d start byte[%d] 0x%x end bit %d end byte[%d] 0x%x\n",
                  pic->slice_cut_param[n].start_bit, first_byte, slice[first_byte],
                  pic->slice_cut_param[n].end_bit, last_byte, slice[last_byte]);

        /*
         * Clear the low tail_bits bits and set the topmost of them.
         * With tail_bits == 8 the mask is empty and the byte becomes 0x80.
         */
        *head = (RK_U8)((*head & (0xff << tail_bits)) | (1 << (tail_bits - 1)));

        /* a byte-aligned end followed by a 0x80 byte: drop that byte too */
        if ((pic->slice_cut_param[n].end_bit & 0x7) == 0 && slice[last_byte] == 0x80)
            last_byte += 1;

        h265h_dbg(H265H_DBG_FUNCTION, "i %d location %d count %d SliceBytesInBuffer %d bitstream_size %d\n",
                  n, pic->slice_short[n].BSNALunitDataLocation, pic->slice_count,
                  pic->slice_short[n].SliceBytesInBuffer, pic->bitstream_size);

        /* regions overlap, hence memmove */
        memmove(slice + first_byte + 1, slice + last_byte,
                orig_size - pic->slice_short[n].BSNALunitDataLocation - last_byte);

        removed = last_byte - first_byte - 1;
        pic->slice_short[n].SliceBytesInBuffer -= removed;
        pic->bitstream_size -= removed;
        removed_total += removed;
    }
}
744 
hal_h265d_rkv_gen_regs(void * hal,HalTaskInfo * syn)745 MPP_RET hal_h265d_rkv_gen_regs(void *hal,  HalTaskInfo *syn)
746 {
747     RK_S32 i = 0;
748     RK_S32 log2_min_cb_size;
749     RK_S32 width, height;
750     RK_S32 stride_y, stride_uv, virstrid_y, virstrid_yuv;
751     H265d_REGS_t *hw_regs;
752     RK_S32 ret = MPP_SUCCESS;
753     MppBuffer streambuf = NULL;
754     RK_S32 aglin_offset = 0;
755     RK_S32 valid_ref = -1;
756     MppBuffer framebuf = NULL;
757     RK_U32 sw_ref_valid = 0;
758     RK_U32 stream_buf_size = 0;
759 
760     if (syn->dec.flags.parse_err ||
761         syn->dec.flags.ref_err) {
762         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
763         return MPP_OK;
764     }
765 
766     h265d_dxva2_picture_context_t *dxva_cxt =
767         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
768     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
769 
770     void *rps_ptr = NULL;
771     if (reg_ctx ->fast_mode) {
772         for (i = 0; i < MAX_GEN_REG; i++) {
773             if (!reg_ctx->g_buf[i].use_flag) {
774                 syn->dec.reg_index = i;
775                 reg_ctx->rps_data = reg_ctx->g_buf[i].rps_data;
776                 reg_ctx->scaling_list_data =
777                     reg_ctx->g_buf[i].scaling_list_data;
778                 reg_ctx->pps_data = reg_ctx->g_buf[i].pps_data;
779                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
780                 reg_ctx->g_buf[i].use_flag = 1;
781                 break;
782             }
783         }
784         if (i == MAX_GEN_REG) {
785             mpp_err("hevc rps buf all used");
786             return MPP_ERR_NOMEM;
787         }
788     }
789     rps_ptr = mpp_buffer_get_ptr(reg_ctx->rps_data);
790     if (NULL == rps_ptr) {
791 
792         mpp_err("rps_data get ptr error");
793         return MPP_ERR_NOMEM;
794     }
795 
796 
797     if (syn->dec.syntax.data == NULL) {
798         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
799         return MPP_ERR_NULL_PTR;
800     }
801 
802     /* output pps */
803     if (reg_ctx->is_v345) {
804         hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
805     } else {
806         hal_h265d_output_pps_packet(hal, syn->dec.syntax.data);
807     }
808 
809     if (NULL == reg_ctx->hw_regs) {
810         return MPP_ERR_NULL_PTR;
811     }
812 
813     hw_regs = (H265d_REGS_t*)reg_ctx->hw_regs;
814     memset(hw_regs, 0, sizeof(H265d_REGS_t));
815 
816     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
817 
818     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
819     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
820 
821     stride_y = ((MPP_ALIGN(width, 64)
822                  * (dxva_cxt->pp.bit_depth_luma_minus8 + 8)) >> 3);
823     stride_uv = ((MPP_ALIGN(width, 64)
824                   * (dxva_cxt->pp.bit_depth_chroma_minus8 + 8)) >> 3);
825 
826     stride_y = hevc_hor_align(stride_y);
827     stride_uv = hevc_hor_align(stride_uv);
828     virstrid_y = hevc_ver_align(height) * stride_y;
829     virstrid_yuv  = virstrid_y + stride_uv * hevc_ver_align(height) / 2;
830 
831     hw_regs->sw_picparameter.sw_slice_num = dxva_cxt->slice_count;
832     hw_regs->sw_picparameter.sw_y_hor_virstride = stride_y >> 4;
833     hw_regs->sw_picparameter.sw_uv_hor_virstride = stride_uv >> 4;
834     hw_regs->sw_y_virstride = virstrid_y >> 4;
835     hw_regs->sw_yuv_virstride = virstrid_yuv >> 4;
836     hw_regs->sw_sysctrl.sw_h26x_rps_mode = 0;
837     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
838                           SLOT_BUFFER, &framebuf);
839     hw_regs->sw_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
840 
841     /*if out_base is equal to zero it means this frame may error
842     we return directly add by csy*/
843 
844     if (hw_regs->sw_decout_base == 0) {
845         return 0;
846     }
847 
848     hw_regs->sw_cur_poc = dxva_cxt->pp.CurrPicOrderCntVal;
849 
850     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
851                           &streambuf);
852 
853     if ( dxva_cxt->bitstream == NULL) {
854         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
855     }
856     if (reg_ctx->is_v345) {
857 #ifdef HW_RPS
858         hw_regs->sw_sysctrl.sw_wait_reset_en = 1;
859         hw_regs->v345_reg_ends.reg064_mvc0.refp_layer_same_with_cur = 0xffff;
860         hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
861 #else
862         hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
863         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
864 #endif
865     } else {
866         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
867     }
868 
869     if (dxva_cxt->pp.slice_segment_header_extension_present_flag && !reg_ctx->is_v345) {
870         update_stream_buffer(streambuf, syn);
871     }
872 
873     hw_regs->sw_cabactbl_base   =  mpp_buffer_get_fd(reg_ctx->cabac_table_data);
874     hw_regs->sw_pps_base        =  mpp_buffer_get_fd(reg_ctx->pps_data);
875     hw_regs->sw_rps_base        =  mpp_buffer_get_fd(reg_ctx->rps_data);
876     hw_regs->sw_strm_rlc_base   =  mpp_buffer_get_fd(streambuf);
877     stream_buf_size             =  mpp_buffer_get_size(streambuf);
878 
879     hw_regs->sw_stream_len      = ((dxva_cxt->bitstream_size + 15)
880                                    & (~15)) + 64;
881     hw_regs->sw_stream_len      = stream_buf_size >  hw_regs->sw_stream_len ?
882                                   hw_regs->sw_stream_len : stream_buf_size;
883 
884     aglin_offset =  hw_regs->sw_stream_len - dxva_cxt->bitstream_size;
885     if (aglin_offset > 0) {
886         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
887                aglin_offset);
888     }
889     hw_regs->sw_interrupt.sw_dec_e         = 1;
890     hw_regs->sw_interrupt.sw_dec_timeout_e = 1;
891     hw_regs->sw_interrupt.sw_wr_ddr_align_en = dxva_cxt->pp.tiles_enabled_flag
892                                                ? 0 : 1;
893 
894     ///find s->rps_model[i] position, and set register
895     hw_regs->cabac_error_en = 0xfdfffffd;
896     hw_regs->rkv_reg_ends.extern_error_en = 0x30000000;
897 
898     valid_ref = hw_regs->sw_decout_base;
899     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
900         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
901             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
902             hw_regs->sw_refer_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
903             mpp_buf_slot_get_prop(reg_ctx->slots,
904                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
905                                   SLOT_BUFFER, &framebuf);
906             if (framebuf != NULL) {
907                 hw_regs->sw_refer_base[i] = mpp_buffer_get_fd(framebuf);
908                 valid_ref = hw_regs->sw_refer_base[i];
909             } else {
910                 hw_regs->sw_refer_base[i] = valid_ref;
911             }
912             sw_ref_valid          |=   (1 << i);
913         } else {
914             hw_regs->sw_refer_base[i] = hw_regs->sw_decout_base;
915         }
916     }
917 
918     if (sw_ref_valid) {
919         mpp_dev_set_reg_offset(reg_ctx->dev, 10, sw_ref_valid & 0xf);
920         mpp_dev_set_reg_offset(reg_ctx->dev, 11, ((sw_ref_valid >> 4) & 0xf));
921         mpp_dev_set_reg_offset(reg_ctx->dev, 12, ((sw_ref_valid >> 8) & 0xf));
922         mpp_dev_set_reg_offset(reg_ctx->dev, 13, ((sw_ref_valid >> 12) & 0xf));
923     }
924 
925     return ret;
926 }
927 
hal_h265d_rkv_start(void * hal,HalTaskInfo * task)928 MPP_RET hal_h265d_rkv_start(void *hal, HalTaskInfo *task)
929 {
930     MPP_RET ret = MPP_OK;
931     H265d_REGS_t *hw_regs = NULL;
932     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
933     RK_S32 index =  task->dec.reg_index;
934 
935     RK_U32 i;
936 
937     if (task->dec.flags.parse_err ||
938         task->dec.flags.ref_err) {
939         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
940         return MPP_OK;
941     }
942 
943     if (reg_ctx->fast_mode) {
944         hw_regs = ( H265d_REGS_t *)reg_ctx->g_buf[index].hw_regs;
945     } else {
946         hw_regs = ( H265d_REGS_t *)reg_ctx->hw_regs;
947     }
948 
949     if (hw_regs == NULL) {
950         mpp_err("hal_h265d_start hw_regs is NULL");
951         return MPP_ERR_NULL_PTR;
952     }
953 
954     do {
955         MppDevRegWrCfg wr_cfg;
956         MppDevRegRdCfg rd_cfg;
957         RK_U32 reg_size = (reg_ctx->is_v345) ? V345_HEVC_REGISTERS :
958                           (reg_ctx->client_type == VPU_CLIENT_RKVDEC) ?
959                           RKVDEC_V1_REGISTERS : RKVDEC_HEVC_REGISTERS;
960 
961         reg_size *= sizeof(RK_U32);
962 
963         wr_cfg.reg = hw_regs;
964         wr_cfg.size = reg_size;
965         wr_cfg.offset = 0;
966 
967         if (hal_h265d_debug & H265H_DBG_REG) {
968             for (i = 0; i < reg_size / sizeof(RK_U32); i++)
969                 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n", i, ((RK_U32 *)hw_regs)[i]);
970         }
971 
972         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
973         if (ret) {
974             mpp_err_f("set register write failed %d\n", ret);
975             break;
976         }
977 
978         rd_cfg.reg = hw_regs;
979         rd_cfg.size = reg_size;
980         rd_cfg.offset = 0;
981 
982         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
983         if (ret) {
984             mpp_err_f("set register read failed %d\n", ret);
985             break;
986         }
987 
988         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
989         if (ret) {
990             mpp_err_f("send cmd failed %d\n", ret);
991             break;
992         }
993     } while (0);
994 
995     return ret;
996 }
997 
hal_h265d_rkv_wait(void * hal,HalTaskInfo * task)998 MPP_RET hal_h265d_rkv_wait(void *hal, HalTaskInfo *task)
999 {
1000     MPP_RET ret = MPP_OK;
1001     RK_S32 index =  task->dec.reg_index;
1002     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1003     H265d_REGS_t *hw_regs = NULL;
1004     RK_S32 i;
1005 
1006     if (task->dec.flags.parse_err ||
1007         task->dec.flags.ref_err) {
1008         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1009         goto ERR_PROC;
1010     }
1011 
1012     if (reg_ctx->fast_mode) {
1013         hw_regs = ( H265d_REGS_t *)reg_ctx->g_buf[index].hw_regs;
1014     } else {
1015         hw_regs = ( H265d_REGS_t *)reg_ctx->hw_regs;
1016     }
1017 
1018     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1019     if (ret)
1020         mpp_err_f("poll cmd failed %d\n", ret);
1021 
1022 ERR_PROC:
1023     if (task->dec.flags.parse_err ||
1024         task->dec.flags.ref_err ||
1025         hw_regs->sw_interrupt.sw_dec_error_sta ||
1026         hw_regs->sw_interrupt.sw_dec_timeout_sta ||
1027         hw_regs->sw_interrupt.sw_dec_empty_sta) {
1028         if (!reg_ctx->fast_mode) {
1029             if (reg_ctx->dec_cb)
1030                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1031         } else {
1032             MppFrame mframe = NULL;
1033             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1034                                   SLOT_FRAME_PTR, &mframe);
1035             if (mframe) {
1036                 reg_ctx->fast_mode_err_found = 1;
1037                 mpp_frame_set_errinfo(mframe, 1);
1038             }
1039         }
1040     } else {
1041         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1042             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1043                 if (task->dec.refer[i] >= 0) {
1044                     MppFrame frame_ref = NULL;
1045 
1046                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1047                                           SLOT_FRAME_PTR, &frame_ref);
1048                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1049                               i, task->dec.refer[i], frame_ref);
1050                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1051                         MppFrame frame_out = NULL;
1052                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1053                                               SLOT_FRAME_PTR, &frame_out);
1054                         mpp_frame_set_errinfo(frame_out, 1);
1055                         break;
1056                     }
1057                 }
1058             }
1059         }
1060     }
1061 
1062     if (hal_h265d_debug & H265H_DBG_REG) {
1063         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[1]=0x%08X, regs[45]=0x%08x\n", ((RK_U32 *)hw_regs)[1], ((RK_U32 *)hw_regs)[45]);
1064     }
1065 
1066     if (reg_ctx->fast_mode) {
1067         reg_ctx->g_buf[index].use_flag = 0;
1068     }
1069 
1070     return ret;
1071 }
1072 
hal_h265d_rkv_reset(void * hal)1073 MPP_RET hal_h265d_rkv_reset(void *hal)
1074 {
1075     MPP_RET ret = MPP_OK;
1076     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1077 
1078     p_hal->fast_mode_err_found = 0;
1079 
1080     return ret;
1081 }
1082 
1083 const MppHalApi hal_h265d_rkv = {
1084     .name = "h265d_rkdec",
1085     .type = MPP_CTX_DEC,
1086     .coding = MPP_VIDEO_CodingHEVC,
1087     .ctx_size = sizeof(HalH265dCtx),
1088     .flag = 0,
1089     .init = hal_h265d_rkv_init,
1090     .deinit = hal_h265d_rkv_deinit,
1091     .reg_gen = hal_h265d_rkv_gen_regs,
1092     .start = hal_h265d_rkv_start,
1093     .wait = hal_h265d_rkv_wait,
1094     .reset = hal_h265d_rkv_reset,
1095     .flush = NULL,
1096     .control = NULL,
1097 };
1098