xref: /rockchip-linux_mpp/mpp/hal/rkdec/h265d/hal_h265d_rkv.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /*
2  * Copyright 2020 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_h265d_rkv"
18 
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "mpp_mem.h"
23 #include "mpp_bitread.h"
24 #include "mpp_bitput.h"
25 
26 #include "hal_h265d_debug.h"
27 #include "hal_h265d_ctx.h"
28 #include "hal_h265d_com.h"
29 #include "hal_h265d_rkv.h"
30 #include "hal_h265d_rkv_reg.h"
31 #include "h265d_syntax.h"
32 
33 /* #define dump */
34 #ifdef dump
35 static FILE *fp = NULL;
36 #endif
37 #define HW_RPS
38 
39 #define PPS_SIZE                (96 * 64)
40 
hal_h265d_alloc_res(void * hal)41 static MPP_RET hal_h265d_alloc_res(void *hal)
42 {
43     RK_S32 i = 0;
44     RK_S32 ret = 0;
45     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
46     if (reg_ctx->fast_mode) {
47         for (i = 0; i < MAX_GEN_REG; i++) {
48             reg_ctx->g_buf[i].hw_regs =
49                 mpp_calloc_size(void, sizeof(H265d_REGS_t));
50             ret = mpp_buffer_get(reg_ctx->group,
51                                  &reg_ctx->g_buf[i].scaling_list_data,
52                                  SCALING_LIST_SIZE);
53             if (ret) {
54                 mpp_err("h265d scaling_list_data get buffer failed\n");
55                 return ret;
56             }
57 
58             ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->g_buf[i].pps_data,
59                                  PPS_SIZE);
60             if (ret) {
61                 mpp_err("h265d pps_data get buffer failed\n");
62                 return ret;
63             }
64 
65             ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->g_buf[i].rps_data,
66                                  RPS_SIZE);
67             if (ret) {
68                 mpp_err("h265d rps_data get buffer failed\n");
69                 return ret;
70             }
71         }
72     } else {
73         reg_ctx->hw_regs = mpp_calloc_size(void, sizeof(H265d_REGS_t));
74         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->scaling_list_data,
75                              SCALING_LIST_SIZE);
76         if (ret) {
77             mpp_err("h265d scaling_list_data get buffer failed\n");
78             return ret;
79         }
80 
81         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->pps_data, PPS_SIZE);
82         if (ret) {
83             mpp_err("h265d pps_data get buffer failed\n");
84             return ret;
85         }
86 
87         ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->rps_data, RPS_SIZE);
88         if (ret) {
89             mpp_err("h265d rps_data get buffer failed\n");
90             return ret;
91         }
92 
93     }
94     return MPP_OK;
95 }
96 
hal_h265d_release_res(void * hal)97 static MPP_RET hal_h265d_release_res(void *hal)
98 {
99     RK_S32 ret = 0;
100     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
101     RK_S32 i = 0;
102     if (reg_ctx->fast_mode) {
103         for (i = 0; i < MAX_GEN_REG; i++) {
104             if (reg_ctx->g_buf[i].scaling_list_data) {
105                 ret = mpp_buffer_put(reg_ctx->g_buf[i].scaling_list_data);
106                 if (ret) {
107                     mpp_err("h265d scaling_list_data free buffer failed\n");
108                     return ret;
109                 }
110             }
111             if (reg_ctx->g_buf[i].pps_data) {
112                 ret = mpp_buffer_put(reg_ctx->g_buf[i].pps_data);
113                 if (ret) {
114                     mpp_err("h265d pps_data free buffer failed\n");
115                     return ret;
116                 }
117             }
118 
119             if (reg_ctx->g_buf[i].rps_data) {
120                 ret = mpp_buffer_put(reg_ctx->g_buf[i].rps_data);
121                 if (ret) {
122                     mpp_err("h265d rps_data free buffer failed\n");
123                     return ret;
124                 }
125             }
126 
127             if (reg_ctx->g_buf[i].hw_regs) {
128                 mpp_free(reg_ctx->g_buf[i].hw_regs);
129                 reg_ctx->g_buf[i].hw_regs = NULL;
130             }
131         }
132     } else {
133         if (reg_ctx->scaling_list_data) {
134             ret = mpp_buffer_put(reg_ctx->scaling_list_data);
135             if (ret) {
136                 mpp_err("h265d scaling_list_data free buffer failed\n");
137                 return ret;
138             }
139         }
140         if (reg_ctx->pps_data) {
141             ret = mpp_buffer_put(reg_ctx->pps_data);
142             if (ret) {
143                 mpp_err("h265d pps_data free buffer failed\n");
144                 return ret;
145             }
146         }
147 
148         if (reg_ctx->rps_data) {
149             ret = mpp_buffer_put(reg_ctx->rps_data);
150             if (ret) {
151                 mpp_err("h265d rps_data free buffer failed\n");
152                 return ret;
153             }
154         }
155 
156         if (reg_ctx->hw_regs) {
157             mpp_free(reg_ctx->hw_regs);
158             reg_ctx->hw_regs = NULL;
159         }
160     }
161     return MPP_OK;
162 }
163 
hal_h265d_rkv_init(void * hal,MppHalCfg * cfg)164 MPP_RET hal_h265d_rkv_init(void *hal, MppHalCfg *cfg)
165 {
166     MPP_RET ret = MPP_NOK;
167     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
168 
169     mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
170     mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
171 
172     reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
173     reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
174 
175     if (reg_ctx->scaling_qm == NULL) {
176         mpp_err("scaling_org alloc fail");
177         return MPP_ERR_MALLOC;
178     }
179 
180     reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
181     if (reg_ctx->scaling_rk == NULL) {
182         mpp_err("scaling_rk alloc fail");
183         return MPP_ERR_MALLOC;
184     }
185 
186     if (reg_ctx->group == NULL) {
187         ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
188         if (ret) {
189             mpp_err("h265d mpp_buffer_group_get failed\n");
190             return ret;
191         }
192     }
193 
194     ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->cabac_table_data, sizeof(cabac_table));
195     if (ret) {
196         mpp_err("h265d cabac_table get buffer failed\n");
197         return ret;
198     }
199 
200     ret = mpp_buffer_write(reg_ctx->cabac_table_data, 0, (void*)cabac_table, sizeof(cabac_table));
201     if (ret) {
202         mpp_err("h265d write cabac_table data failed\n");
203         return ret;
204     }
205     mpp_buffer_sync_end(reg_ctx->cabac_table_data);
206 
207     ret = hal_h265d_alloc_res(hal);
208     if (ret) {
209         mpp_err("hal_h265d_alloc_res failed\n");
210         return ret;
211     }
212 
213 #ifdef dump
214     fp = fopen("/data/hal.bin", "wb");
215 #endif
216     (void) cfg;
217     return MPP_OK;
218 }
219 
hal_h265d_rkv_deinit(void * hal)220 MPP_RET hal_h265d_rkv_deinit(void *hal)
221 {
222     RK_S32 ret = 0;
223     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
224 
225     ret = mpp_buffer_put(reg_ctx->cabac_table_data);
226     if (ret) {
227         mpp_err("h265d cabac_table free buffer failed\n");
228         return ret;
229     }
230 
231     if (reg_ctx->scaling_qm) {
232         mpp_free(reg_ctx->scaling_qm);
233     }
234 
235     if (reg_ctx->sw_rps_buf) {
236         mpp_free(reg_ctx->sw_rps_buf);
237     }
238 
239     if (reg_ctx->scaling_rk) {
240         mpp_free(reg_ctx->scaling_rk);
241     }
242 
243     hal_h265d_release_res(hal);
244 
245     if (reg_ctx->group) {
246         ret = mpp_buffer_group_put(reg_ctx->group);
247         if (ret) {
248             mpp_err("h265d group free buffer failed\n");
249             return ret;
250         }
251     }
252     return MPP_OK;
253 }
254 
hal_h265d_v345_output_pps_packet(void * hal,void * dxva)255 static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
256 {
257     RK_S32 fifo_len = 12;
258     RK_S32 i, j;
259     RK_U32 log2_min_cb_size;
260     RK_S32 width, height;
261     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
262     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
263     BitputCtx_t bp;
264     RK_U64 *pps_packet = mpp_calloc(RK_U64, fifo_len + 1);
265 
266     if (NULL == reg_ctx || dxva_cxt == NULL) {
267         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
268                 __FILE__, __FUNCTION__, __LINE__);
269         MPP_FREE(pps_packet);
270         return MPP_ERR_NULL_PTR;
271     }
272 
273     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->pps_data);
274     if (NULL == pps_ptr) {
275         mpp_err("pps_data get ptr error");
276         return MPP_ERR_NOMEM;
277     }
278     memset(pps_ptr, 0, 96 * 64);
279     // pps_packet = (RK_U64 *)(pps_ptr + dxva_cxt->pp.pps_id * 80);
280 
281     for (i = 0; i < 12; i++) pps_packet[i] = 0;
282 
283     mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
284 
285     // SPS
286     mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
287     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
288     mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
289 
290     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
291     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
292     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
293 
294     mpp_put_bits(&bp, width                                          , 13);
295     mpp_put_bits(&bp, height                                         , 13);
296     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
297     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
298     mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
299     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
300     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
301     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
302     ///<-zrh comment ^  57 bit above
303     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
304     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
305     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
306     mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
307     mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
308     mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
309     ///<-zrh comment ^  68 bit above
310     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
311     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
312     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
313     mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
314     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
315     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
316 
317     mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
318     mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
319     mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
320     mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
321     mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
322     ///<-zrh comment ^ 100 bit above
323 
324     mpp_put_bits(&bp, 0                                                    , 7 );
325 
326     mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1,       4);
327     mpp_put_bits(&bp, 0, 3);
328     mpp_put_align(&bp                                                        , 32, 0xf); //128
329 
330     // PPS
331     mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
332     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
333     mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
334     mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
335     mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
336     mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
337     mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
338     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
339     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
340     mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
341     mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
342     mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
343     mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
344 
345     mpp_put_bits(&bp, log2_min_cb_size +
346                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
347                  dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
348 
349     h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
350               dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
351 
352     mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
353     mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
354     mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
355     mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
356     mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
357     mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
358     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
359     mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
360     mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
361     mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1); //185
362     mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
363     mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
364     mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
365     mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
366     mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
367     mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
368     mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
369     mpp_put_bits(&bp, 0                                                        , 3);
370     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
371     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
372     mpp_put_bits(&bp, 0, 4); //mSps_Pps[i]->mMode
373     mpp_put_align(&bp, 64, 0xf);
374     {
375         /// tiles info begin
376         RK_U16 column_width[20];
377         RK_U16 row_height[22];
378 
379         memset(column_width, 0, sizeof(column_width));
380         memset(row_height, 0, sizeof(row_height));
381 
382         if (dxva_cxt->pp.tiles_enabled_flag) {
383 
384             if (dxva_cxt->pp.uniform_spacing_flag == 0) {
385                 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
386                 RK_S32 ctu_width_in_pic = (width +
387                                            (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
388                 RK_S32 ctu_height_in_pic = (height +
389                                             (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
390                 RK_S32 sum = 0;
391                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
392                     column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
393                     sum += column_width[i]  ;
394                 }
395                 column_width[i] = ctu_width_in_pic - sum;
396 
397                 sum = 0;
398                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
399                     row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
400                     sum += row_height[i];
401                 }
402                 row_height[i] = ctu_height_in_pic - sum;
403             } // end of (pps->uniform_spacing_flag == 0)
404             else {
405 
406                 RK_S32    pic_in_cts_width = (width +
407                                               (1 << (log2_min_cb_size +
408                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
409                                              / (1 << (log2_min_cb_size +
410                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
411                 RK_S32 pic_in_cts_height = (height +
412                                             (1 << (log2_min_cb_size +
413                                                    dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
414                                            / (1 << (log2_min_cb_size +
415                                                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
416 
417                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
418                     column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
419                                       (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
420 
421                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
422                     row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
423                                     (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
424             }
425         } // pps->tiles_enabled_flag
426         else {
427             RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
428             column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
429             row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
430         }
431 
432         for (j = 0; j < 20; j++) {
433             if (column_width[j] > 0)
434                 column_width[j]--;
435             mpp_put_bits(&bp, column_width[j], 8);
436         }
437 
438         for (j = 0; j < 22; j++) {
439             if (row_height[j] > 0)
440                 row_height[j]--;
441             mpp_put_bits(&bp, row_height[j], 8);
442         }
443     }
444 
445     {
446         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
447         RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
448 
449         hal_h265d_output_scalinglist_packet(hal, ptr_scaling, dxva);
450         mpp_put_bits(&bp, fd, 32);
451         mpp_put_bits(&bp, 0, 60);
452         mpp_put_align(&bp, 128, 0xf);
453     }
454 
455     for (i = 0; i < 64; i++)
456         memcpy(pps_ptr + i * 96, pps_packet, 96);
457 
458 #ifdef dump
459     fwrite(pps_ptr, 1, 80 * 64, fp);
460     RK_U32 *tmp = (RK_U32 *)pps_ptr;
461     for (i = 0; i < 96 / 4; i++) {
462         h265h_dbg(H265H_DBG_PPS, "pps[%3d] = 0x%08x\n", i, tmp[i]);
463     }
464 #endif
465     MPP_FREE(pps_packet);
466     return 0;
467 }
468 
hal_h265d_output_pps_packet(void * hal,void * dxva)469 static RK_S32 hal_h265d_output_pps_packet(void *hal, void *dxva)
470 {
471     RK_S32 fifo_len = 10;
472     RK_S32 i, j;
473     RK_U32 log2_min_cb_size;
474     RK_S32 width, height;
475     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
476     h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
477     BitputCtx_t bp;
478     RK_U64 *pps_packet = mpp_calloc(RK_U64, fifo_len + 1);
479 
480     if (NULL == reg_ctx || dxva_cxt == NULL) {
481         mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
482                 __FILE__, __FUNCTION__, __LINE__);
483         MPP_FREE(pps_packet);
484         return MPP_ERR_NULL_PTR;
485     }
486 
487     void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->pps_data);
488     if (NULL == pps_ptr) {
489         mpp_err("pps_data get ptr error");
490         return MPP_ERR_NOMEM;
491     }
492     memset(pps_ptr, 0, 80 * 64);
493     // pps_packet = (RK_U64 *)(pps_ptr + dxva_cxt->pp.pps_id * 80);
494 
495     for (i = 0; i < 10; i++) pps_packet[i] = 0;
496 
497     mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
498 
499     // SPS
500     mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
501     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
502     mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
503 
504     log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
505     width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
506     height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
507 
508     mpp_put_bits(&bp, width                                          , 13);
509     mpp_put_bits(&bp, height                                         , 13);
510     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
511     mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
512     mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
513     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
514     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
515     mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
516     ///<-zrh comment ^  57 bit above
517     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
518     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
519     mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
520     mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
521     mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
522     mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
523     ///<-zrh comment ^  68 bit above
524     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
525     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0  , 4);
526     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
527     mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag                                               , 1);
528     mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size                                , 3);
529     mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
530 
531     mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets             , 7);
532     mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag         , 1);
533     mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps              , 6);
534     mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag           , 1);
535     mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag     , 1);
536     ///<-zrh comment ^ 100 bit above
537 
538     mpp_put_bits(&bp, 0                                                    , 7 );
539     mpp_put_align(&bp                                                         , 32, 0xf);
540 
541     // PPS
542     mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
543     mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
544     mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
545     mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
546     mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
547     mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
548     mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
549     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);
550     mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
551     mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
552     mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
553     mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
554     mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1);
555 
556     mpp_put_bits(&bp, log2_min_cb_size +
557                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
558                  dxva_cxt->pp.diff_cu_qp_delta_depth                             , 3);
559 
560     h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
561               dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
562 
563     mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                            , 5);
564     mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                            , 5);
565     mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag    , 1);
566     mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                          , 1);
567     mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                        , 1);
568     mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag              , 1 );
569     mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                          , 1 );
570     mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag            , 1);
571     mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag  , 1);
572     mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag       , 1);
573 
574     mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag     , 1);
575     mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag         , 1);
576     mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                        , 4);
577     mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                          , 4);
578     mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag             , 1);
579     mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2        , 3);
580     /*slice_segment_header_extension_present_flag need set 0 */
581     mpp_put_bits(&bp, 0                                                        , 1);
582     mpp_put_bits(&bp, 0                                                        , 3);
583     mpp_put_bits(&bp, dxva_cxt->pp.num_tile_columns_minus1 + 1, 5);
584     mpp_put_bits(&bp, dxva_cxt->pp.num_tile_rows_minus1 + 1 , 5 );
585     mpp_put_bits(&bp, 3, 2); //mSps_Pps[i]->mMode
586     mpp_put_align(&bp, 64, 0xf);
587 
588     {
589         /// tiles info begin
590         RK_U16 column_width[20];
591         RK_U16 row_height[22];
592 
593         memset(column_width, 0, sizeof(column_width));
594         memset(row_height, 0, sizeof(row_height));
595 
596         if (dxva_cxt->pp.tiles_enabled_flag) {
597             if (dxva_cxt->pp.uniform_spacing_flag == 0) {
598                 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
599                 RK_S32 ctu_width_in_pic = (width +
600                                            (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
601                 RK_S32 ctu_height_in_pic = (height +
602                                             (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
603                 RK_S32 sum = 0;
604                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
605                     column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
606                     sum += column_width[i]  ;
607                 }
608                 column_width[i] = ctu_width_in_pic - sum;
609 
610                 sum = 0;
611                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
612                     row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
613                     sum += row_height[i];
614                 }
615                 row_height[i] = ctu_height_in_pic - sum;
616             } // end of (pps->uniform_spacing_flag == 0)
617             else {
618 
619                 RK_S32    pic_in_cts_width = (width +
620                                               (1 << (log2_min_cb_size +
621                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
622                                              / (1 << (log2_min_cb_size +
623                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
624                 RK_S32 pic_in_cts_height = (height +
625                                             (1 << (log2_min_cb_size +
626                                                    dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
627                                            / (1 << (log2_min_cb_size +
628                                                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
629 
630                 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
631                     column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
632                                       (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
633 
634                 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
635                     row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
636                                     (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
637             }
638         } // pps->tiles_enabled_flag
639         else {
640             RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
641             column_width[0] = (width  + MaxCUWidth - 1) / MaxCUWidth;
642             row_height[0]   = (height + MaxCUWidth - 1) / MaxCUWidth;
643         }
644 
645         for (j = 0; j < 20; j++) {
646             if (column_width[j] > 0)
647                 column_width[j]--;
648             mpp_put_bits(&bp, column_width[j], 8);
649         }
650 
651         for (j = 0; j < 22; j++) {
652             if (row_height[j] > 0)
653                 row_height[j]--;
654             mpp_put_bits(&bp, row_height[j], 8);
655         }
656     }
657 
658     {
659         RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
660         RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
661 
662         hal_h265d_output_scalinglist_packet(hal, ptr_scaling, dxva);
663         mpp_put_bits(&bp, fd, 32);
664         mpp_put_align(&bp, 64, 0xf);
665     }
666 
667     for (i = 0; i < 64; i++)
668         memcpy(pps_ptr + i * 80, pps_packet, 80);
669 
670 #ifdef dump
671     fwrite(pps_ptr, 1, 80 * 64, fp);
672     fflush(fp);
673 #endif
674 
675     MPP_FREE(pps_packet);
676     return 0;
677 }
678 
/*
 * Cut a bit range out of every slice that has slice_cut_param[].is_enable
 * set, compacting the stream buffer in place.
 *
 * For each such slice the byte holding start_bit is rewritten so that the
 * bits before start_bit are kept, a single 1 bit is placed at start_bit and
 * the rest of the byte is zero.  Everything from end_byte onwards is then
 * moved down to directly follow that byte.  SliceBytesInBuffer of the slice
 * and bitstream_size of the picture are reduced by the number of bytes
 * removed.
 *
 * streambuf - buffer holding the packed slice data (modified in place)
 * syn       - task whose dec.syntax.data is the dxva picture context
 */
static void update_stream_buffer(MppBuffer streambuf, HalTaskInfo *syn)
{
    h265d_dxva2_picture_context_t *pic =
        (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
    RK_U8 *stream_base = (RK_U8*)mpp_buffer_get_ptr(streambuf);
    /* Size before any cut; the tail length below is derived from it. */
    RK_U32 orig_size = pic->bitstream_size;
    RK_U32 removed = 0;
    RK_U32 removed_total = 0;
    RK_U16 first_byte, last_byte, n = 0;
    RK_U8 tail_bits = 0;
    RK_U8 *slice = NULL;
    RK_U8 *patch = NULL;

    for (n = 0; n < pic->slice_count; n++) {
        if (!pic->slice_cut_param[n].is_enable)
            continue;

        /* Number of bits from start_bit to the end of its byte (1..8). */
        tail_bits = 8 - (pic->slice_cut_param[n].start_bit & 0x7);
        first_byte = pic->slice_cut_param[n].start_bit >> 3;
        last_byte = (pic->slice_cut_param[n].end_bit + 7) >> 3;

        /* Earlier cuts already shifted this slice towards the buffer start. */
        slice = stream_base +
                (pic->slice_short[n].BSNALunitDataLocation - removed_total);
        patch = slice + first_byte;

        h265h_dbg(H265H_DBG_FUNCTION, "start bit %d start byte[%d] 0x%x end bit %d end byte[%d] 0x%x\n",
                  pic->slice_cut_param[n].start_bit, first_byte, slice[first_byte],
                  pic->slice_cut_param[n].end_bit, last_byte, slice[last_byte]);

        if (tail_bits == 8) {
            /* start_bit is byte aligned: the whole byte becomes 1000 0000b. */
            *patch = 0x80;
        } else {
            /* Keep the leading bits, then append a 1 followed by zeros. */
            *patch = (*patch >> tail_bits) << tail_bits;
            *patch |= 1 << (tail_bits - 1);
        }

        /* A byte-aligned end followed by a 0x80 byte: drop that byte too. */
        if ((pic->slice_cut_param[n].end_bit & 0x7) == 0 && slice[last_byte] == 0x80)
            last_byte += 1;

        h265h_dbg(H265H_DBG_FUNCTION, "i %d location %d count %d SliceBytesInBuffer %d bitstream_size %d\n",
                  n, pic->slice_short[n].BSNALunitDataLocation, pic->slice_count,
                  pic->slice_short[n].SliceBytesInBuffer, pic->bitstream_size);

        /* Pull the remainder of the stream down over the removed bytes. */
        memmove(patch + 1, slice + last_byte,
                orig_size - pic->slice_short[n].BSNALunitDataLocation - last_byte);

        removed = last_byte - first_byte - 1;
        removed_total += removed;
        pic->bitstream_size -= removed;
        pic->slice_short[n].SliceBytesInBuffer -= removed;
    }
}
726 
hal_h265d_rkv_gen_regs(void * hal,HalTaskInfo * syn)727 MPP_RET hal_h265d_rkv_gen_regs(void *hal,  HalTaskInfo *syn)
728 {
729     RK_S32 i = 0;
730     RK_S32 stride_y, stride_uv, virstrid_y, virstrid_yuv;
731     H265d_REGS_t *hw_regs;
732     RK_S32 ret = MPP_SUCCESS;
733     MppBuffer streambuf = NULL;
734     RK_S32 aglin_offset = 0;
735     RK_S32 valid_ref = -1;
736     MppBuffer framebuf = NULL;
737     RK_U32 sw_ref_valid = 0;
738     RK_U32 stream_buf_size = 0;
739     HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
740     MppFrame mframe;
741 
742     if (syn->dec.flags.parse_err ||
743         (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
744         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
745         return MPP_OK;
746     }
747 
748     h265d_dxva2_picture_context_t *dxva_cxt =
749         (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
750 
751     void *rps_ptr = NULL;
752     if (reg_ctx ->fast_mode) {
753         for (i = 0; i < MAX_GEN_REG; i++) {
754             if (!reg_ctx->g_buf[i].use_flag) {
755                 syn->dec.reg_index = i;
756                 reg_ctx->rps_data = reg_ctx->g_buf[i].rps_data;
757                 reg_ctx->scaling_list_data =
758                     reg_ctx->g_buf[i].scaling_list_data;
759                 reg_ctx->pps_data = reg_ctx->g_buf[i].pps_data;
760                 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
761                 reg_ctx->g_buf[i].use_flag = 1;
762                 break;
763             }
764         }
765         if (i == MAX_GEN_REG) {
766             mpp_err("hevc rps buf all used");
767             return MPP_ERR_NOMEM;
768         }
769     } else {
770         syn->dec.reg_index = 0;
771     }
772     rps_ptr = mpp_buffer_get_ptr(reg_ctx->rps_data);
773     if (NULL == rps_ptr) {
774 
775         mpp_err("rps_data get ptr error");
776         return MPP_ERR_NOMEM;
777     }
778 
779 
780     if (syn->dec.syntax.data == NULL) {
781         mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
782         return MPP_ERR_NULL_PTR;
783     }
784 
785     /* output pps */
786     if (reg_ctx->is_v345) {
787         hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
788     } else {
789         hal_h265d_output_pps_packet(hal, syn->dec.syntax.data);
790     }
791 
792     if (NULL == reg_ctx->hw_regs) {
793         return MPP_ERR_NULL_PTR;
794     }
795 
796     hw_regs = (H265d_REGS_t*)reg_ctx->hw_regs;
797     memset(hw_regs, 0, sizeof(H265d_REGS_t));
798 
799     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
800                           SLOT_FRAME_PTR, &mframe);
801     stride_y = mpp_frame_get_hor_stride(mframe);
802     stride_uv = mpp_frame_get_hor_stride(mframe);
803     virstrid_y = mpp_frame_get_ver_stride(mframe) * stride_y;
804     virstrid_yuv  = virstrid_y + stride_uv * mpp_frame_get_ver_stride(mframe) / 2;
805 
806     hw_regs->sw_picparameter.sw_slice_num = dxva_cxt->slice_count;
807     hw_regs->sw_picparameter.sw_y_hor_virstride = stride_y >> 4;
808     hw_regs->sw_picparameter.sw_uv_hor_virstride = stride_uv >> 4;
809     hw_regs->sw_y_virstride = virstrid_y >> 4;
810     hw_regs->sw_yuv_virstride = virstrid_yuv >> 4;
811     hw_regs->sw_sysctrl.sw_h26x_rps_mode = 0;
812     mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
813                           SLOT_BUFFER, &framebuf);
814     hw_regs->sw_decout_base  = mpp_buffer_get_fd(framebuf); //just index need map
815 
816     /*if out_base is equal to zero it means this frame may error
817     we return directly add by csy*/
818 
819     if (hw_regs->sw_decout_base == 0) {
820         return 0;
821     }
822 
823     hw_regs->sw_cur_poc = dxva_cxt->pp.CurrPicOrderCntVal;
824 
825     mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
826                           &streambuf);
827 
828     if ( dxva_cxt->bitstream == NULL) {
829         dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
830     }
831     if (reg_ctx->is_v345) {
832 #ifdef HW_RPS
833         hw_regs->sw_sysctrl.sw_wait_reset_en = 1;
834         hw_regs->v345_reg_ends.reg064_mvc0.refp_layer_same_with_cur = 0xffff;
835         hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
836 #else
837         hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
838         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
839 #endif
840     } else {
841         hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
842     }
843 
844     if (dxva_cxt->pp.slice_segment_header_extension_present_flag && !reg_ctx->is_v345) {
845         update_stream_buffer(streambuf, syn);
846         mpp_buffer_sync_end(streambuf);
847     }
848 
849     hw_regs->sw_cabactbl_base   =  mpp_buffer_get_fd(reg_ctx->cabac_table_data);
850     hw_regs->sw_pps_base        =  mpp_buffer_get_fd(reg_ctx->pps_data);
851     hw_regs->sw_rps_base        =  mpp_buffer_get_fd(reg_ctx->rps_data);
852     hw_regs->sw_strm_rlc_base   =  mpp_buffer_get_fd(streambuf);
853     stream_buf_size             =  mpp_buffer_get_size(streambuf);
854 
855     hw_regs->sw_stream_len      = ((dxva_cxt->bitstream_size + 15)
856                                    & (~15)) + 64;
857     hw_regs->sw_stream_len      = stream_buf_size >  hw_regs->sw_stream_len ?
858                                   hw_regs->sw_stream_len : stream_buf_size;
859 
860     aglin_offset =  hw_regs->sw_stream_len - dxva_cxt->bitstream_size;
861     if (aglin_offset > 0) {
862         memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
863                aglin_offset);
864     }
865     hw_regs->sw_interrupt.sw_dec_e         = 1;
866     hw_regs->sw_interrupt.sw_dec_timeout_e = 1;
867     hw_regs->sw_interrupt.sw_wr_ddr_align_en = dxva_cxt->pp.tiles_enabled_flag
868                                                ? 0 : 1;
869 
870     ///find s->rps_model[i] position, and set register
871     hw_regs->cabac_error_en = 0xfdfffffd;
872     hw_regs->rkv_reg_ends.extern_error_en = 0x30000000;
873 
874     valid_ref = hw_regs->sw_decout_base;
875     for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
876         if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
877             dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
878             hw_regs->sw_refer_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
879             mpp_buf_slot_get_prop(reg_ctx->slots,
880                                   dxva_cxt->pp.RefPicList[i].Index7Bits,
881                                   SLOT_BUFFER, &framebuf);
882             if (framebuf != NULL) {
883                 hw_regs->sw_refer_base[i] = mpp_buffer_get_fd(framebuf);
884                 valid_ref = hw_regs->sw_refer_base[i];
885             } else {
886                 hw_regs->sw_refer_base[i] = valid_ref;
887             }
888             sw_ref_valid          |=   (1 << i);
889         } else {
890             hw_regs->sw_refer_base[i] = hw_regs->sw_decout_base;
891         }
892     }
893 
894     if (sw_ref_valid) {
895         mpp_dev_set_reg_offset(reg_ctx->dev, 10, sw_ref_valid & 0xf);
896         mpp_dev_set_reg_offset(reg_ctx->dev, 11, ((sw_ref_valid >> 4) & 0xf));
897         mpp_dev_set_reg_offset(reg_ctx->dev, 12, ((sw_ref_valid >> 8) & 0xf));
898         mpp_dev_set_reg_offset(reg_ctx->dev, 13, ((sw_ref_valid >> 12) & 0xf));
899     }
900     mpp_buffer_sync_end(reg_ctx->scaling_list_data);
901     mpp_buffer_sync_end(reg_ctx->pps_data);
902     mpp_buffer_sync_end(reg_ctx->rps_data);
903 
904     return ret;
905 }
906 
hal_h265d_rkv_start(void * hal,HalTaskInfo * task)907 MPP_RET hal_h265d_rkv_start(void *hal, HalTaskInfo *task)
908 {
909     MPP_RET ret = MPP_OK;
910     H265d_REGS_t *hw_regs = NULL;
911     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
912     RK_S32 index =  task->dec.reg_index;
913 
914     RK_U32 i;
915 
916     if (task->dec.flags.parse_err ||
917         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
918         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
919         return MPP_OK;
920     }
921 
922     if (reg_ctx->fast_mode) {
923         hw_regs = ( H265d_REGS_t *)reg_ctx->g_buf[index].hw_regs;
924     } else {
925         hw_regs = ( H265d_REGS_t *)reg_ctx->hw_regs;
926     }
927 
928     if (hw_regs == NULL) {
929         mpp_err("hal_h265d_start hw_regs is NULL");
930         return MPP_ERR_NULL_PTR;
931     }
932 
933     do {
934         MppDevRegWrCfg wr_cfg;
935         MppDevRegRdCfg rd_cfg;
936 
937         RK_U32 reg_size = (reg_ctx->is_v341 || reg_ctx->is_v345) ? V345_HEVC_REGISTERS :
938                           (reg_ctx->client_type == VPU_CLIENT_RKVDEC) ?
939                           RKVDEC_V1_REGISTERS : RKVDEC_HEVC_REGISTERS;
940 
941         reg_size *= sizeof(RK_U32);
942 
943         wr_cfg.reg = hw_regs;
944         wr_cfg.size = reg_size;
945         wr_cfg.offset = 0;
946 
947         if (hal_h265d_debug & H265H_DBG_REG) {
948             for (i = 0; i < reg_size / sizeof(RK_U32); i++)
949                 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n", i, ((RK_U32 *)hw_regs)[i]);
950         }
951 
952         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
953         if (ret) {
954             mpp_err_f("set register write failed %d\n", ret);
955             break;
956         }
957 
958         rd_cfg.reg = hw_regs;
959         rd_cfg.size = reg_size;
960         rd_cfg.offset = 0;
961 
962         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
963         if (ret) {
964             mpp_err_f("set register read failed %d\n", ret);
965             break;
966         }
967 
968         ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
969         if (ret) {
970             mpp_err_f("send cmd failed %d\n", ret);
971             break;
972         }
973     } while (0);
974 
975     return ret;
976 }
977 
hal_h265d_rkv_wait(void * hal,HalTaskInfo * task)978 MPP_RET hal_h265d_rkv_wait(void *hal, HalTaskInfo *task)
979 {
980     MPP_RET ret = MPP_OK;
981     RK_S32 index =  task->dec.reg_index;
982     HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
983     H265d_REGS_t *hw_regs = NULL;
984     RK_S32 i;
985 
986     if (reg_ctx->fast_mode) {
987         hw_regs = ( H265d_REGS_t *)reg_ctx->g_buf[index].hw_regs;
988     } else {
989         hw_regs = ( H265d_REGS_t *)reg_ctx->hw_regs;
990     }
991 
992     if (task->dec.flags.parse_err ||
993         (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
994         h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
995         goto ERR_PROC;
996     }
997 
998     ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
999     if (ret)
1000         mpp_err_f("poll cmd failed %d\n", ret);
1001 
1002 ERR_PROC:
1003     if (task->dec.flags.parse_err ||
1004         task->dec.flags.ref_err ||
1005         hw_regs->sw_interrupt.sw_dec_error_sta ||
1006         hw_regs->sw_interrupt.sw_dec_timeout_sta ||
1007         hw_regs->sw_interrupt.sw_dec_empty_sta) {
1008         if (!reg_ctx->fast_mode) {
1009             if (reg_ctx->dec_cb)
1010                 mpp_callback(reg_ctx->dec_cb, &task->dec);
1011         } else {
1012             MppFrame mframe = NULL;
1013             mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1014                                   SLOT_FRAME_PTR, &mframe);
1015             if (mframe) {
1016                 reg_ctx->fast_mode_err_found = 1;
1017                 mpp_frame_set_errinfo(mframe, 1);
1018             }
1019         }
1020     } else {
1021         if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1022             for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1023                 if (task->dec.refer[i] >= 0) {
1024                     MppFrame frame_ref = NULL;
1025 
1026                     mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1027                                           SLOT_FRAME_PTR, &frame_ref);
1028                     h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1029                               i, task->dec.refer[i], frame_ref);
1030                     if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1031                         MppFrame frame_out = NULL;
1032                         mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1033                                               SLOT_FRAME_PTR, &frame_out);
1034                         mpp_frame_set_errinfo(frame_out, 1);
1035                         break;
1036                     }
1037                 }
1038             }
1039         }
1040     }
1041 
1042     if (hal_h265d_debug & H265H_DBG_REG) {
1043         h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[1]=0x%08X, regs[45]=0x%08x\n", ((RK_U32 *)hw_regs)[1], ((RK_U32 *)hw_regs)[45]);
1044     }
1045 
1046     if (reg_ctx->fast_mode) {
1047         reg_ctx->g_buf[index].use_flag = 0;
1048     }
1049 
1050     return ret;
1051 }
1052 
hal_h265d_rkv_reset(void * hal)1053 MPP_RET hal_h265d_rkv_reset(void *hal)
1054 {
1055     MPP_RET ret = MPP_OK;
1056     HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1057 
1058     p_hal->fast_mode_err_found = 0;
1059 
1060     return ret;
1061 }
1062 
1063 const MppHalApi hal_h265d_rkv = {
1064     .name = "h265d_rkdec",
1065     .type = MPP_CTX_DEC,
1066     .coding = MPP_VIDEO_CodingHEVC,
1067     .ctx_size = sizeof(HalH265dCtx),
1068     .flag = 0,
1069     .init = hal_h265d_rkv_init,
1070     .deinit = hal_h265d_rkv_deinit,
1071     .reg_gen = hal_h265d_rkv_gen_regs,
1072     .start = hal_h265d_rkv_start,
1073     .wait = hal_h265d_rkv_wait,
1074     .reset = hal_h265d_rkv_reset,
1075     .flush = NULL,
1076     .control = NULL,
1077 };
1078