1 /*
2 * Copyright 2022 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_h265d_vdpu382"
18
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu382.h"
32 #include "vdpu382_h265d.h"
33
34 /* #define dump */
35 #ifdef dump
36 static FILE *fp = NULL;
37 #endif
38
39 #define HW_RPS
40 #define PPS_SIZE (112 * 64)//(96x64)
41
/*
 * Set the per-reference hevc_ref_valid_N bit in reg99.
 * The hardware register exposes 15 discrete single-bit fields instead of
 * an indexable array, hence the switch; indices outside [0, 14] are
 * silently ignored (default case).
 */
#define SET_REF_VALID(regs, index, value)\
    do{ \
        switch(index){\
        case 0: regs.reg99.hevc_ref_valid_0 = value; break;\
        case 1: regs.reg99.hevc_ref_valid_1 = value; break;\
        case 2: regs.reg99.hevc_ref_valid_2 = value; break;\
        case 3: regs.reg99.hevc_ref_valid_3 = value; break;\
        case 4: regs.reg99.hevc_ref_valid_4 = value; break;\
        case 5: regs.reg99.hevc_ref_valid_5 = value; break;\
        case 6: regs.reg99.hevc_ref_valid_6 = value; break;\
        case 7: regs.reg99.hevc_ref_valid_7 = value; break;\
        case 8: regs.reg99.hevc_ref_valid_8 = value; break;\
        case 9: regs.reg99.hevc_ref_valid_9 = value; break;\
        case 10: regs.reg99.hevc_ref_valid_10 = value; break;\
        case 11: regs.reg99.hevc_ref_valid_11 = value; break;\
        case 12: regs.reg99.hevc_ref_valid_12 = value; break;\
        case 13: regs.reg99.hevc_ref_valid_13 = value; break;\
        case 14: regs.reg99.hevc_ref_valid_14 = value; break;\
        default: break;}\
    }while(0)
62
/* Table dimensions: 4 chroma formats (400/420/422/444), 3 CTU sizes (16/32/64). */
#define FMT 4
#define CTU 3

/* Per-(ctu, format) sizing coefficients for the filter column RCB buffer:
 * rcb_bits = height * (a * bit_depth + b). See h265d_refine_rcb_size(). */
typedef struct {
    RK_U32 a;
    RK_U32 b;
} FilterdColBufRatio;

/* Coefficients when AFBC (frame buffer compression) output is enabled. */
static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
    /* 400    420      422      444 */
    {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
    {{0, 0}, {27, 8},  {36, 8},  {52, 8}}, //ctu 32
    {{0, 0}, {27, 5},  {36, 5},  {52, 5}} //ctu 64
};

/* Coefficients when AFBC output is disabled (linear output). */
static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
    /* 400    420      422      444 */
    {{0, 0}, {15, 5},  {20, 5},  {20, 5}}, //ctu 16
    {{0, 0}, {15, 9},  {20, 9},  {20, 9}}, //ctu 32
    {{0, 0}, {15, 16}, {20, 16}, {20, 16}} //ctu 64
};

/*
 * Layout of the single shared buffer allocated per context:
 *   [cabac table][ per-slot: sps/pps | rps | scaling list ] * cnt
 * Each region is 4K-aligned so the hardware base addresses stay aligned.
 */
#define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
#define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
#define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
#define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
#define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
#define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE *cnt)

/* Byte offsets of each region for register-set slot `pos`. */
#define CABAC_TAB_OFFSET                (0)
#define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * pos))
#define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
#define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
96
hal_h265d_vdpu382_init(void * hal,MppHalCfg * cfg)97 static MPP_RET hal_h265d_vdpu382_init(void *hal, MppHalCfg *cfg)
98 {
99 RK_S32 ret = 0;
100 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
101
102 mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
103 mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
104
105 reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
106 if (reg_ctx->scaling_qm == NULL) {
107 mpp_err("scaling_org alloc fail");
108 return MPP_ERR_MALLOC;
109 }
110
111 reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
112 reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
113 reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
114
115 if (reg_ctx->scaling_rk == NULL) {
116 mpp_err("scaling_rk alloc fail");
117 return MPP_ERR_MALLOC;
118 }
119
120 if (reg_ctx->group == NULL) {
121 ret = mpp_buffer_group_get_internal(®_ctx->group, MPP_BUFFER_TYPE_ION);
122 if (ret) {
123 mpp_err("h265d mpp_buffer_group_get failed\n");
124 return ret;
125 }
126 }
127
128 {
129 RK_U32 i = 0;
130 RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
131
132 //!< malloc buffers
133 ret = mpp_buffer_get(reg_ctx->group, ®_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
134 if (ret) {
135 mpp_err("h265d mpp_buffer_get failed\n");
136 return ret;
137 }
138
139 reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
140 reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
141 for (i = 0; i < max_cnt; i++) {
142 reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382H265dRegSet));
143 reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
144 reg_ctx->offset_rps[i] = RPS_OFFSET(i);
145 reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
146 }
147 }
148
149 if (!reg_ctx->fast_mode) {
150 reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
151 reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
152 reg_ctx->rps_offset = reg_ctx->offset_rps[0];
153 reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
154 }
155
156 ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
157 if (ret) {
158 mpp_err("h265d write cabac_table data failed\n");
159 return ret;
160 }
161
162 if (cfg->hal_fbc_adj_cfg) {
163 cfg->hal_fbc_adj_cfg->func = vdpu382_afbc_align_calc;
164 cfg->hal_fbc_adj_cfg->expand = 16;
165 }
166
167 #ifdef dump
168 fp = fopen("/data/hal.bin", "wb");
169 #endif
170 (void) cfg;
171 return MPP_OK;
172 }
173
hal_h265d_vdpu382_deinit(void * hal)174 static MPP_RET hal_h265d_vdpu382_deinit(void *hal)
175 {
176 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
177 RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
178 RK_U32 i;
179
180 if (reg_ctx->bufs) {
181 mpp_buffer_put(reg_ctx->bufs);
182 reg_ctx->bufs = NULL;
183 }
184
185 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
186 for (i = 0; i < loop; i++) {
187 if (reg_ctx->rcb_buf[i]) {
188 mpp_buffer_put(reg_ctx->rcb_buf[i]);
189 reg_ctx->rcb_buf[i] = NULL;
190 }
191 }
192
193 if (reg_ctx->group) {
194 mpp_buffer_group_put(reg_ctx->group);
195 reg_ctx->group = NULL;
196 }
197
198 for (i = 0; i < loop; i++)
199 MPP_FREE(reg_ctx->g_buf[i].hw_regs);
200
201 MPP_FREE(reg_ctx->scaling_qm);
202 MPP_FREE(reg_ctx->scaling_rk);
203 MPP_FREE(reg_ctx->pps_buf);
204 MPP_FREE(reg_ctx->sw_rps_buf);
205
206 if (reg_ctx->cmv_bufs) {
207 hal_bufs_deinit(reg_ctx->cmv_bufs);
208 reg_ctx->cmv_bufs = NULL;
209 }
210
211 return MPP_OK;
212 }
213
/*
 * Pack the current SPS/PPS syntax elements into the hardware-defined
 * bit-exact layout expected by the VDPU382 core and replicate the packet
 * into the shared sps/pps buffer region (one 112-byte copy per pps slot).
 *
 * Also uploads the scaling list (when enabled) and patches the scanlist
 * base register offset.
 *
 * NOTE: the field order and bit widths below mirror the hardware spec;
 * do not reorder or change widths.
 *
 * Returns 0 on success or an MPP error code.
 */
static RK_S32 hal_h265d_v382_output_pps_packet(void *hal, void *dxva)
{
    RK_S32 fifo_len = 14;//12  /* packet length in 64-bit words */
    RK_S32 i, j;
    RK_U32 addr;
    RK_U32 log2_min_cb_size;
    RK_S32 width, height;
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    Vdpu382H265dRegSet *hw_reg = (Vdpu382H265dRegSet*)(reg_ctx->hw_regs);
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    BitputCtx_t bp;

    if (NULL == reg_ctx || dxva_cxt == NULL) {
        mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
                __FILE__, __FUNCTION__, __LINE__);
        return MPP_ERR_NULL_PTR;
    }
    /* destination for this slot's sps/pps packet inside the shared buffer */
    void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
    /* only repack when the parameter sets actually changed */
    if (dxva_cxt->pp.ps_update_flag) {
        RK_U64 *pps_packet = reg_ctx->pps_buf;
        if (NULL == pps_ptr) {
            mpp_err("pps_data get ptr error");
            return MPP_ERR_NOMEM;
        }

        for (i = 0; i < 14; i++) pps_packet[i] = 0;

        mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);

        // SPS
        mpp_put_bits(&bp, dxva_cxt->pp.vps_id, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.sps_id, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc, 2);

        /* picture size in luma samples, derived from min-CB units */
        log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
        width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
        height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);

        mpp_put_bits(&bp, width, 16);
        mpp_put_bits(&bp, height, 16);
        mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4, 5);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, 2); //log2_maxa_coding_block_depth
        mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2, 3);
        ///<-zrh comment ^ 63 bit above
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size, 2);
        mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag, 1);
        ///<-zrh comment ^ 68 bit above
        /* PCM parameters are written as 0 when pcm_enabled_flag is off */
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);

        mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets, 7);
        mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps, 6);
        mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag, 1);
        ///<-zrh comment ^ 100 bit above

        mpp_put_bits(&bp, 0, 7); //49bits
        //yandong change
        mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1, 4);
        mpp_put_bits(&bp, 0, 3);
        mpp_put_align(&bp, 32, 0xf); //128  /* pad SPS section to 32-bit boundary */
        // PPS
        mpp_put_bits(&bp, dxva_cxt->pp.pps_id, 6);
        mpp_put_bits(&bp, dxva_cxt->pp.sps_id, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag, 1);
        /* NOTE(review): 13-bit field per hardware layout, even though the
         * syntax element itself is much smaller — confirm against HW spec */
        mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits, 13);
        mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1, 4);//31 bits
        mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26, 7);
        mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag, 1); //164
        /* log2 of the CU-QP-delta granularity in CTU depth terms */
        mpp_put_bits(&bp, log2_min_cb_size +
                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
                     dxva_cxt->pp.diff_cu_qp_delta_depth, 3);

        h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );

        mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset, 5);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset, 5);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag, 1); //185
        mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2, 4);
        mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag, 1);
        mpp_put_bits(&bp, 0, 3);
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0, 5);
        mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
        mpp_put_align(&bp, 64, 0xf);
        {
            /// tiles info begin
            /* up to 20 columns / 22 rows; entries are CTU counts minus one */
            RK_U16 column_width[20];
            RK_U16 row_height[22];

            memset(column_width, 0, sizeof(column_width));
            memset(row_height, 0, sizeof(row_height));

            if (dxva_cxt->pp.tiles_enabled_flag) {

                if (dxva_cxt->pp.uniform_spacing_flag == 0) {
                    /* explicit tile sizes: last column/row takes the remainder */
                    RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
                    RK_S32 ctu_width_in_pic = (width +
                                               (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
                    RK_S32 ctu_height_in_pic = (height +
                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
                    RK_S32 sum = 0;
                    for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
                        column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
                        sum += column_width[i] ;
                    }
                    column_width[i] = ctu_width_in_pic - sum;

                    sum = 0;
                    for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
                        row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
                        sum += row_height[i];
                    }
                    row_height[i] = ctu_height_in_pic - sum;
                } // end of (pps->uniform_spacing_flag == 0)
                else {
                    /* uniform spacing: distribute CTUs evenly per H.265 spec 6.5.1 */
                    RK_S32 pic_in_cts_width = (width +
                                               (1 << (log2_min_cb_size +
                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
                                              / (1 << (log2_min_cb_size +
                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
                    RK_S32 pic_in_cts_height = (height +
                                                (1 << (log2_min_cb_size +
                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
                                               / (1 << (log2_min_cb_size +
                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));

                    for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
                        column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
                                          (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);

                    for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
                        row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
                                        (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
                }
            } // pps->tiles_enabled_flag
            else {
                /* no tiles: whole picture is one tile of CTU-rounded size */
                RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
                column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth;
                row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth;
            }

            /* hardware expects minus-one values, 12 bits each, all slots written */
            for (j = 0; j < 20; j++) {
                if (column_width[j] > 0)
                    column_width[j]--;
                mpp_put_bits(&bp, column_width[j], 12);
            }

            for (j = 0; j < 22; j++) {
                if (row_height[j] > 0)
                    row_height[j]--;
                mpp_put_bits(&bp, row_height[j], 12);
            }
        }

        /* trailing zero padding to the fixed packet size */
        mpp_put_bits(&bp, 0, 32);
        mpp_put_bits(&bp, 0, 70);
        mpp_put_align(&bp, 64, 0xf);//128
    }

    if (dxva_cxt->pp.scaling_list_enabled_flag) {
        RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;

        /* select the 1360-byte scaling list slot: pps-local lists live after
         * the 16 sps slots; slot 80 is the default-list fallback */
        if (dxva_cxt->pp.scaling_list_data_present_flag) {
            addr = (dxva_cxt->pp.pps_id + 16) * 1360;
        } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
            addr = dxva_cxt->pp.sps_id * 1360;
        } else {
            addr = 80 * 1360;
        }

        hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);

        hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
        hw_reg->common.reg012.scanlist_addr_valid_en = 1;

        /* need to config addr */
        mpp_dev_set_reg_offset(reg_ctx->dev, 180, addr + reg_ctx->sclst_offset);
    }

    /* replicate the 112-byte packet into all 64 pps slots */
    for (i = 0; i < 64; i++)
        memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
#ifdef dump
    fwrite(pps_ptr, 1, 80 * 64, fp);
    RK_U32 *tmp = (RK_U32 *)pps_ptr;
    for (i = 0; i < 112 / 4; i++) {
        mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
    }
#endif
    return 0;
}
437
/*
 * Refine the per-row/column RCB (row/column buffer) sizes for the current
 * sequence parameters. The generic sizes computed by
 * vdpu382_get_rcb_buf_size() are tightened here using chroma format,
 * bit depth, CTU size and the tile column count.
 *
 * All formulas are hardware-specific sizing rules (bits, converted to
 * bytes via MPP_RCB_BYTES); they are not derived from the H.265 spec.
 */
static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
                                  Vdpu382H265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    RK_U32 rcb_bits = 0;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
    /* extra bits reserved per tile column boundary */
    RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;

    width = MPP_ALIGN(width, ctu_size);
    height = MPP_ALIGN(height, ctu_size);

    /* RCB_STRMD_ROW: stream metadata row buffer, only needed above 8K width */
    if (width >= 8192) {
        RK_U32 factor = 64 / ctu_size;

        rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_ROW: transform row buffer, only the part beyond 8192 pixels */
    if (width >= 8192)
        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_COL: transform column buffer, only with tiles and >8K height */
    if (height >= 8192 && tile_col_cut_num)
        rcb_bits = tile_col_cut_num ? (MPP_ALIGN(height - 8192, 4) << 1) : 0;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_ROW: inter-prediction row buffer */
    rcb_bits = width * 22 + ext_align_size;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_COL: inter-prediction column buffer, only with tile columns */
    rcb_bits = tile_col_cut_num ? (height * 22) : 0;
    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTRA_ROW: intra-prediction row buffer (extra plane when chroma present) */
    rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_DBLK_ROW: deblocking row buffer; coefficient depends on chroma
     * subsampling and CTU size */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 6 * bit_depth);
        else
            rcb_bits = width * ( 2 + 6 * bit_depth);
    } else {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 8 * bit_depth);
        else
            rcb_bits = width * ( 2 + 8 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_SAO_ROW: sample-adaptive-offset row buffer */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
    } else {
        rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FBC_ROW: frame-buffer-compression row buffer, only when FBC output.
     * NOTE(review): (chroma_fmt_idc - 1) underflows for monochrome (idc==0)
     * in RK_U32 arithmetic — presumably FBC is never enabled for 400;
     * confirm against caller/hardware constraints. */
    if (hw_regs->common.reg012.fbc_e) {
        rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
        rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FILT_COL: filter column buffer, only with tile columns; coefficients
     * come from the filterd_fbc_on/off tables indexed by CTU size and format */
    if (tile_col_cut_num) {
        if (hw_regs->common.reg012.fbc_e) {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b);
        } else {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b);
        }
    } else
        rcb_bits = 0;
    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}
540
/*
 * Re-allocate the per-slot RCB buffers when any parameter affecting their
 * size changed (tile rows, bit depth, chroma format, CTU size, picture
 * size). Cached parameters are updated afterwards so unchanged frames
 * skip the reallocation entirely.
 */
static void hal_h265d_rcb_info_update(void *hal, void *dxva,
                                      Vdpu382H265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
{
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 4202 ,422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;

    if (reg_ctx->num_row_tiles != num_tiles ||
        reg_ctx->bit_depth != bit_depth ||
        reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
        reg_ctx->ctu_size != ctu_size ||
        reg_ctx->width != width ||
        reg_ctx->height != height) {
        RK_U32 i = 0;
        RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;

        reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, width, height);
        h265d_refine_rcb_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);

        for (i = 0; i < loop; i++) {
            /* initialize to NULL so a failed get stores NULL, not garbage
             * (the old code left rcb_buf indeterminate on failure) */
            MppBuffer rcb_buf = NULL;

            if (reg_ctx->rcb_buf[i]) {
                mpp_buffer_put(reg_ctx->rcb_buf[i]);
                reg_ctx->rcb_buf[i] = NULL;
            }
            if (mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size))
                mpp_err_f("rcb buffer get failed\n");
            reg_ctx->rcb_buf[i] = rcb_buf;
        }

        /* cache the parameters that determined the current sizes */
        reg_ctx->num_row_tiles  = num_tiles;
        reg_ctx->bit_depth      = bit_depth;
        reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
        reg_ctx->ctu_size       = ctu_size;
        reg_ctx->width          = width;
        reg_ctx->height         = height;
    }
}
584
/*
 * Set a named per-reference field (refN_<field>) for POC high-bit info.
 * References 0-7 live in reg200, 8-15 in reg201; indices outside [0, 15]
 * are silently ignored.
 */
#define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
    do{ \
        switch(index){\
        case 0: regs.reg200.ref0_##field = value; break;\
        case 1: regs.reg200.ref1_##field = value; break;\
        case 2: regs.reg200.ref2_##field = value; break;\
        case 3: regs.reg200.ref3_##field = value; break;\
        case 4: regs.reg200.ref4_##field = value; break;\
        case 5: regs.reg200.ref5_##field = value; break;\
        case 6: regs.reg200.ref6_##field = value; break;\
        case 7: regs.reg200.ref7_##field = value; break;\
        case 8: regs.reg201.ref8_##field = value; break;\
        case 9: regs.reg201.ref9_##field = value; break;\
        case 10: regs.reg201.ref10_##field = value; break;\
        case 11: regs.reg201.ref11_##field = value; break;\
        case 12: regs.reg201.ref12_##field = value; break;\
        case 13: regs.reg201.ref13_##field = value; break;\
        case 14: regs.reg201.ref14_##field = value; break;\
        case 15: regs.reg201.ref15_##field = value; break;\
        default: break;}\
    }while(0)

/* Absolute POC distance; arguments are evaluated twice (macro), so pass
 * side-effect-free expressions only. */
#define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
608
hal_h265d_vdpu382_setup_colmv_buf(void * hal,HalTaskInfo * syn)609 static MPP_RET hal_h265d_vdpu382_setup_colmv_buf(void *hal, HalTaskInfo *syn)
610 {
611 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
612 h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
613 DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
614 RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size +
615 pp->log2_min_luma_coding_block_size_minus3 + 3);
616 RK_U32 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
617
618 RK_U32 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
619 RK_U32 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
620 RK_U32 mv_size = 0, colmv_size = 16, colmv_byte = 16;
621 RK_U32 compress = reg_ctx->hw_info ? reg_ctx->hw_info->cap_colmv_compress : 1;
622
623
624 mv_size = vdpu382_get_colmv_size(width, height, ctu_size, colmv_byte, colmv_size, compress);
625
626 if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
627 size_t size = mv_size;
628
629 if (reg_ctx->cmv_bufs) {
630 hal_bufs_deinit(reg_ctx->cmv_bufs);
631 reg_ctx->cmv_bufs = NULL;
632 }
633
634 hal_bufs_init(®_ctx->cmv_bufs);
635 if (reg_ctx->cmv_bufs == NULL) {
636 mpp_err_f("colmv bufs init fail");
637 return MPP_ERR_NOMEM;;
638 }
639
640 reg_ctx->mv_size = mv_size;
641 reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
642 hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
643 }
644
645 return MPP_OK;
646 }
647
hal_h265d_vdpu382_gen_regs(void * hal,HalTaskInfo * syn)648 static MPP_RET hal_h265d_vdpu382_gen_regs(void *hal, HalTaskInfo *syn)
649 {
650 RK_S32 i = 0;
651 RK_S32 log2_min_cb_size;
652 RK_S32 width, height;
653 RK_S32 stride_y, stride_uv, virstrid_y;
654 Vdpu382H265dRegSet *hw_regs;
655 RK_S32 ret = MPP_SUCCESS;
656 MppBuffer streambuf = NULL;
657 RK_S32 aglin_offset = 0;
658 RK_S32 valid_ref = -1;
659 MppBuffer framebuf = NULL;
660 HalBuf *mv_buf = NULL;
661 RK_S32 fd = -1;
662 RK_S32 distance = INT_MAX;
663 h265d_dxva2_picture_context_t *dxva_cxt =
664 (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
665 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
666 void *rps_ptr = NULL;
667 RK_U32 stream_buf_size = 0;
668
669 if (syn->dec.flags.parse_err ||
670 (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
671 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
672 return MPP_OK;
673 }
674
675 if (reg_ctx ->fast_mode) {
676 for (i = 0; i < MAX_GEN_REG; i++) {
677 if (!reg_ctx->g_buf[i].use_flag) {
678 syn->dec.reg_index = i;
679
680 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
681 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
682 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
683
684 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
685 reg_ctx->g_buf[i].use_flag = 1;
686 break;
687 }
688 }
689 if (i == MAX_GEN_REG) {
690 mpp_err("hevc rps buf all used");
691 return MPP_ERR_NOMEM;
692 }
693 } else {
694 syn->dec.reg_index = 0;
695 }
696 rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
697 if (NULL == rps_ptr) {
698
699 mpp_err("rps_data get ptr error");
700 return MPP_ERR_NOMEM;
701 }
702
703
704 if (syn->dec.syntax.data == NULL) {
705 mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
706 return MPP_ERR_NULL_PTR;
707 }
708
709 /* output pps */
710 hw_regs = (Vdpu382H265dRegSet*)reg_ctx->hw_regs;
711 memset(hw_regs, 0, sizeof(Vdpu382H265dRegSet));
712
713 if (NULL == reg_ctx->hw_regs) {
714 return MPP_ERR_NULL_PTR;
715 }
716
717
718 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
719
720 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
721 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
722 ret = hal_h265d_vdpu382_setup_colmv_buf(hal, syn);
723 if (ret)
724 return MPP_ERR_NOMEM;
725
726 {
727 MppFrame mframe = NULL;
728 RK_U32 ver_virstride;
729
730 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
731 SLOT_FRAME_PTR, &mframe);
732 stride_y = mpp_frame_get_hor_stride(mframe);
733 ver_virstride = mpp_frame_get_ver_stride(mframe);
734 stride_uv = stride_y;
735 virstrid_y = ver_virstride * stride_y;
736 hw_regs->common.reg013.h26x_error_mode = 1;
737 hw_regs->common.reg021.error_deb_en = 1;
738 hw_regs->common.reg021.inter_error_prc_mode = 0;
739 hw_regs->common.reg021.error_intra_mode = 1;
740
741 hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
742 hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
743 hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
744 hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
745
746 if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
747 RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
748 RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
749
750 hw_regs->common.reg012.fbc_e = 1;
751 hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
752 hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
753 hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
754 } else {
755 hw_regs->common.reg012.fbc_e = 0;
756 hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
757 hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
758 hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
759 }
760 }
761 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
762 SLOT_BUFFER, &framebuf);
763 hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
764 /*if out_base is equal to zero it means this frame may error
765 we return directly add by csy*/
766
767 if (hw_regs->common_addr.reg130_decout_base == 0) {
768 return 0;
769 }
770 fd = mpp_buffer_get_fd(framebuf);
771 hw_regs->common_addr.reg130_decout_base = fd;
772 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
773 hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
774
775 hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
776
777 mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
778 &streambuf);
779 if ( dxva_cxt->bitstream == NULL) {
780 dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
781 }
782 #ifdef HW_RPS
783 hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
784 hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
785 #else
786 hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
787 hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
788 #endif
789
790 /* cabac table */
791 hw_regs->h265d_addr.reg197_cabactbl_base = reg_ctx->bufs_fd;
792 /* pps */
793 hw_regs->h265d_addr.reg161_pps_base = reg_ctx->bufs_fd;
794 hw_regs->h265d_addr.reg163_rps_base = reg_ctx->bufs_fd;
795
796 hw_regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(streambuf);
797 hw_regs->common_addr.reg129_rlcwrite_base = mpp_buffer_get_fd(streambuf);
798 stream_buf_size = mpp_buffer_get_size(streambuf);
799 hw_regs->common.reg016_str_len = ((dxva_cxt->bitstream_size + 15)
800 & (~15)) + 64;
801 hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
802 hw_regs->common.reg016_str_len : stream_buf_size;
803
804 aglin_offset = hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
805 if (aglin_offset > 0) {
806 memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
807 aglin_offset);
808 }
809 hw_regs->common.reg010.dec_e = 1;
810 hw_regs->common.reg012.colmv_compress_en = reg_ctx->hw_info ?
811 reg_ctx->hw_info->cap_colmv_compress : 0;
812
813 hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
814 hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
815
816 hw_regs->common.reg011.dec_clkgate_e = 1;
817 hw_regs->common.reg011.err_head_fill_e = 1;
818 hw_regs->common.reg011.err_colmv_fill_e = 1;
819
820 hw_regs->common.reg026.inter_auto_gating_e = 1;
821 hw_regs->common.reg026.filterd_auto_gating_e = 1;
822 hw_regs->common.reg026.strmd_auto_gating_e = 1;
823 hw_regs->common.reg026.mcp_auto_gating_e = 1;
824 hw_regs->common.reg026.busifd_auto_gating_e = 1;
825 hw_regs->common.reg026.dec_ctrl_auto_gating_e = 1;
826 hw_regs->common.reg026.intra_auto_gating_e = 1;
827 hw_regs->common.reg026.mc_auto_gating_e = 1;
828 hw_regs->common.reg026.transd_auto_gating_e = 1;
829 hw_regs->common.reg026.sram_auto_gating_e = 1;
830 hw_regs->common.reg026.cru_auto_gating_e = 1;
831 hw_regs->common.reg026.reg_cfg_gating_en = 1;
832 hw_regs->common.reg032_timeout_threshold = 0x3ffff;
833
834 valid_ref = hw_regs->common_addr.reg130_decout_base;
835 reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.CurrPic.Index7Bits;
836 hw_regs->common_addr.reg132_error_ref_base = valid_ref;
837
838 memset(&hw_regs->highpoc.reg205, 0, sizeof(RK_U32));
839
840 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
841 if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
842 dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
843
844 MppFrame mframe = NULL;
845 hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
846 mpp_buf_slot_get_prop(reg_ctx->slots,
847 dxva_cxt->pp.RefPicList[i].Index7Bits,
848 SLOT_BUFFER, &framebuf);
849 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
850 SLOT_FRAME_PTR, &mframe);
851 if (framebuf != NULL) {
852 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
853 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
854 // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
855 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
856 && (!mpp_frame_get_errinfo(mframe))) {
857 distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
858 hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
859 reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.RefPicList[i].Index7Bits;
860 hw_regs->common.reg021.error_intra_mode = 0;
861
862 }
863 } else {
864 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
865 }
866
867 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
868 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
869
870 SET_REF_VALID(hw_regs->h265d_param, i, 1);
871 }
872 }
873
874 if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits) &&
875 !dxva_cxt->pp.IntraPicFlag) {
876 h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
877 syn->dec.flags.ref_err = 1;
878 return MPP_OK;
879 }
880
881 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
882
883 if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
884 dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
885 MppFrame mframe = NULL;
886
887 mpp_buf_slot_get_prop(reg_ctx->slots,
888 dxva_cxt->pp.RefPicList[i].Index7Bits,
889 SLOT_BUFFER, &framebuf);
890
891 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
892 SLOT_FRAME_PTR, &mframe);
893
894 if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
895 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
896 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
897 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
898 }
899 } else {
900 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
901 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
902 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
903 /* mark 3 to differ from current frame */
904 if (reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits)
905 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
906 }
907 }
908 hal_h265d_v382_output_pps_packet(hal, syn->dec.syntax.data);
909
910 mpp_dev_set_reg_offset(reg_ctx->dev, 161, reg_ctx->spspps_offset);
911 /* rps */
912 mpp_dev_set_reg_offset(reg_ctx->dev, 163, reg_ctx->rps_offset);
913
914 hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
915
916 hw_regs->common.reg011.buf_empty_en = 1;
917
918 hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
919 vdpu382_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
920 reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
921 (Vdpu382RcbInfo*)reg_ctx->rcb_info);
922 {
923 MppFrame mframe = NULL;
924
925 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
926 SLOT_FRAME_PTR, &mframe);
927
928 if (mpp_frame_get_thumbnail_en(mframe)) {
929 hw_regs->h265d_addr.reg198_scale_down_luma_base =
930 hw_regs->common_addr.reg130_decout_base;
931 hw_regs->h265d_addr.reg199_scale_down_chorme_base =
932 hw_regs->common_addr.reg130_decout_base;
933 vdpu382_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->common);
934 } else {
935 hw_regs->h265d_addr.reg198_scale_down_luma_base = 0;
936 hw_regs->h265d_addr.reg199_scale_down_chorme_base = 0;
937 hw_regs->common.reg012.scale_down_en = 0;
938 }
939 }
940 vdpu382_setup_statistic(&hw_regs->common, &hw_regs->statistic);
941 mpp_buffer_sync_end(reg_ctx->bufs);
942
943 return ret;
944 }
945
hal_h265d_vdpu382_start(void * hal,HalTaskInfo * task)946 static MPP_RET hal_h265d_vdpu382_start(void *hal, HalTaskInfo *task)
947 {
948 MPP_RET ret = MPP_OK;
949 RK_U8* p = NULL;
950 Vdpu382H265dRegSet *hw_regs = NULL;
951 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
952 RK_S32 index = task->dec.reg_index;
953
954 RK_U32 i;
955
956 if (task->dec.flags.parse_err ||
957 (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
958 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
959 return MPP_OK;
960 }
961
962 if (reg_ctx->fast_mode) {
963 p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
964 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
965 } else {
966 p = (RK_U8*)reg_ctx->hw_regs;
967 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
968 }
969
970 if (hw_regs == NULL) {
971 mpp_err("hal_h265d_start hw_regs is NULL");
972 return MPP_ERR_NULL_PTR;
973 }
974 for (i = 0; i < 68; i++) {
975 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
976 i, *((RK_U32*)p));
977 //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
978 p += 4;
979 }
980
981 do {
982 MppDevRegWrCfg wr_cfg;
983 MppDevRegRdCfg rd_cfg;
984
985 wr_cfg.reg = &hw_regs->common;
986 wr_cfg.size = sizeof(hw_regs->common);
987 wr_cfg.offset = OFFSET_COMMON_REGS;
988
989 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
990 if (ret) {
991 mpp_err_f("set register write failed %d\n", ret);
992 break;
993 }
994
995 wr_cfg.reg = &hw_regs->h265d_param;
996 wr_cfg.size = sizeof(hw_regs->h265d_param);
997 wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
998
999 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1000 if (ret) {
1001 mpp_err_f("set register write failed %d\n", ret);
1002 break;
1003 }
1004
1005 wr_cfg.reg = &hw_regs->common_addr;
1006 wr_cfg.size = sizeof(hw_regs->common_addr);
1007 wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1008
1009 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1010 if (ret) {
1011 mpp_err_f("set register write failed %d\n", ret);
1012 break;
1013 }
1014
1015 wr_cfg.reg = &hw_regs->h265d_addr;
1016 wr_cfg.size = sizeof(hw_regs->h265d_addr);
1017 wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1018
1019 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1020 if (ret) {
1021 mpp_err_f("set register write failed %d\n", ret);
1022 break;
1023 }
1024
1025 wr_cfg.reg = &hw_regs->statistic;
1026 wr_cfg.size = sizeof(hw_regs->statistic);
1027 wr_cfg.offset = OFFSET_STATISTIC_REGS;
1028
1029 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1030 if (ret) {
1031 mpp_err_f("set register write failed %d\n", ret);
1032 break;
1033 }
1034
1035 wr_cfg.reg = &hw_regs->highpoc;
1036 wr_cfg.size = sizeof(hw_regs->highpoc);
1037 wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1038
1039 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1040 if (ret) {
1041 mpp_err_f("set register write failed %d\n", ret);
1042 break;
1043 }
1044
1045 rd_cfg.reg = &hw_regs->irq_status;
1046 rd_cfg.size = sizeof(hw_regs->irq_status);
1047 rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1048
1049 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1050 if (ret) {
1051 mpp_err_f("set register read failed %d\n", ret);
1052 break;
1053 }
1054 /* rcb info for sram */
1055 vdpu382_set_rcbinfo(reg_ctx->dev, (Vdpu382RcbInfo*)reg_ctx->rcb_info);
1056
1057 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1058 if (ret) {
1059 mpp_err_f("send cmd failed %d\n", ret);
1060 break;
1061 }
1062 } while (0);
1063
1064 return ret;
1065 }
1066
1067
hal_h265d_vdpu382_wait(void * hal,HalTaskInfo * task)1068 static MPP_RET hal_h265d_vdpu382_wait(void *hal, HalTaskInfo *task)
1069 {
1070 MPP_RET ret = MPP_OK;
1071 RK_S32 index = task->dec.reg_index;
1072 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1073 RK_U8* p = NULL;
1074 Vdpu382H265dRegSet *hw_regs = NULL;
1075 RK_S32 i;
1076
1077 if (reg_ctx->fast_mode) {
1078 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1079 } else {
1080 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
1081 }
1082
1083 p = (RK_U8*)hw_regs;
1084
1085 if (task->dec.flags.parse_err ||
1086 (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1087 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1088 goto ERR_PROC;
1089 }
1090
1091 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1092 if (ret)
1093 mpp_err_f("poll cmd failed %d\n", ret);
1094
1095 ERR_PROC:
1096 if (task->dec.flags.parse_err ||
1097 task->dec.flags.ref_err ||
1098 hw_regs->irq_status.reg224.dec_error_sta ||
1099 hw_regs->irq_status.reg224.buf_empty_sta ||
1100 hw_regs->irq_status.reg224.dec_bus_sta ||
1101 !hw_regs->irq_status.reg224.dec_rdy_sta) {
1102 if (!reg_ctx->fast_mode) {
1103 if (reg_ctx->dec_cb)
1104 mpp_callback(reg_ctx->dec_cb, &task->dec);
1105 } else {
1106 MppFrame mframe = NULL;
1107 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1108 SLOT_FRAME_PTR, &mframe);
1109 if (mframe) {
1110 reg_ctx->fast_mode_err_found = 1;
1111 mpp_frame_set_errinfo(mframe, 1);
1112 }
1113 }
1114 } else {
1115 if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1116 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1117 if (task->dec.refer[i] >= 0) {
1118 MppFrame frame_ref = NULL;
1119
1120 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1121 SLOT_FRAME_PTR, &frame_ref);
1122 h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1123 i, task->dec.refer[i], frame_ref);
1124 if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1125 MppFrame frame_out = NULL;
1126 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1127 SLOT_FRAME_PTR, &frame_out);
1128 mpp_frame_set_errinfo(frame_out, 1);
1129 break;
1130 }
1131 }
1132 }
1133 }
1134 }
1135
1136 for (i = 0; i < 68; i++) {
1137 if (i == 1) {
1138 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1139 i, *((RK_U32*)p));
1140 }
1141
1142 if (i == 45) {
1143 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1144 i, *((RK_U32*)p));
1145 }
1146 p += 4;
1147 }
1148
1149 if (reg_ctx->fast_mode) {
1150 reg_ctx->g_buf[index].use_flag = 0;
1151 }
1152
1153 return ret;
1154 }
1155
hal_h265d_vdpu382_reset(void * hal)1156 static MPP_RET hal_h265d_vdpu382_reset(void *hal)
1157 {
1158 MPP_RET ret = MPP_OK;
1159 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1160 p_hal->fast_mode_err_found = 0;
1161 (void)hal;
1162 return ret;
1163 }
1164
hal_h265d_vdpu382_flush(void * hal)1165 static MPP_RET hal_h265d_vdpu382_flush(void *hal)
1166 {
1167 MPP_RET ret = MPP_OK;
1168
1169 (void)hal;
1170 return ret;
1171 }
1172
hal_h265d_vdpu382_control(void * hal,MpiCmd cmd_type,void * param)1173 static MPP_RET hal_h265d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
1174 {
1175 MPP_RET ret = MPP_OK;
1176 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1177
1178 (void)hal;
1179 switch ((MpiCmd)cmd_type) {
1180 case MPP_DEC_SET_FRAME_INFO: {
1181 MppFrame frame = (MppFrame)param;
1182 MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1183
1184 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1185 vdpu382_afbc_align_calc(p_hal->slots, frame, 16);
1186 }
1187 break;
1188 }
1189 case MPP_DEC_SET_OUTPUT_FORMAT: {
1190 } break;
1191 default:
1192 break;
1193 }
1194 return ret;
1195 }
1196
/*
 * HAL entry table for the VDPU382 HEVC decoder, exported to the mpp
 * decoder framework.  Callbacks follow the MppHalApi life cycle:
 * init/deinit once per session, then per frame reg_gen -> start -> wait,
 * with reset/flush/control invoked on demand.
 */
const MppHalApi hal_h265d_vdpu382 = {
    .name = "h265d_vdpu382",
    .type = MPP_CTX_DEC,
    .coding = MPP_VIDEO_CodingHEVC,
    .ctx_size = sizeof(HalH265dCtx),
    .flag = 0,
    .init = hal_h265d_vdpu382_init,
    .deinit = hal_h265d_vdpu382_deinit,
    .reg_gen = hal_h265d_vdpu382_gen_regs,
    .start = hal_h265d_vdpu382_start,
    .wait = hal_h265d_vdpu382_wait,
    .reset = hal_h265d_vdpu382_reset,
    .flush = hal_h265d_vdpu382_flush,
    .control = hal_h265d_vdpu382_control,
};
1212