1 /*
2 * Copyright 2020 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_h265d_vdpu34x"
18
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu34x.h"
32 #include "vdpu34x_h265d.h"
33
34 /* #define dump */
35 #ifdef dump
36 static FILE *fp = NULL;
37 #endif
38
39 #define HW_RPS
40 #define PPS_SIZE (112 * 64)//(96x64)
41
42 #define COLMV_COMPRESS_EN 1
43
44 #define SET_REF_VALID(regs, index, value)\
45 do{ \
46 switch(index){\
47 case 0: regs.reg99.hevc_ref_valid_0 = value; break;\
48 case 1: regs.reg99.hevc_ref_valid_1 = value; break;\
49 case 2: regs.reg99.hevc_ref_valid_2 = value; break;\
50 case 3: regs.reg99.hevc_ref_valid_3 = value; break;\
51 case 4: regs.reg99.hevc_ref_valid_4 = value; break;\
52 case 5: regs.reg99.hevc_ref_valid_5 = value; break;\
53 case 6: regs.reg99.hevc_ref_valid_6 = value; break;\
54 case 7: regs.reg99.hevc_ref_valid_7 = value; break;\
55 case 8: regs.reg99.hevc_ref_valid_8 = value; break;\
56 case 9: regs.reg99.hevc_ref_valid_9 = value; break;\
57 case 10: regs.reg99.hevc_ref_valid_10 = value; break;\
58 case 11: regs.reg99.hevc_ref_valid_11 = value; break;\
59 case 12: regs.reg99.hevc_ref_valid_12 = value; break;\
60 case 13: regs.reg99.hevc_ref_valid_13 = value; break;\
61 case 14: regs.reg99.hevc_ref_valid_14 = value; break;\
62 default: break;}\
63 }while(0)
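/*
 * SET_REF_VALID() maps a runtime reference index onto the individual
 * hevc_ref_valid_N bit fields of reg99, which cannot be addressed by index.
 * It is used from the reference list loop in hal_h265d_vdpu34x_gen_regs(),
 * e.g. SET_REF_VALID(hw_regs->h265d_param, i, 1);
 */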
64
65 #define FMT 4
66 #define CTU 3
67
68 typedef struct {
69 RK_U32 a;
70 RK_U32 b;
71 } FilterdColBufRatio;
72
73 static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
74 /* 400 420 422 444 */
75 {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
76 {{0, 0}, {27, 8}, {36, 8}, {52, 8}}, //ctu 32
77 {{0, 0}, {27, 5}, {36, 5}, {52, 5}} //ctu 64
78 };
79
80 static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
81 /* 400 420 422 444 */
82 {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
83 {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
84 {{0, 0}, {9, 21}, {12, 29}, {12, 29}} //ctu 64
85 };
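/*
 * Coefficients used by h265d_refine_rcb_size() to size the column filter
 * buffer (RCB_FILT_COL): rcb_bits = height * (a * bit_depth + b), with an
 * extra 16 bits per row for 10-bit content in the non-FBC table. Rows are
 * indexed by CTU size (16/32/64), columns by chroma format (400/420/422/444);
 * the 4:0:0 column is left at zero in both tables.
 */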
86
87 #define CABAC_TAB_ALIGEND_SIZE (MPP_ALIGN(27456, SZ_4K))
88 #define SPSPPS_ALIGNED_SIZE (MPP_ALIGN(112 * 64, SZ_4K))
89 #define RPS_ALIGEND_SIZE (MPP_ALIGN(400 * 8, SZ_4K))
90 #define SCALIST_ALIGNED_SIZE (MPP_ALIGN(81 * 1360, SZ_4K))
91 #define INFO_BUFFER_SIZE (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
92 #define ALL_BUFFER_SIZE(cnt) (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE *cnt)
93
94 #define CABAC_TAB_OFFSET (0)
95 #define SPSPPS_OFFSET(pos) (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * pos))
96 #define RPS_OFFSET(pos) (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
97 #define SCALIST_OFFSET(pos) (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
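/*
 * reg_ctx->bufs is a single allocation laid out as
 *
 *     [ cabac table ][ spspps | rps | scaling list ] x max_cnt
 *
 * with each region rounded up to 4K. For example, slot 1 starts at
 * CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE, its RPS area follows its
 * SPS/PPS area, and its scaling list area follows the RPS area, matching
 * the SPSPPS_OFFSET()/RPS_OFFSET()/SCALIST_OFFSET() macros above.
 */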
98
static MPP_RET hal_h265d_vdpu34x_init(void *hal, MppHalCfg *cfg)
100 {
101 RK_S32 ret = 0;
102 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
103
104 mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
105 mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
106
107 reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
108 if (reg_ctx->scaling_qm == NULL) {
109 mpp_err("scaling_org alloc fail");
110 return MPP_ERR_MALLOC;
111 }
112
113 reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
114 reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
115 reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
116
117 if (reg_ctx->scaling_rk == NULL) {
118 mpp_err("scaling_rk alloc fail");
119 return MPP_ERR_MALLOC;
120 }
121
122 if (reg_ctx->group == NULL) {
        ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION);
124 if (ret) {
125 mpp_err("h265d mpp_buffer_group_get failed\n");
126 return ret;
127 }
128 }
129
130 {
131 RK_U32 i = 0;
132 RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
133
134 //!< malloc buffers
        ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
136 if (ret) {
137 mpp_err("h265d mpp_buffer_get failed\n");
138 return ret;
139 }
140
141 reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
142 reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
143 for (i = 0; i < max_cnt; i++) {
144 reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu34xH265dRegSet));
145 reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
146 reg_ctx->offset_rps[i] = RPS_OFFSET(i);
147 reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
148 }
149 }
150
151 if (!reg_ctx->fast_mode) {
152 reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
153 reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
154 reg_ctx->rps_offset = reg_ctx->offset_rps[0];
155 reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
156 }
157
158 ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
159 if (ret) {
160 mpp_err("h265d write cabac_table data failed\n");
161 return ret;
162 }
163
164 if (cfg->hal_fbc_adj_cfg) {
165 cfg->hal_fbc_adj_cfg->func = vdpu34x_afbc_align_calc;
166 cfg->hal_fbc_adj_cfg->expand = 16;
167 }
168
169 (void)cfg;
170 #ifdef dump
171 fp = fopen("/data/hal.bin", "wb");
172 #endif
173 return MPP_OK;
174 }
175
static MPP_RET hal_h265d_vdpu34x_deinit(void *hal)
177 {
178 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
179 RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
180 RK_U32 i;
181
182 if (reg_ctx->bufs) {
183 mpp_buffer_put(reg_ctx->bufs);
184 reg_ctx->bufs = NULL;
185 }
186
187 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
188 for (i = 0; i < loop; i++) {
189 if (reg_ctx->rcb_buf[i]) {
190 mpp_buffer_put(reg_ctx->rcb_buf[i]);
191 reg_ctx->rcb_buf[i] = NULL;
192 }
193 }
194
195 if (reg_ctx->missing_ref_buf) {
196 mpp_buffer_put(reg_ctx->missing_ref_buf);
197 reg_ctx->missing_ref_buf = NULL;
198 }
199
200 if (reg_ctx->group) {
201 mpp_buffer_group_put(reg_ctx->group);
202 reg_ctx->group = NULL;
203 }
204
205 for (i = 0; i < loop; i++)
206 MPP_FREE(reg_ctx->g_buf[i].hw_regs);
207
208 MPP_FREE(reg_ctx->scaling_qm);
209 MPP_FREE(reg_ctx->scaling_rk);
210 MPP_FREE(reg_ctx->pps_buf);
211 MPP_FREE(reg_ctx->sw_rps_buf);
212
213 if (reg_ctx->cmv_bufs) {
214 hal_bufs_deinit(reg_ctx->cmv_bufs);
215 reg_ctx->cmv_bufs = NULL;
216 }
217
218 return MPP_OK;
219 }
220
static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva)
222 {
223 RK_S32 fifo_len = 14;//12
224 RK_S32 i, j;
225 RK_U32 addr;
226 RK_U32 log2_min_cb_size;
227 RK_S32 width, height;
228 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
229 Vdpu34xH265dRegSet *hw_reg = (Vdpu34xH265dRegSet*)(reg_ctx->hw_regs);
230 h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
231 BitputCtx_t bp;
232
233 if (NULL == reg_ctx || dxva_cxt == NULL) {
234 mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
235 __FILE__, __FUNCTION__, __LINE__);
236 return MPP_ERR_NULL_PTR;
237 }
238 void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
239 if (dxva_cxt->pp.ps_update_flag) {
240 RK_U64 *pps_packet = reg_ctx->pps_buf;
241 if (NULL == pps_ptr) {
242 mpp_err("pps_data get ptr error");
243 return MPP_ERR_NOMEM;
244 }
245
246 for (i = 0; i < 14; i++) pps_packet[i] = 0;
247
248 mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
249
250 // SPS
251 mpp_put_bits(&bp, dxva_cxt->pp.vps_id , 4);
252 mpp_put_bits(&bp, dxva_cxt->pp.sps_id , 4);
253 mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc , 2);
254
255 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
256 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
257 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
258
259 mpp_put_bits(&bp, width , 16);
260 mpp_put_bits(&bp, height , 16);
261 mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8 , 4);
262 mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8 , 4);
263 mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4 , 5);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size , 2); //log2_max_coding_block_depth
265 mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
266 mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2 , 3);
267 ///<-zrh comment ^ 63 bit above
268 mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size , 2);
269 mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter , 3);
270 mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra , 3);
271 mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag , 1);
272 mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag , 1);
273 mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag , 1);
274 ///<-zrh comment ^ 68 bit above
275 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag , 1);
276 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0 , 4);
277 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
278 mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag , 1);
279 mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size , 3);
280 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
281
282 mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets , 7);
283 mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag , 1);
284 mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps , 6);
285 mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag , 1);
286 mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag , 1);
287 ///<-zrh comment ^ 100 bit above
288
289 mpp_put_bits(&bp, 0 , 7 ); //49bits
290 //yandong change
291 mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1, 4);
292 mpp_put_bits(&bp, 0, 3);
293 mpp_put_align(&bp , 32, 0xf); //128
294 // PPS
295 mpp_put_bits(&bp, dxva_cxt->pp.pps_id , 6 );
296 mpp_put_bits(&bp, dxva_cxt->pp.sps_id , 4 );
297 mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag , 1 );
298 mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag , 1 );
299 mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits , 13);
300 mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
301 mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag , 1);
302 mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1 , 4);//31 bits
303 mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1 , 4);
304 mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26 , 7);
305 mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag , 1);
306 mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag , 1);
307 mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag , 1); //164
308 mpp_put_bits(&bp, log2_min_cb_size +
309 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
310 dxva_cxt->pp.diff_cu_qp_delta_depth , 3);
311
312 h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
313 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
314
315 mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset , 5);
316 mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset , 5);
317 mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag , 1);
318 mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag , 1);
319 mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag , 1);
320 mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag , 1 );
321 mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag , 1 );
322 mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag , 1);
323 mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag , 1);
324 mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag , 1); //185
325 mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag , 1);
326 mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag , 1);
327 mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2 , 4);
328 mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2 , 4);
329 mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag , 1);
330 mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2 , 3);
331 mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
332 mpp_put_bits(&bp, 0 , 3);
333 mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
334 mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
335 mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
336 mpp_put_align(&bp, 64, 0xf);
337 {
338 /// tiles info begin
339 RK_U16 column_width[20];
340 RK_U16 row_height[22];
341
342 memset(column_width, 0, sizeof(column_width));
343 memset(row_height, 0, sizeof(row_height));
344
345 if (dxva_cxt->pp.tiles_enabled_flag) {
346
347 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
348 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
349 RK_S32 ctu_width_in_pic = (width +
350 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
351 RK_S32 ctu_height_in_pic = (height +
352 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
353 RK_S32 sum = 0;
354 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
355 column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
356 sum += column_width[i] ;
357 }
358 column_width[i] = ctu_width_in_pic - sum;
359
360 sum = 0;
361 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
362 row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
363 sum += row_height[i];
364 }
365 row_height[i] = ctu_height_in_pic - sum;
366 } // end of (pps->uniform_spacing_flag == 0)
367 else {
368
369 RK_S32 pic_in_cts_width = (width +
370 (1 << (log2_min_cb_size +
371 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
372 / (1 << (log2_min_cb_size +
373 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
374 RK_S32 pic_in_cts_height = (height +
375 (1 << (log2_min_cb_size +
376 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
377 / (1 << (log2_min_cb_size +
378 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
379
380 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
381 column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
382 (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
383
384 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
385 row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
386 (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
387 }
388 } // pps->tiles_enabled_flag
389 else {
390 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
391 column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth;
392 row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth;
393 }
394
395 for (j = 0; j < 20; j++) {
396 if (column_width[j] > 0)
397 column_width[j]--;
398 mpp_put_bits(&bp, column_width[j], 12);
399 }
400
401 for (j = 0; j < 22; j++) {
402 if (row_height[j] > 0)
403 row_height[j]--;
404 mpp_put_bits(&bp, row_height[j], 12);
405 }
406 }
407
408 mpp_put_bits(&bp, 0, 32);
409 mpp_put_bits(&bp, 0, 70);
410 mpp_put_align(&bp, 64, 0xf);//128
411 }
412
413 if (dxva_cxt->pp.scaling_list_enabled_flag) {
414 RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;
415
416 if (dxva_cxt->pp.scaling_list_data_present_flag) {
417 addr = (dxva_cxt->pp.pps_id + 16) * 1360;
418 } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
419 addr = dxva_cxt->pp.sps_id * 1360;
420 } else {
421 addr = 80 * 1360;
422 }
423
424 hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
425
426 hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
427 hw_reg->common.reg012.scanlist_addr_valid_en = 1;
428
429 /* need to config addr */
430 mpp_dev_set_reg_offset(reg_ctx->dev, 180, addr + reg_ctx->sclst_offset);
431 }
432
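    /*
     * The SPS/PPS buffer holds 64 entries of 112 bytes each; the same packet
     * is replicated into every entry, presumably so that whichever pps_id the
     * hardware indexes with hits valid data (assumption based on the layout,
     * not on a datasheet).
     */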
433 for (i = 0; i < 64; i++)
434 memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
435 #ifdef dump
436 fwrite(pps_ptr, 1, 80 * 64, fp);
437 RK_U32 *tmp = (RK_U32 *)pps_ptr;
438 for (i = 0; i < 112 / 4; i++) {
439 mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
440 }
441 #endif
442 return 0;
443 }
444
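/*
 * Legacy (non-v345) PPS packet: 10 x 64 bits (80 bytes per entry), and the
 * scaling list buffer is referenced from inside the packet as
 * "fd | (offset << 10)" instead of being programmed through reg180 as in the
 * v345 variant above.
 */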
static RK_S32 hal_h265d_output_pps_packet(void *hal, void *dxva)
446 {
447 RK_S32 fifo_len = 10;
448 RK_S32 i, j;
449 RK_U32 addr;
450 RK_U32 log2_min_cb_size;
451 RK_S32 width, height;
452 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
453 h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
454 BitputCtx_t bp;
455
456 if (NULL == reg_ctx || dxva_cxt == NULL) {
457 mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
458 __FILE__, __FUNCTION__, __LINE__);
459 return MPP_ERR_NULL_PTR;
460 }
461
462 void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
463
464 if (dxva_cxt->pp.ps_update_flag || dxva_cxt->pp.scaling_list_enabled_flag) {
465 RK_U64 *pps_packet = reg_ctx->pps_buf;
466
467 if (NULL == pps_ptr) {
468 mpp_err("pps_data get ptr error");
469 return MPP_ERR_NOMEM;
470 }
471
472 for (i = 0; i < 10; i++) pps_packet[i] = 0;
473
474 mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
475
476 // SPS
477 mpp_put_bits(&bp, dxva_cxt->pp.vps_id , 4);
478 mpp_put_bits(&bp, dxva_cxt->pp.sps_id , 4);
479 mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc , 2);
480
481 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
482 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
483 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
484
485 mpp_put_bits(&bp, width , 16);//yandong
486 mpp_put_bits(&bp, height , 16);//yandong
487 mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8 , 4);
488 mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8 , 4);
489 mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4 , 5);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size , 2); //log2_max_coding_block_depth
491 mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
492 mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2 , 3);
493 ///<-zrh comment ^ 57 bit above
494 mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size , 2);
495 mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter , 3);
496 mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra , 3);
497 mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag , 1);
498 mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag , 1);
499 mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag , 1);
500 ///<-zrh comment ^ 68 bit above
501 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag , 1);
502 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0 , 4);
503 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
504 mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag , 1);
505 mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size , 3);
506 mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);
507
508 mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets , 7);
509 mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag , 1);
510 mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps , 6);
511 mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag , 1);
512 mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag , 1);
513 ///<-zrh comment ^ 100 bit above
514
515 mpp_put_bits(&bp, 0 , 7 );
516 mpp_put_align(&bp , 32, 0xf);
517
518 // PPS
519 mpp_put_bits(&bp, dxva_cxt->pp.pps_id , 6 );
520 mpp_put_bits(&bp, dxva_cxt->pp.sps_id , 4 );
521 mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag , 1 );
522 mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag , 1 );
523 mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits , 13);
524 mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1);
525 mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag , 1);
526 mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1 , 4);
527 mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1 , 4);
528 mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26 , 7);
529 mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag , 1);
530 mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag , 1);
531 mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag , 1);
532
533 mpp_put_bits(&bp, log2_min_cb_size +
534 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
535 dxva_cxt->pp.diff_cu_qp_delta_depth , 3);
536
537 h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
538 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );
539
540 mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset , 5);
541 mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset , 5);
542 mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag , 1);
543 mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag , 1);
544 mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag , 1);
545 mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag , 1);
546 mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag , 1);
547 mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag , 1);
548 mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag , 1);
549 mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag , 1);
550
551 mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag , 1);
552 mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag , 1);
553 mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2 , 4);
554 mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2 , 4);
555 mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag , 1);
556 mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2 , 3);
557 mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1);
558 mpp_put_bits(&bp, 0 , 3);
559 mpp_put_bits(&bp, dxva_cxt->pp.num_tile_columns_minus1 + 1, 5);
560 mpp_put_bits(&bp, dxva_cxt->pp.num_tile_rows_minus1 + 1 , 5 );
561 mpp_put_bits(&bp, 3, 2); //mSps_Pps[i]->mMode
562 mpp_put_align(&bp, 64, 0xf);
563
564 {
565 /// tiles info begin
566 RK_U16 column_width[20];
567 RK_U16 row_height[22];
568
569 memset(column_width, 0, sizeof(column_width));
570 memset(row_height, 0, sizeof(row_height));
571
572 if (dxva_cxt->pp.tiles_enabled_flag) {
573 if (dxva_cxt->pp.uniform_spacing_flag == 0) {
574 RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
575 RK_S32 ctu_width_in_pic = (width +
576 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
577 RK_S32 ctu_height_in_pic = (height +
578 (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
579 RK_S32 sum = 0;
580 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
581 column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
582 sum += column_width[i] ;
583 }
584 column_width[i] = ctu_width_in_pic - sum;
585
586 sum = 0;
587 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
588 row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
589 sum += row_height[i];
590 }
591 row_height[i] = ctu_height_in_pic - sum;
592 } // end of (pps->uniform_spacing_flag == 0)
593 else {
594
595 RK_S32 pic_in_cts_width = (width +
596 (1 << (log2_min_cb_size +
597 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
598 / (1 << (log2_min_cb_size +
599 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
600 RK_S32 pic_in_cts_height = (height +
601 (1 << (log2_min_cb_size +
602 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
603 / (1 << (log2_min_cb_size +
604 dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
605
606 for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
607 column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
608 (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);
609
610 for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
611 row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
612 (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
613 }
614 } // pps->tiles_enabled_flag
615 else {
616 RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
617 column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth;
618 row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth;
619 }
620
621 for (j = 0; j < 20; j++) {
622 if (column_width[j] > 0)
623 column_width[j]--;
624 mpp_put_bits(&bp, column_width[j], 12);// yandong 8bit -> 12bit
625 }
626
627 for (j = 0; j < 22; j++) {
628 if (row_height[j] > 0)
629 row_height[j]--;
630 mpp_put_bits(&bp, row_height[j], 12);// yandong 8bit -> 12bit
631 }
632 }
633
634 {
635 RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->scaling_list_data);
636 if (dxva_cxt->pp.scaling_list_data_present_flag) {
637 addr = (dxva_cxt->pp.pps_id + 16) * 1360;
638 } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
639 addr = dxva_cxt->pp.sps_id * 1360;
640 } else {
641 addr = 80 * 1360;
642 }
643
644 hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
645
646 RK_U32 fd = mpp_buffer_get_fd(reg_ctx->scaling_list_data);
647 /* need to config addr */
648 addr = fd | (addr << 10);
649
650 mpp_put_bits(&bp, addr, 32);
651 mpp_put_align(&bp, 64, 0xf);
652 }
653 for (i = 0; i < 64; i++)
654 memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
655 } else if (reg_ctx->fast_mode) {
656 for (i = 0; i < 64; i++)
657 memcpy(pps_ptr + i * 80, reg_ctx->pps_buf, 80);
658 }
659
660 #ifdef dump
661 fwrite(pps_ptr, 1, 80 * 64, fp);
662 fflush(fp);
663 #endif
664 return 0;
665 }
666
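/*
 * Refine the per-type RCB (row/column cache buffer) sizes pre-computed by
 * vdpu34x_get_rcb_buf_size(). Sizes are derived from the CTU-aligned
 * width/height, bit depth, chroma format and number of tile columns; they
 * are computed in bits and converted with MPP_RCB_BYTES().
 */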
static void h265d_refine_rcb_size(Vdpu34xRcbInfo *rcb_info,
                                  Vdpu34xH265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
670 {
671 RK_U32 rcb_bits = 0;
672 h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
673 DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 400, 1: 420, 2: 422, 3: 444
675 RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
676 RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
677 RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
678 RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;
679
680 width = MPP_ALIGN(width, ctu_size);
681 height = MPP_ALIGN(height, ctu_size);
682
683 /* RCB_STRMD_ROW */
684 if (width > 8192) {
685 RK_U32 factor = ctu_size / 16;
686 rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) * factor * 24 + ext_align_size;
687 } else
688 rcb_bits = 0;
689 rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
690
691 /* RCB_TRANSD_ROW */
692 if (width > 8192)
693 rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
694 else
695 rcb_bits = 0;
696 rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
697
698 /* RCB_TRANSD_COL */
699 if (height > 8192 && tile_col_cut_num)
700 rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1);
701 else
702 rcb_bits = 0;
703 rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
704
705 /* RCB_INTER_ROW */
706 rcb_bits = width * 22 + ext_align_size;
707 rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
708
709 /* RCB_INTER_COL */
710 rcb_bits = tile_col_cut_num ? height * 22 : 0;
711 rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);
712
713 /* RCB_INTRA_ROW */
714 rcb_bits = width * 48 + ext_align_size;
715 rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
716
717 /* RCB_DBLK_ROW */
718 if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
719 if (ctu_size == 32)
720 rcb_bits = width * ( 4 + 6 * bit_depth);
721 else
722 rcb_bits = width * ( 2 + 6 * bit_depth);
723 } else {
724 if (ctu_size == 32)
725 rcb_bits = width * ( 4 + 8 * bit_depth);
726 else
727 rcb_bits = width * ( 2 + 8 * bit_depth);
728 }
729 rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
730 rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
731
732 /* RCB_SAO_ROW */
733 if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
734 rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
735 } else {
736 rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
737 }
738 rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
739 rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
740
741 /* RCB_FBC_ROW */
742 if (hw_regs->common.reg012.fbc_e) {
743 rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
744 rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
745 } else
746 rcb_bits = 0;
747 rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
748
749 /* RCB_FILT_COL */
750 if (tile_col_cut_num) {
751 if (hw_regs->common.reg012.fbc_e) {
752 RK_U32 ctu_idx = ctu_size >> 5;
753 RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
754 RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;
755
756 rcb_bits = height * (a * bit_depth + b);
757 } else {
758 RK_U32 ctu_idx = ctu_size >> 5;
759 RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
760 RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;
761
762 rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 16 : 0));
763 }
764 } else
765 rcb_bits = 0;
766
767 rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
768 }
769
static void hal_h265d_rcb_info_update(void *hal, void *dxva,
                                      Vdpu34xH265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
773 {
774 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
775 h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
776 DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0: 400, 1: 420, 2: 422, 3: 444
778 RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
779 RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
780 RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;
781
782 if (reg_ctx->num_row_tiles != num_tiles ||
783 reg_ctx->bit_depth != bit_depth ||
784 reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
785 reg_ctx->ctu_size != ctu_size ||
786 reg_ctx->width != width ||
787 reg_ctx->height != height) {
788 RK_U32 i = 0;
789 RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
790
791 reg_ctx->rcb_buf_size = vdpu34x_get_rcb_buf_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, width, height);
792 h265d_refine_rcb_size((Vdpu34xRcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);
793
794 for (i = 0; i < loop; i++) {
795 MppBuffer rcb_buf;
796
797 if (reg_ctx->rcb_buf[i]) {
798 mpp_buffer_put(reg_ctx->rcb_buf[i]);
799 reg_ctx->rcb_buf[i] = NULL;
800 }
801 mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size);
802 reg_ctx->rcb_buf[i] = rcb_buf;
803 }
804
805 reg_ctx->num_row_tiles = num_tiles;
806 reg_ctx->bit_depth = bit_depth;
807 reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
808 reg_ctx->ctu_size = ctu_size;
809 reg_ctx->width = width;
810 reg_ctx->height = height;
811 }
812 }
813
814 #define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
815 do{ \
816 switch(index){\
817 case 0: regs.reg200.ref0_##field = value; break;\
818 case 1: regs.reg200.ref1_##field = value; break;\
819 case 2: regs.reg200.ref2_##field = value; break;\
820 case 3: regs.reg200.ref3_##field = value; break;\
821 case 4: regs.reg200.ref4_##field = value; break;\
822 case 5: regs.reg200.ref5_##field = value; break;\
823 case 6: regs.reg200.ref6_##field = value; break;\
824 case 7: regs.reg200.ref7_##field = value; break;\
825 case 8: regs.reg201.ref8_##field = value; break;\
826 case 9: regs.reg201.ref9_##field = value; break;\
827 case 10: regs.reg201.ref10_##field = value; break;\
828 case 11: regs.reg201.ref11_##field = value; break;\
829 case 12: regs.reg201.ref12_##field = value; break;\
830 case 13: regs.reg201.ref13_##field = value; break;\
831 case 14: regs.reg201.ref14_##field = value; break;\
832 case 15: regs.reg201.ref15_##field = value; break;\
833 default: break;}\
834 }while(0)
835
836 #define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
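/*
 * pocdistance() is the absolute POC difference. The reference list loop in
 * hal_h265d_vdpu34x_gen_regs() uses it to pick the error-free reference
 * closest to the current POC as the error concealment reference
 * (reg132_error_ref_base).
 */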
837
static MPP_RET hal_h265d_vdpu34x_gen_regs(void *hal, HalTaskInfo *syn)
839 {
840 RK_S32 i = 0;
841 RK_S32 log2_min_cb_size;
842 RK_S32 width, height;
843 RK_S32 stride_y, stride_uv, virstrid_y;
844 Vdpu34xH265dRegSet *hw_regs;
845 RK_S32 ret = MPP_SUCCESS;
846 MppBuffer streambuf = NULL;
847 RK_S32 aglin_offset = 0;
848 RK_S32 valid_ref = -1;
849 MppBuffer framebuf = NULL;
850 HalBuf *mv_buf = NULL;
851 RK_S32 fd = -1;
852 RK_U32 mv_size = 0;
853 RK_S32 distance = INT_MAX;
854 h265d_dxva2_picture_context_t *dxva_cxt =
855 (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
856 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
857 void *rps_ptr = NULL;
858 RK_U32 stream_buf_size = 0;
859 DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
860 RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size +
861 pp->log2_min_luma_coding_block_size_minus3 + 3);
862
863 if (syn->dec.flags.parse_err ||
864 (syn->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
865 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
866 return MPP_OK;
867 }
868
869 if (reg_ctx ->fast_mode) {
870 for (i = 0; i < MAX_GEN_REG; i++) {
871 if (!reg_ctx->g_buf[i].use_flag) {
872 syn->dec.reg_index = i;
873
874 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
875 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
876 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
877
878 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
879 reg_ctx->g_buf[i].use_flag = 1;
880 break;
881 }
882 }
883 if (i == MAX_GEN_REG) {
884 mpp_err("hevc rps buf all used");
885 return MPP_ERR_NOMEM;
886 }
887 } else {
888 syn->dec.reg_index = 0;
889 }
890 rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
891 if (NULL == rps_ptr) {
892
893 mpp_err("rps_data get ptr error");
894 return MPP_ERR_NOMEM;
895 }
896
897
898 if (syn->dec.syntax.data == NULL) {
899 mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
900 return MPP_ERR_NULL_PTR;
901 }
902
903 /* output pps */
904 hw_regs = (Vdpu34xH265dRegSet*)reg_ctx->hw_regs;
905 memset(hw_regs, 0, sizeof(Vdpu34xH265dRegSet));
906
907 if (NULL == reg_ctx->hw_regs) {
908 return MPP_ERR_NULL_PTR;
909 }
910
911 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
912
913 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
914 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
915 mv_size = vdpu34x_get_colmv_size(width, height, ctu_size, 16, 16, COLMV_COMPRESS_EN);
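    /*
     * Co-located MV buffers are kept per frame slot; recreate the pool
     * whenever it does not exist yet or the required size grows (e.g. after
     * a resolution change).
     */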
916 if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
917 size_t size = mv_size;
918
919 if (reg_ctx->cmv_bufs) {
920 hal_bufs_deinit(reg_ctx->cmv_bufs);
921 reg_ctx->cmv_bufs = NULL;
922 }
923
        hal_bufs_init(&reg_ctx->cmv_bufs);
925 if (reg_ctx->cmv_bufs == NULL) {
926 mpp_err_f("colmv bufs init fail");
927 return MPP_ERR_NULL_PTR;
928 }
929
930 reg_ctx->mv_size = mv_size;
931 reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
932 hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
933 }
934
935 {
936 MppFrame mframe = NULL;
937 RK_U32 ver_virstride;
938
939 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
940 SLOT_FRAME_PTR, &mframe);
941 stride_y = mpp_frame_get_hor_stride(mframe);
942 ver_virstride = mpp_frame_get_ver_stride(mframe);
943 stride_uv = stride_y;
944 virstrid_y = ver_virstride * stride_y;
945 hw_regs->common.reg013.h26x_error_mode = 1;
946 hw_regs->common.reg013.h26x_streamd_error_mode = 1;
947 hw_regs->common.reg013.colmv_error_mode = 1;
948 hw_regs->common.reg021.error_deb_en = 1;
949 hw_regs->common.reg021.inter_error_prc_mode = 0;
950 hw_regs->common.reg021.error_intra_mode = 1;
951
952 hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
953 hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
954 hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
955 hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
956
957 if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
958 RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
959 RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
960
961 hw_regs->common.reg012.fbc_e = 1;
962 hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
963 hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
964 hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
965 } else {
966 hw_regs->common.reg012.fbc_e = 0;
967 hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
968 hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
969 hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
970 }
971 }
972 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
973 SLOT_BUFFER, &framebuf);
974 hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
    /* if decout_base is zero, this frame may be in error, so return directly (add by csy) */
977
978 if (hw_regs->common_addr.reg130_decout_base == 0) {
979 return 0;
980 }
981 fd = mpp_buffer_get_fd(framebuf);
982 hw_regs->common_addr.reg130_decout_base = fd;
983 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
984 hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
985
986 hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
987
988 mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
989 &streambuf);
990 if ( dxva_cxt->bitstream == NULL) {
991 dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
992 }
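    /*
     * With HW_RPS defined the reference picture sets are packed in the
     * hardware layout by hal_h265d_slice_hw_rps(); otherwise the RPS mode
     * register is switched to software mode and the parsed RPS is written
     * by hal_h265d_slice_output_rps() instead.
     */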
993 if (reg_ctx->is_v34x) {
994 #ifdef HW_RPS
995 hw_regs->common.reg012.wait_reset_en = 1;
996 hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
997 hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
998 #else
999 hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
1000 hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1001 #endif
1002 } else {
1003 hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
1004 }
1005
1006 /* cabac table */
1007 hw_regs->h265d_addr.reg197_cabactbl_base = reg_ctx->bufs_fd;
1008 /* pps */
1009 hw_regs->h265d_addr.reg161_pps_base = reg_ctx->bufs_fd;
1010 hw_regs->h265d_addr.reg163_rps_base = reg_ctx->bufs_fd;
1011
1012 hw_regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(streambuf);
1013 hw_regs->common_addr.reg129_rlcwrite_base = mpp_buffer_get_fd(streambuf);
1014 stream_buf_size = mpp_buffer_get_size(streambuf);
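    /*
     * Stream length register: bitstream size rounded up to 16 bytes plus 64
     * padding bytes, clamped to the stream buffer size; the padding tail is
     * zeroed below so the hardware does not parse stale data past the frame.
     */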
1015 hw_regs->common.reg016_str_len = ((dxva_cxt->bitstream_size + 15)
1016 & (~15)) + 64;
1017 hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
1018 hw_regs->common.reg016_str_len : stream_buf_size;
1019
1020 aglin_offset = hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
1021 if (aglin_offset > 0) {
1022 memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
1023 aglin_offset);
1024 }
1025 hw_regs->common.reg010.dec_e = 1;
1026 hw_regs->common.reg011.dec_timeout_e = 1;
1027 hw_regs->common.reg012.wr_ddr_align_en = dxva_cxt->pp.tiles_enabled_flag
1028 ? 0 : 1;
1029 hw_regs->common.reg012.colmv_compress_en = COLMV_COMPRESS_EN;
1030
1031 if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) {
1032 hw_regs->common.reg026.swreg_block_gating_e = 0xfffef;
1033 hw_regs->common.reg024.cabac_err_en_lowbits = 0;
1034 hw_regs->common.reg025.cabac_err_en_highbits = 0;
1035 } else {
1036 hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
1037 hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
1038 hw_regs->common.reg026.swreg_block_gating_e = 0xfffff;
1039 }
1040
1041 hw_regs->common.reg011.dec_clkgate_e = 1;
1042 hw_regs->common.reg011.dec_e_strmd_clkgate_dis = 0;
1043 hw_regs->common.reg026.reg_cfg_gating_en = 1;
1044 hw_regs->common.reg032_timeout_threshold = 0x3ffff;
1045
1046 valid_ref = hw_regs->common_addr.reg130_decout_base;
1047 reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.CurrPic.Index7Bits;
1048 hw_regs->common_addr.reg132_error_ref_base = valid_ref;
1049
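    /*
     * Walk RefPicList: program each reference frame base, POC and colmv
     * buffer. The error-free reference closest in POC becomes the error
     * reference; missing or broken entries fall back to it, or, on RK3588
     * with disable_error set, to a dedicated missing-ref buffer.
     */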
1050 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
1051 if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
1052 dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
1053
1054 MppFrame mframe = NULL;
1055 MppBuffer ref_buf = NULL;
1056 hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
1057 mpp_buf_slot_get_prop(reg_ctx->slots,
1058 dxva_cxt->pp.RefPicList[i].Index7Bits,
1059 SLOT_BUFFER, &ref_buf);
1060 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
1061 SLOT_FRAME_PTR, &mframe);
1062 if (!ref_buf && mpp_get_soc_type() == ROCKCHIP_SOC_RK3588 &&
1063 reg_ctx->cfg->base.disable_error) {
1064 if (reg_ctx->missing_ref_buf && reg_ctx->missing_ref_buf_size < mpp_buffer_get_size(framebuf)) {
1065 mpp_buffer_put(reg_ctx->missing_ref_buf);
1066 reg_ctx->missing_ref_buf = NULL;
1067 }
1068
1069 if (!reg_ctx->missing_ref_buf) {
1070 reg_ctx->missing_ref_buf_size = mpp_buffer_get_size(framebuf);
                    mpp_buffer_get(reg_ctx->group, &reg_ctx->missing_ref_buf, reg_ctx->missing_ref_buf_size);
1072 if (!reg_ctx->missing_ref_buf) {
1073 syn->dec.flags.ref_err = 1;
1074 h265h_dbg(H265H_DBG_TASK_ERR, "Failed to generate missing ref buf\n");
1075 return MPP_ERR_NOMEM;
1076 }
1077 }
1078 ref_buf = reg_ctx->missing_ref_buf;
1079 }
1080 if (ref_buf) {
1081 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(ref_buf);
1082 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
1083 h265h_dbg(H265H_DBG_TASK_ERR, "cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
1084 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
1085 && (!mpp_frame_get_errinfo(mframe))) {
1086 distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
1087 hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
1088 reg_ctx->error_index[syn->dec.reg_index] = dxva_cxt->pp.RefPicList[i].Index7Bits;
1089 hw_regs->common.reg021.error_intra_mode = 0;
1090 h265h_dbg(H265H_DBG_TASK_ERR, "update error ref to ref[%d] to poc %d, slot_idx %d, fd %d\n",
1091 i, dxva_cxt->pp.PicOrderCntValList[i],
1092 dxva_cxt->pp.RefPicList[i].Index7Bits,
1093 hw_regs->common_addr.reg132_error_ref_base);
1094 }
1095 } else {
1096 h265h_dbg(H265H_DBG_TASK_ERR, "ref[%d] buffer is empty, replace with fd %d\n", i, valid_ref);
1097 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
1098 }
1099
1100 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
1101
1102 SET_REF_VALID(hw_regs->h265d_param, i, 1);
1103
1104 if (hw_regs->common.reg013.h26x_error_mode &&
1105 !hw_regs->common.reg021.error_intra_mode &&
1106 (!ref_buf || mpp_frame_get_errinfo(mframe))) {
1107
1108 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1109 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1110 }
1111 } else {
1112 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]);
1113 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
1114 /* mark 3 to differ from current frame */
1115 if (reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits)
1116 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
1117 }
1118 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
1119 }
1120
1121 if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_cxt->pp.CurrPic.Index7Bits) &&
1122 !dxva_cxt->pp.IntraPicFlag) {
1123 h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process");
1124 syn->dec.flags.ref_err = 1;
1125 return MPP_OK;
1126 }
1127
1128 if (reg_ctx->is_v34x) {
1129 hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data);
1130 } else {
1131 hal_h265d_output_pps_packet(hal, syn->dec.syntax.data);
1132 }
1133
1134 mpp_dev_set_reg_offset(reg_ctx->dev, 161, reg_ctx->spspps_offset);
1135 /* rps */
1136 mpp_dev_set_reg_offset(reg_ctx->dev, 163, reg_ctx->rps_offset);
1137
1138 hw_regs->common.reg013.timeout_mode = 1;
1139 hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
1140
1141 hw_regs->common.reg011.buf_empty_en = 1;
1142
1143 hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
1144 vdpu34x_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
1145 reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
1146 (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1147 vdpu34x_setup_statistic(&hw_regs->common, &hw_regs->statistic);
1148 mpp_buffer_sync_end(reg_ctx->bufs);
1149
1150 return ret;
1151 }
1152
static MPP_RET hal_h265d_vdpu34x_start(void *hal, HalTaskInfo *task)
1154 {
1155 MPP_RET ret = MPP_OK;
1156 RK_U8* p = NULL;
1157 Vdpu34xH265dRegSet *hw_regs = NULL;
1158 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1159 RK_S32 index = task->dec.reg_index;
1160
1161 RK_U32 i;
1162
1163 if (task->dec.flags.parse_err ||
1164 (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1165 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1166 return MPP_OK;
1167 }
1168
1169 if (reg_ctx->fast_mode) {
1170 p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
1171 hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1172 } else {
1173 p = (RK_U8*)reg_ctx->hw_regs;
1174 hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1175 }
1176
1177 if (hw_regs == NULL) {
1178 mpp_err("hal_h265d_start hw_regs is NULL");
1179 return MPP_ERR_NULL_PTR;
1180 }
1181 for (i = 0; i < 68; i++) {
1182 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1183 i, *((RK_U32*)p));
1184 //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
1185 p += 4;
1186 }
1187
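    /*
     * Register programming sequence: write the common / codec-param /
     * common-addr / codec-addr / statistic register classes with separate
     * MPP_DEV_REG_WR ioctls (plus the high POC class on RK3588), request a
     * readback of irq_status, pass the RCB info for SRAM placement, then
     * kick the hardware with MPP_DEV_CMD_SEND.
     */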
1188 do {
1189 MppDevRegWrCfg wr_cfg;
1190 MppDevRegRdCfg rd_cfg;
1191
1192 wr_cfg.reg = &hw_regs->common;
1193 wr_cfg.size = sizeof(hw_regs->common);
1194 wr_cfg.offset = OFFSET_COMMON_REGS;
1195
1196 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1197 if (ret) {
1198 mpp_err_f("set register write failed %d\n", ret);
1199 break;
1200 }
1201
1202 wr_cfg.reg = &hw_regs->h265d_param;
1203 wr_cfg.size = sizeof(hw_regs->h265d_param);
1204 wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
1205
1206 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1207 if (ret) {
1208 mpp_err_f("set register write failed %d\n", ret);
1209 break;
1210 }
1211
1212 wr_cfg.reg = &hw_regs->common_addr;
1213 wr_cfg.size = sizeof(hw_regs->common_addr);
1214 wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1215
1216 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1217 if (ret) {
1218 mpp_err_f("set register write failed %d\n", ret);
1219 break;
1220 }
1221
1222 wr_cfg.reg = &hw_regs->h265d_addr;
1223 wr_cfg.size = sizeof(hw_regs->h265d_addr);
1224 wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1225
1226 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1227 if (ret) {
1228 mpp_err_f("set register write failed %d\n", ret);
1229 break;
1230 }
1231
1232 wr_cfg.reg = &hw_regs->statistic;
1233 wr_cfg.size = sizeof(hw_regs->statistic);
1234 wr_cfg.offset = OFFSET_STATISTIC_REGS;
1235
1236 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1237 if (ret) {
1238 mpp_err_f("set register write failed %d\n", ret);
1239 break;
1240 }
1241
1242 if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3588) {
1243 wr_cfg.reg = &hw_regs->highpoc;
1244 wr_cfg.size = sizeof(hw_regs->highpoc);
1245 wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1246
1247 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1248 if (ret) {
1249 mpp_err_f("set register write failed %d\n", ret);
1250 break;
1251 }
1252 }
1253
1254 rd_cfg.reg = &hw_regs->irq_status;
1255 rd_cfg.size = sizeof(hw_regs->irq_status);
1256 rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1257
1258 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1259 if (ret) {
1260 mpp_err_f("set register read failed %d\n", ret);
1261 break;
1262 }
1263
1264 /* rcb info for sram */
1265 vdpu34x_set_rcbinfo(reg_ctx->dev, (Vdpu34xRcbInfo*)reg_ctx->rcb_info);
1266
1267 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1268 if (ret) {
1269 mpp_err_f("send cmd failed %d\n", ret);
1270 break;
1271 }
1272 } while (0);
1273
1274 return ret;
1275 }
1276
1277
static MPP_RET hal_h265d_vdpu34x_wait(void *hal, HalTaskInfo *task)
1279 {
1280 MPP_RET ret = MPP_OK;
1281 RK_S32 index = task->dec.reg_index;
1282 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1283 RK_U8* p = NULL;
1284 Vdpu34xH265dRegSet *hw_regs = NULL;
1285 RK_S32 i;
1286
1287 if (reg_ctx->fast_mode) {
1288 hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1289 } else {
1290 hw_regs = ( Vdpu34xH265dRegSet *)reg_ctx->hw_regs;
1291 }
1292
1293 p = (RK_U8*)hw_regs;
1294
1295 if (task->dec.flags.parse_err ||
1296 (task->dec.flags.ref_err && !reg_ctx->cfg->base.disable_error)) {
1297 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1298 goto ERR_PROC;
1299 }
1300
1301 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1302 if (ret)
1303 mpp_err_f("poll cmd failed %d\n", ret);
1304
1305 ERR_PROC:
1306 if (task->dec.flags.parse_err ||
1307 task->dec.flags.ref_err ||
1308 hw_regs->irq_status.reg224.dec_error_sta ||
1309 hw_regs->irq_status.reg224.buf_empty_sta ||
1310 hw_regs->irq_status.reg224.dec_bus_sta ||
1311 !hw_regs->irq_status.reg224.dec_rdy_sta) {
1312 if (!reg_ctx->fast_mode) {
1313 if (reg_ctx->dec_cb)
1314 mpp_callback(reg_ctx->dec_cb, &task->dec);
1315 } else {
1316 MppFrame mframe = NULL;
1317 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1318 SLOT_FRAME_PTR, &mframe);
1319 if (mframe) {
1320 reg_ctx->fast_mode_err_found = 1;
1321 mpp_frame_set_errinfo(mframe, 1);
1322 }
1323 }
1324 } else {
1325 if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1326 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1327 if (task->dec.refer[i] >= 0) {
1328 MppFrame frame_ref = NULL;
1329
1330 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1331 SLOT_FRAME_PTR, &frame_ref);
1332 h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1333 i, task->dec.refer[i], frame_ref);
1334 if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1335 MppFrame frame_out = NULL;
1336 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1337 SLOT_FRAME_PTR, &frame_out);
1338 mpp_frame_set_errinfo(frame_out, 1);
1339 break;
1340 }
1341 }
1342 }
1343 }
1344 }
1345
1346 for (i = 0; i < 68; i++) {
1347 if (i == 1) {
1348 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1349 i, *((RK_U32*)p));
1350 }
1351
1352 if (i == 45) {
1353 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1354 i, *((RK_U32*)p));
1355 }
1356 p += 4;
1357 }
1358
1359 if (reg_ctx->fast_mode) {
1360 reg_ctx->g_buf[index].use_flag = 0;
1361 }
1362
1363 return ret;
1364 }
1365
static MPP_RET hal_h265d_vdpu34x_reset(void *hal)
1367 {
1368 MPP_RET ret = MPP_OK;
1369 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1370 p_hal->fast_mode_err_found = 0;
1371 (void)hal;
1372 return ret;
1373 }
1374
static MPP_RET hal_h265d_vdpu34x_flush(void *hal)
1376 {
1377 MPP_RET ret = MPP_OK;
1378
1379 (void)hal;
1380 return ret;
1381 }
1382
static MPP_RET hal_h265d_vdpu34x_control(void *hal, MpiCmd cmd_type, void *param)
1384 {
1385 MPP_RET ret = MPP_OK;
1386 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1387
1388 (void)hal;
1389 switch ((MpiCmd)cmd_type) {
1390 case MPP_DEC_SET_FRAME_INFO: {
1391 MppFrame frame = (MppFrame)param;
1392 MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1393
1394 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1395 vdpu34x_afbc_align_calc(p_hal->slots, frame, 16);
1396 }
1397 break;
1398 }
1399 case MPP_DEC_SET_OUTPUT_FORMAT: {
1400 } break;
1401 default:
1402 break;
1403 }
1404 return ret;
1405 }
1406
1407 const MppHalApi hal_h265d_vdpu34x = {
1408 .name = "h265d_vdpu34x",
1409 .type = MPP_CTX_DEC,
1410 .coding = MPP_VIDEO_CodingHEVC,
1411 .ctx_size = sizeof(HalH265dCtx),
1412 .flag = 0,
1413 .init = hal_h265d_vdpu34x_init,
1414 .deinit = hal_h265d_vdpu34x_deinit,
1415 .reg_gen = hal_h265d_vdpu34x_gen_regs,
1416 .start = hal_h265d_vdpu34x_start,
1417 .wait = hal_h265d_vdpu34x_wait,
1418 .reset = hal_h265d_vdpu34x_reset,
1419 .flush = hal_h265d_vdpu34x_flush,
1420 .control = hal_h265d_vdpu34x_control,
1421 };
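/*
 * Minimal usage sketch of this ops table as driven by the common HAL layer
 * (the surrounding ctx/cfg/task objects are illustrative, not defined here):
 *
 *     hal_h265d_vdpu34x.init(ctx, &cfg);
 *     hal_h265d_vdpu34x.reg_gen(ctx, &task);  // build Vdpu34xH265dRegSet
 *     hal_h265d_vdpu34x.start(ctx, &task);    // write regs and send to kernel
 *     hal_h265d_vdpu34x.wait(ctx, &task);     // poll irq_status, mark errors
 *     hal_h265d_vdpu34x.deinit(ctx);
 */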
1422