1 /*
2 * Copyright 2022 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_h265d_vdpu382"
18
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "mpp_env.h"
23 #include "mpp_mem.h"
24 #include "mpp_bitread.h"
25 #include "mpp_bitput.h"
26
27 #include "h265d_syntax.h"
28 #include "hal_h265d_debug.h"
29 #include "hal_h265d_ctx.h"
30 #include "hal_h265d_com.h"
31 #include "hal_h265d_vdpu382.h"
32 #include "vdpu382_h265d.h"
33 #include "rk_hdr_meta_com.h"
34
35 /* #define dump */
36 #ifdef dump
37 static FILE *fp = NULL;
38 #endif
39
40 #define HW_RPS
41 #define PPS_SIZE (112 * 64)//(96x64)
42
/*
 * Set the single-bit hevc_ref_valid_N flag in reg99 selected by the
 * runtime reference index (0..14).  The register fields are discrete
 * bitfields, so a switch is required; indices outside 0..14 are
 * silently ignored (default branch).
 */
#define SET_REF_VALID(regs, index, value)\
    do{ \
        switch(index){\
        case 0: regs.reg99.hevc_ref_valid_0 = value; break;\
        case 1: regs.reg99.hevc_ref_valid_1 = value; break;\
        case 2: regs.reg99.hevc_ref_valid_2 = value; break;\
        case 3: regs.reg99.hevc_ref_valid_3 = value; break;\
        case 4: regs.reg99.hevc_ref_valid_4 = value; break;\
        case 5: regs.reg99.hevc_ref_valid_5 = value; break;\
        case 6: regs.reg99.hevc_ref_valid_6 = value; break;\
        case 7: regs.reg99.hevc_ref_valid_7 = value; break;\
        case 8: regs.reg99.hevc_ref_valid_8 = value; break;\
        case 9: regs.reg99.hevc_ref_valid_9 = value; break;\
        case 10: regs.reg99.hevc_ref_valid_10 = value; break;\
        case 11: regs.reg99.hevc_ref_valid_11 = value; break;\
        case 12: regs.reg99.hevc_ref_valid_12 = value; break;\
        case 13: regs.reg99.hevc_ref_valid_13 = value; break;\
        case 14: regs.reg99.hevc_ref_valid_14 = value; break;\
        default: break;}\
    }while(0)
63
/* Table dimensions: chroma formats (400/420/422/444) and CTU sizes (16/32/64) */
#define FMT 4
#define CTU 3

/*
 * Per-(CTU size, chroma format) coefficients used by h265d_refine_rcb_size()
 * to size the filter column buffer: rcb_bits = height * (a * bit_depth + b).
 */
typedef struct {
    RK_U32 a;
    RK_U32 b;
} FilterdColBufRatio;

/* coefficients when frame-buffer compression (FBC) is enabled */
static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
    /* 400    420       422       444 */
    {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
    {{0, 0}, {27, 8},  {36, 8},  {52, 8}},  //ctu 32
    {{0, 0}, {27, 5},  {36, 5},  {52, 5}}   //ctu 64
};

/* coefficients when FBC is disabled */
static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
    /* 400    420       422       444 */
    {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
    {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
    {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
};

/*
 * Layout of the single shared ION buffer allocated in init:
 *   [cabac table][per-task region: spspps | rps | scaling list] * cnt
 * Each section is 4K-aligned so the hardware base addresses stay aligned.
 */
#define CABAC_TAB_ALIGEND_SIZE      (MPP_ALIGN(27456, SZ_4K))
#define SPSPPS_ALIGNED_SIZE         (MPP_ALIGN(112 * 64, SZ_4K))
#define RPS_ALIGEND_SIZE            (MPP_ALIGN(400 * 8, SZ_4K))
#define SCALIST_ALIGNED_SIZE        (MPP_ALIGN(81 * 1360, SZ_4K))
#define INFO_BUFFER_SIZE            (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
#define ALL_BUFFER_SIZE(cnt)        (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE *cnt)

/* byte offsets of each per-task region inside the shared buffer */
#define CABAC_TAB_OFFSET            (0)
#define SPSPPS_OFFSET(pos)          (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * pos))
#define RPS_OFFSET(pos)             (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
#define SCALIST_OFFSET(pos)         (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
97
hal_h265d_vdpu382_init(void * hal,MppHalCfg * cfg)98 static MPP_RET hal_h265d_vdpu382_init(void *hal, MppHalCfg *cfg)
99 {
100 RK_S32 ret = 0;
101 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
102
103 mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
104 mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align);
105
106 reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
107 if (reg_ctx->scaling_qm == NULL) {
108 mpp_err("scaling_org alloc fail");
109 return MPP_ERR_MALLOC;
110 }
111
112 reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1);
113 reg_ctx->pps_buf = mpp_calloc(RK_U64, 15);
114 reg_ctx->sw_rps_buf = mpp_calloc(RK_U64, 400);
115
116 if (reg_ctx->scaling_rk == NULL) {
117 mpp_err("scaling_rk alloc fail");
118 return MPP_ERR_MALLOC;
119 }
120
121 if (reg_ctx->group == NULL) {
122 ret = mpp_buffer_group_get_internal(®_ctx->group, MPP_BUFFER_TYPE_ION);
123 if (ret) {
124 mpp_err("h265d mpp_buffer_group_get failed\n");
125 return ret;
126 }
127 }
128
129 {
130 RK_U32 i = 0;
131 RK_U32 max_cnt = reg_ctx->fast_mode ? MAX_GEN_REG : 1;
132
133 //!< malloc buffers
134 ret = mpp_buffer_get(reg_ctx->group, ®_ctx->bufs, ALL_BUFFER_SIZE(max_cnt));
135 if (ret) {
136 mpp_err("h265d mpp_buffer_get failed\n");
137 return ret;
138 }
139
140 reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs);
141 reg_ctx->offset_cabac = CABAC_TAB_OFFSET;
142 for (i = 0; i < max_cnt; i++) {
143 reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382H265dRegSet));
144 reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i);
145 reg_ctx->offset_rps[i] = RPS_OFFSET(i);
146 reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i);
147 }
148 }
149
150 if (!reg_ctx->fast_mode) {
151 reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs;
152 reg_ctx->spspps_offset = reg_ctx->offset_spspps[0];
153 reg_ctx->rps_offset = reg_ctx->offset_rps[0];
154 reg_ctx->sclst_offset = reg_ctx->offset_sclst[0];
155 }
156
157 ret = mpp_buffer_write(reg_ctx->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
158 if (ret) {
159 mpp_err("h265d write cabac_table data failed\n");
160 return ret;
161 }
162
163 {
164 // report hw_info to parser
165 const MppSocInfo *info = mpp_get_soc_info();
166 const void *hw_info = NULL;
167 RK_U32 i;
168
169 for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
170 if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) {
171 hw_info = info->dec_caps[i];
172 break;
173 }
174 }
175
176 mpp_assert(hw_info);
177 cfg->hw_info = hw_info;
178
179 //save hw_info to context
180 reg_ctx->hw_info = hw_info;
181 }
182
183 #ifdef dump
184 fp = fopen("/data/hal.bin", "wb");
185 #endif
186 (void) cfg;
187 return MPP_OK;
188 }
189
hal_h265d_vdpu382_deinit(void * hal)190 static MPP_RET hal_h265d_vdpu382_deinit(void *hal)
191 {
192 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
193 RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;
194 RK_U32 i;
195
196 if (reg_ctx->bufs) {
197 mpp_buffer_put(reg_ctx->bufs);
198 reg_ctx->bufs = NULL;
199 }
200
201 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1;
202 for (i = 0; i < loop; i++) {
203 if (reg_ctx->rcb_buf[i]) {
204 mpp_buffer_put(reg_ctx->rcb_buf[i]);
205 reg_ctx->rcb_buf[i] = NULL;
206 }
207 }
208
209 if (reg_ctx->group) {
210 mpp_buffer_group_put(reg_ctx->group);
211 reg_ctx->group = NULL;
212 }
213
214 for (i = 0; i < loop; i++)
215 MPP_FREE(reg_ctx->g_buf[i].hw_regs);
216
217 MPP_FREE(reg_ctx->scaling_qm);
218 MPP_FREE(reg_ctx->scaling_rk);
219 MPP_FREE(reg_ctx->pps_buf);
220 MPP_FREE(reg_ctx->sw_rps_buf);
221
222 if (reg_ctx->cmv_bufs) {
223 hal_bufs_deinit(reg_ctx->cmv_bufs);
224 reg_ctx->cmv_bufs = NULL;
225 }
226
227 return MPP_OK;
228 }
229
/*
 * Serialize the current SPS/PPS syntax elements (from the DXVA picture
 * parameters) into the fixed-layout hardware PPS packet expected by the
 * vdpu382 core, replicate the packet over all 64 PPS slots in the shared
 * buffer, and program the scaling-list data/register when scaling lists
 * are enabled.
 *
 * The bit widths and ordering below are hardware-defined; the packet is
 * only rebuilt when pp.ps_update_flag signals a parameter-set change, but
 * the scaling-list setup and the 64-slot copy run on every call.
 * Returns 0 on success or a negative MPP error code.
 */
static RK_S32 hal_h265d_v382_output_pps_packet(void *hal, void *dxva)
{
    RK_S32 fifo_len = 14;//12   /* packet length in 64-bit words */
    RK_S32 i, j;
    RK_U32 addr;
    RK_U32 log2_min_cb_size;
    RK_S32 width, height;
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    Vdpu382H265dRegSet *hw_reg = (Vdpu382H265dRegSet*)(reg_ctx->hw_regs);
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    BitputCtx_t bp;

    if (NULL == reg_ctx || dxva_cxt == NULL) {
        mpp_err("%s:%s:%d reg_ctx or dxva_cxt is NULL",
                __FILE__, __FUNCTION__, __LINE__);
        return MPP_ERR_NULL_PTR;
    }
    /* destination slot for this task inside the shared ION buffer
     * (void-pointer arithmetic is a GNU extension used throughout mpp) */
    void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset;
    if (dxva_cxt->pp.ps_update_flag) {
        RK_U64 *pps_packet = reg_ctx->pps_buf;
        if (NULL == pps_ptr) {
            mpp_err("pps_data get ptr error");
            return MPP_ERR_NOMEM;
        }

        /* clear the packet before re-packing */
        for (i = 0; i < 14; i++) pps_packet[i] = 0;

        mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);

        // SPS
        mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);

        /* picture size in pixels, derived from min-CB units */
        log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
        width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
        height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);

        mpp_put_bits(&bp, width                                          , 16);
        mpp_put_bits(&bp, height                                         , 16);
        mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8 + 8         , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8       , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4      , 5);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size   , 2); //log2_maxa_coding_block_depth
        mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2   , 3);
        ///<-zrh comment ^  63 bit above
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size     , 2);
        mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter        , 3);
        mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra        , 3);
        mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag                  , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag                           , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag        , 1);
        ///<-zrh comment ^  68 bit above
        /* PCM fields are forced to zero when pcm_enabled_flag is off */
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag                           , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0   , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag              , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size , 3);
        mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3);

        mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets               , 7);
        mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag           , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps                , 6);
        mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag             , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag       , 1);
        ///<-zrh comment ^ 100 bit above

        mpp_put_bits(&bp, 0                                                      , 7 ); //49bits
        //yandong change
        mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1, 4);
        mpp_put_bits(&bp, 0, 3);
        /* pad the SPS section to a 32-bit boundary with 0xf filler */
        mpp_put_align(&bp , 32, 0xf);                                    //128
        // PPS
        mpp_put_bits(&bp, dxva_cxt->pp.pps_id                                    , 6 );
        mpp_put_bits(&bp, dxva_cxt->pp.sps_id                                    , 4 );
        mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag     , 1 );
        mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag                  , 1 );
        mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits               , 13);
        mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag             , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag                   , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1  , 4);//31 bits
        mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1  , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26                           , 7);
        mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag               , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag               , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag                  , 1); //164
        /* log2 of the CU QP-delta granularity */
        mpp_put_bits(&bp, log2_min_cb_size +
                     dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size -
                     dxva_cxt->pp.diff_cu_qp_delta_depth                         , 3);

        h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size,
                  dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth );

        mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset                          , 5);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset                          , 5);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag  , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag                        , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag                      , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag            , 1 );
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag                        , 1 );
        mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag          , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag, 1);
        mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag     , 1); //185
        mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag   , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag       , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2                      , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2                        , 4);
        mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag           , 1);
        mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2      , 3);
        mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag, 1);
        mpp_put_bits(&bp, 0                                                      , 3);
        /* tile counts written as 0 when tiles are disabled */
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5);
        mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 );
        mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode
        mpp_put_align(&bp, 64, 0xf);
        {
            /// tiles info begin
            /* hardware expects up to 20 column widths and 22 row heights,
             * each stored minus one in 12 bits */
            RK_U16 column_width[20];
            RK_U16 row_height[22];

            memset(column_width, 0, sizeof(column_width));
            memset(row_height, 0, sizeof(row_height));

            if (dxva_cxt->pp.tiles_enabled_flag) {

                if (dxva_cxt->pp.uniform_spacing_flag == 0) {
                    /* explicit tile layout: copy signalled sizes, last tile
                     * takes the remainder of the picture */
                    RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size;
                    RK_S32 ctu_width_in_pic = (width +
                                               (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
                    RK_S32 ctu_height_in_pic = (height +
                                                (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ;
                    RK_S32 sum = 0;
                    for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) {
                        column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1;
                        sum += column_width[i] ;
                    }
                    column_width[i] = ctu_width_in_pic - sum;

                    sum = 0;
                    for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) {
                        row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1;
                        sum += row_height[i];
                    }
                    row_height[i] = ctu_height_in_pic - sum;
                } // end of (pps->uniform_spacing_flag == 0)
                else {
                    /* uniform spacing: distribute CTU columns/rows evenly
                     * using the integer-division split from the HEVC spec */
                    RK_S32 pic_in_cts_width = (width +
                                               (1 << (log2_min_cb_size +
                                                      dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
                                              / (1 << (log2_min_cb_size +
                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));
                    RK_S32 pic_in_cts_height = (height +
                                                (1 << (log2_min_cb_size +
                                                       dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1)
                                               / (1 << (log2_min_cb_size +
                                                        dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size));

                    for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++)
                        column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) -
                                          (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1);

                    for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++)
                        row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) -
                                        (i * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1);
                }
            } // pps->tiles_enabled_flag
            else {
                /* no tiles: the whole picture is a single tile in CTUs */
                RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
                column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth;
                row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth;
            }

            /* emit sizes minus one; unused entries stay 0 */
            for (j = 0; j < 20; j++) {
                if (column_width[j] > 0)
                    column_width[j]--;
                mpp_put_bits(&bp, column_width[j], 12);
            }

            for (j = 0; j < 22; j++) {
                if (row_height[j] > 0)
                    row_height[j]--;
                mpp_put_bits(&bp, row_height[j], 12);
            }
        }

        /* trailing reserved bits, then align to the 64-bit packet boundary */
        mpp_put_bits(&bp, 0, 32);
        mpp_put_bits(&bp, 0, 70);
        mpp_put_align(&bp, 64, 0xf);//128
    }

    if (dxva_cxt->pp.scaling_list_enabled_flag) {
        MppDevRegOffsetCfg trans_cfg;
        RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset;

        /* select the 1360-byte scaling-list slot: PPS lists at 16..79,
         * SPS lists at 0..15, default list at slot 80 */
        if (dxva_cxt->pp.scaling_list_data_present_flag) {
            addr = (dxva_cxt->pp.pps_id + 16) * 1360;
        } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
            addr = dxva_cxt->pp.sps_id * 1360;
        } else {
            addr = 80 * 1360;
        }

        hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);

        hw_reg->h265d_addr.reg180_scanlist_addr = reg_ctx->bufs_fd;
        hw_reg->common.reg012.scanlist_addr_valid_en = 1;

        /* need to config addr */
        trans_cfg.reg_idx = 180;
        trans_cfg.offset = addr + reg_ctx->sclst_offset;
        mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
    }

    /* replicate the 112-byte packet into all 64 hardware PPS slots */
    for (i = 0; i < 64; i++)
        memcpy(pps_ptr + i * 112, reg_ctx->pps_buf, 112);
#ifdef dump
    fwrite(pps_ptr, 1, 80 * 64, fp);
    RK_U32 *tmp = (RK_U32 *)pps_ptr;
    for (i = 0; i < 112 / 4; i++) {
        mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
    }
#endif
    return 0;
}
456
/*
 * Refine the row/column cache buffer (RCB) sizes for the current stream
 * geometry, overwriting the defaults produced by vdpu382_get_rcb_buf_size().
 * Each entry is first computed in bits, then converted to bytes via
 * MPP_RCB_BYTES.  Column buffers are only needed when the picture is
 * split into tile columns.
 *
 * NOTE(review): the per-type coefficients below are hardware-derived
 * (vdpu382 TRM); they are reproduced as shipped - verify against the
 * datasheet before changing any constant.
 */
static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
                                  Vdpu382H265dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    RK_U32 rcb_bits = 0;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    /* CTU size in pixels from SPS log2 fields */
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    /* number of vertical tile cuts (columns - 1) */
    RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
    /* extra per-cut alignment padding in bits */
    RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;

    width = MPP_ALIGN(width, ctu_size);
    height = MPP_ALIGN(height, ctu_size);

    /* RCB_STRMD_ROW: stream-demux row buffer, only needed for >=8K width */
    if (width >= 8192) {
        RK_U32 factor = 64 / ctu_size;

        rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_ROW: transform row buffer for the part beyond 8192 px */
    if (width >= 8192)
        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_COL: transform column buffer, only with tile columns */
    if (height >= 8192 && tile_col_cut_num)
        rcb_bits = tile_col_cut_num ? (MPP_ALIGN(height - 8192, 4) << 1) : 0;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_ROW: inter-prediction row buffer */
    rcb_bits = width * 22 + ext_align_size;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_COL: inter-prediction column buffer (tiles only) */
    rcb_bits = tile_col_cut_num ? (height * 22) : 0;
    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTRA_ROW: intra row buffer; doubled when chroma is present */
    rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_DBLK_ROW: deblocking row buffer; coefficient depends on chroma
     * format and CTU size */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 6 * bit_depth);
        else
            rcb_bits = width * ( 2 + 6 * bit_depth);
    } else {
        if (ctu_size == 32)
            rcb_bits = width * ( 4 + 8 * bit_depth);
        else
            rcb_bits = width * ( 2 + 8 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_SAO_ROW: sample-adaptive-offset row buffer */
    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
        rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
    } else {
        rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
    }
    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FBC_ROW: frame-buffer-compression row buffer.
     * NOTE(review): (chroma_fmt_idc - 1) underflows RK_U32 for monochrome
     * (idc == 0); this path appears to assume FBC implies chroma - confirm */
    if (hw_regs->common.reg012.fbc_e) {
        rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
        rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FILT_COL: filter column buffer, sized from the coefficient
     * tables above; ctu_size>>5 maps 32->1, 64->2 (16 -> 0) */
    if (tile_col_cut_num) {
        if (hw_regs->common.reg012.fbc_e) {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b);
        } else {
            RK_U32 ctu_idx = ctu_size >> 5;
            RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
            RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;

            rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
        }
    } else
        rcb_bits = 0;
    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}
559
/*
 * Re-size the RCB buffers when the stream geometry changes.
 *
 * Compares the current tile-row count, bit depth, chroma format, CTU size
 * and picture size against the values cached in the context; on any
 * change it recomputes the RCB layout (vdpu382_get_rcb_buf_size +
 * h265d_refine_rcb_size) and re-allocates one RCB buffer per register set
 * (MAX_GEN_REG in fast mode, otherwise one).
 *
 * Fix vs. previous revision: the mpp_buffer_get() result was never
 * checked, so on allocation failure an indeterminate MppBuffer handle was
 * stored in rcb_buf[i].  The handle is now NULL-initialized and the error
 * is reported; a NULL entry is tolerated by the deinit path.
 */
static void hal_h265d_rcb_info_update(void *hal, void *dxva,
                                      Vdpu382H265dRegSet *hw_regs,
                                      RK_S32 width, RK_S32 height)
{
    HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
    RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420,2 422,3 444
    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;

    if (reg_ctx->num_row_tiles != num_tiles ||
        reg_ctx->bit_depth != bit_depth ||
        reg_ctx->chroma_fmt_idc != chroma_fmt_idc ||
        reg_ctx->ctu_size != ctu_size ||
        reg_ctx->width != width ||
        reg_ctx->height != height) {
        RK_U32 i = 0;
        RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1;

        reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, width, height);
        h265d_refine_rcb_size((Vdpu382RcbInfo*)reg_ctx->rcb_info, hw_regs, width, height, dxva_cxt);

        for (i = 0; i < loop; i++) {
            MppBuffer rcb_buf = NULL;

            if (reg_ctx->rcb_buf[i]) {
                mpp_buffer_put(reg_ctx->rcb_buf[i]);
                reg_ctx->rcb_buf[i] = NULL;
            }
            /* check the allocation: on failure keep NULL instead of
             * storing an indeterminate handle */
            if (mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size))
                mpp_err("h265d rcb buf %d alloc failed\n", i);
            reg_ctx->rcb_buf[i] = rcb_buf;
        }

        /* cache the geometry that produced this layout */
        reg_ctx->num_row_tiles  = num_tiles;
        reg_ctx->bit_depth      = bit_depth;
        reg_ctx->chroma_fmt_idc = chroma_fmt_idc;
        reg_ctx->ctu_size       = ctu_size;
        reg_ctx->width          = width;
        reg_ctx->height         = height;
    }
}
603
/*
 * Write `value` into the per-reference POC high-bit field `field` of
 * registers reg200/reg201, selected by the runtime reference index
 * (0..7 land in reg200, 8..15 in reg201).  Out-of-range indices are
 * silently ignored (default branch).
 */
#define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
    do{ \
        switch(index){\
        case 0: regs.reg200.ref0_##field = value; break;\
        case 1: regs.reg200.ref1_##field = value; break;\
        case 2: regs.reg200.ref2_##field = value; break;\
        case 3: regs.reg200.ref3_##field = value; break;\
        case 4: regs.reg200.ref4_##field = value; break;\
        case 5: regs.reg200.ref5_##field = value; break;\
        case 6: regs.reg200.ref6_##field = value; break;\
        case 7: regs.reg200.ref7_##field = value; break;\
        case 8: regs.reg201.ref8_##field = value; break;\
        case 9: regs.reg201.ref9_##field = value; break;\
        case 10: regs.reg201.ref10_##field = value; break;\
        case 11: regs.reg201.ref11_##field = value; break;\
        case 12: regs.reg201.ref12_##field = value; break;\
        case 13: regs.reg201.ref13_##field = value; break;\
        case 14: regs.reg201.ref14_##field = value; break;\
        case 15: regs.reg201.ref15_##field = value; break;\
        default: break;}\
    }while(0)

/* absolute POC distance; arguments are evaluated twice - pass simple values */
#define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a)))
/* largest RK_S32 value, used as the initial "no reference yet" distance */
#define MAX_INT 2147483647
628
hal_h265d_vdpu382_gen_regs(void * hal,HalTaskInfo * syn)629 static MPP_RET hal_h265d_vdpu382_gen_regs(void *hal, HalTaskInfo *syn)
630 {
631 RK_S32 i = 0;
632 RK_S32 log2_min_cb_size;
633 RK_S32 width, height;
634 RK_S32 stride_y, stride_uv, virstrid_y;
635 Vdpu382H265dRegSet *hw_regs;
636 RK_S32 ret = MPP_SUCCESS;
637 MppBuffer streambuf = NULL;
638 RK_S32 aglin_offset = 0;
639 RK_S32 valid_ref = -1;
640 MppBuffer framebuf = NULL;
641 HalBuf *mv_buf = NULL;
642 RK_S32 fd = -1;
643 RK_U32 mv_size = 0;
644 RK_S32 distance = MAX_INT;
645 h265d_dxva2_picture_context_t *dxva_cxt =
646 (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
647 HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal;
648 void *rps_ptr = NULL;
649 RK_U32 stream_buf_size = 0;
650
651 if (syn->dec.flags.parse_err ||
652 syn->dec.flags.ref_err) {
653 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
654 return MPP_OK;
655 }
656
657 if (reg_ctx ->fast_mode) {
658 for (i = 0; i < MAX_GEN_REG; i++) {
659 if (!reg_ctx->g_buf[i].use_flag) {
660 syn->dec.reg_index = i;
661
662 reg_ctx->spspps_offset = reg_ctx->offset_spspps[i];
663 reg_ctx->rps_offset = reg_ctx->offset_rps[i];
664 reg_ctx->sclst_offset = reg_ctx->offset_sclst[i];
665
666 reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs;
667 reg_ctx->g_buf[i].use_flag = 1;
668 break;
669 }
670 }
671 if (i == MAX_GEN_REG) {
672 mpp_err("hevc rps buf all used");
673 return MPP_ERR_NOMEM;
674 }
675 }
676 rps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->rps_offset;
677 if (NULL == rps_ptr) {
678
679 mpp_err("rps_data get ptr error");
680 return MPP_ERR_NOMEM;
681 }
682
683
684 if (syn->dec.syntax.data == NULL) {
685 mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
686 return MPP_ERR_NULL_PTR;
687 }
688
689 /* output pps */
690 hw_regs = (Vdpu382H265dRegSet*)reg_ctx->hw_regs;
691 memset(hw_regs, 0, sizeof(Vdpu382H265dRegSet));
692
693 hal_h265d_v382_output_pps_packet(hal, syn->dec.syntax.data);
694
695 if (NULL == reg_ctx->hw_regs) {
696 return MPP_ERR_NULL_PTR;
697 }
698
699
700 log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
701
702 width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
703 height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
704 mv_size = (MPP_ALIGN(width, 64) * MPP_ALIGN(height, 64)) >> 3;
705 if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) {
706 size_t size = mv_size;
707
708 if (reg_ctx->cmv_bufs) {
709 hal_bufs_deinit(reg_ctx->cmv_bufs);
710 reg_ctx->cmv_bufs = NULL;
711 }
712
713 hal_bufs_init(®_ctx->cmv_bufs);
714 if (reg_ctx->cmv_bufs == NULL) {
715 mpp_err_f("colmv bufs init fail");
716 return MPP_ERR_NULL_PTR;
717 }
718
719 reg_ctx->mv_size = mv_size;
720 reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots);
721 hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size);
722 }
723
724 {
725 MppFrame mframe = NULL;
726 RK_U32 ver_virstride;
727
728 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
729 SLOT_FRAME_PTR, &mframe);
730 stride_y = mpp_frame_get_hor_stride(mframe);
731 ver_virstride = mpp_frame_get_ver_stride(mframe);
732 stride_uv = stride_y;
733 virstrid_y = ver_virstride * stride_y;
734 hw_regs->common.reg013.h26x_error_mode = 1;
735 hw_regs->common.reg021.error_deb_en = 1;
736 hw_regs->common.reg021.inter_error_prc_mode = 0;
737 hw_regs->common.reg021.error_intra_mode = 1;
738
739 hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
740 hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
741 hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
742 hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
743
744 if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
745 RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
746 RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (ver_virstride + 64) / 16, SZ_4K);
747
748 hw_regs->common.reg012.fbc_e = 1;
749 hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
750 hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
751 hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
752 } else {
753 hw_regs->common.reg012.fbc_e = 0;
754 hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
755 hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
756 hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
757 }
758
759 if (MPP_FRAME_FMT_IS_HDR(mpp_frame_get_fmt(mframe)) && reg_ctx->cfg->base.enable_hdr_meta)
760 fill_hdr_meta_to_frame(mframe, HDR_HEVC);
761 }
762 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
763 SLOT_BUFFER, &framebuf);
764 hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
765 /*if out_base is equal to zero it means this frame may error
766 we return directly add by csy*/
767
768 if (hw_regs->common_addr.reg130_decout_base == 0) {
769 return 0;
770 }
771 fd = mpp_buffer_get_fd(framebuf);
772 hw_regs->common_addr.reg130_decout_base = fd;
773 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
774 hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
775
776 hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
777
778 mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER,
779 &streambuf);
780 if ( dxva_cxt->bitstream == NULL) {
781 dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
782 }
783 #ifdef HW_RPS
784 hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
785 hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_ctx->sw_rps_buf, reg_ctx->fast_mode);
786 #else
787 hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
788 hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
789 #endif
790
791 MppDevRegOffsetCfg trans_cfg;
792 /* cabac table */
793 hw_regs->h265d_addr.reg197_cabactbl_base = reg_ctx->bufs_fd;
794 /* pps */
795 hw_regs->h265d_addr.reg161_pps_base = reg_ctx->bufs_fd;
796 hw_regs->h265d_addr.reg163_rps_base = reg_ctx->bufs_fd;
797
798 hw_regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(streambuf);
799 hw_regs->common_addr.reg129_rlcwrite_base = mpp_buffer_get_fd(streambuf);
800 stream_buf_size = mpp_buffer_get_size(streambuf);
801 hw_regs->common.reg016_str_len = ((dxva_cxt->bitstream_size + 15)
802 & (~15)) + 64;
803 hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
804 hw_regs->common.reg016_str_len : stream_buf_size;
805
806 aglin_offset = hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
807 if (aglin_offset > 0) {
808 memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
809 aglin_offset);
810 }
811 hw_regs->common.reg010.dec_e = 1;
812 hw_regs->common.reg012.colmv_compress_en = reg_ctx->hw_info ?
813 reg_ctx->hw_info->cap_colmv_compress : 0;
814
815 hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
816 hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
817
818 hw_regs->common.reg011.dec_clkgate_e = 1;
819 hw_regs->common.reg011.err_head_fill_e = 1;
820 hw_regs->common.reg011.err_colmv_fill_e = 1;
821
822 hw_regs->common.reg026.inter_auto_gating_e = 1;
823 hw_regs->common.reg026.filterd_auto_gating_e = 1;
824 hw_regs->common.reg026.strmd_auto_gating_e = 1;
825 hw_regs->common.reg026.mcp_auto_gating_e = 1;
826 hw_regs->common.reg026.busifd_auto_gating_e = 1;
827 hw_regs->common.reg026.dec_ctrl_auto_gating_e = 1;
828 hw_regs->common.reg026.intra_auto_gating_e = 1;
829 hw_regs->common.reg026.mc_auto_gating_e = 1;
830 hw_regs->common.reg026.transd_auto_gating_e = 1;
831 hw_regs->common.reg026.sram_auto_gating_e = 1;
832 hw_regs->common.reg026.cru_auto_gating_e = 1;
833 hw_regs->common.reg026.reg_cfg_gating_en = 1;
834 hw_regs->common.reg032_timeout_threshold = 0x3ffff;
835
836 valid_ref = hw_regs->common_addr.reg130_decout_base;
837 reg_ctx->error_index = dxva_cxt->pp.CurrPic.Index7Bits;
838 hw_regs->common_addr.reg132_error_ref_base = valid_ref;
839
840 memset(&hw_regs->highpoc.reg205, 0, sizeof(RK_U32));
841
842 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
843 if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
844 dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
845
846 MppFrame mframe = NULL;
847 hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
848 mpp_buf_slot_get_prop(reg_ctx->slots,
849 dxva_cxt->pp.RefPicList[i].Index7Bits,
850 SLOT_BUFFER, &framebuf);
851 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
852 SLOT_FRAME_PTR, &mframe);
853 if (framebuf != NULL) {
854 hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
855 valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
856 // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
857 if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
858 && (!mpp_frame_get_errinfo(mframe))) {
859 distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
860 hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
861 reg_ctx->error_index = dxva_cxt->pp.RefPicList[i].Index7Bits;
862 hw_regs->common.reg021.error_intra_mode = 0;
863
864 }
865 } else {
866 hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
867 }
868
869 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
870 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
871
872 SET_REF_VALID(hw_regs->h265d_param, i, 1);
873 }
874 }
875
876 if ((reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits) && !dxva_cxt->pp.IntraPicFlag) {
877 // mpp_err("current frm may be err, should skip process");
878 syn->dec.flags.ref_err = 1;
879 return MPP_OK;
880 }
881
882 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
883
884 if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
885 dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
886 MppFrame mframe = NULL;
887
888 mpp_buf_slot_get_prop(reg_ctx->slots,
889 dxva_cxt->pp.RefPicList[i].Index7Bits,
890 SLOT_BUFFER, &framebuf);
891
892 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
893 SLOT_FRAME_PTR, &mframe);
894
895 if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
896 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
897 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
898 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
899 }
900 } else {
901 mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index);
902 hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
903 hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
904 /* mark 3 to differ from current frame */
905 if (reg_ctx->error_index == dxva_cxt->pp.CurrPic.Index7Bits)
906 SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
907 }
908 }
909
910 trans_cfg.reg_idx = 161;
911 trans_cfg.offset = reg_ctx->spspps_offset;
912 mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
913 /* rps */
914 trans_cfg.reg_idx = 163;
915 trans_cfg.offset = reg_ctx->rps_offset;
916 mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
917
918 hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
919
920 hw_regs->common.reg011.buf_empty_en = 1;
921
922 hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
923 vdpu382_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ?
924 reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0],
925 (Vdpu382RcbInfo*)reg_ctx->rcb_info);
926 {
927 MppFrame mframe = NULL;
928
929 mpp_buf_slot_get_prop(reg_ctx->slots, dxva_cxt->pp.CurrPic.Index7Bits,
930 SLOT_FRAME_PTR, &mframe);
931
932 if (mpp_frame_get_thumbnail_en(mframe)) {
933 hw_regs->h265d_addr.reg198_scale_down_luma_base =
934 hw_regs->common_addr.reg130_decout_base;
935 hw_regs->h265d_addr.reg199_scale_down_chorme_base =
936 hw_regs->common_addr.reg130_decout_base;
937 vdpu382_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->common);
938 } else {
939 hw_regs->h265d_addr.reg198_scale_down_luma_base = 0;
940 hw_regs->h265d_addr.reg199_scale_down_chorme_base = 0;
941 hw_regs->common.reg012.scale_down_en = 0;
942 }
943 }
944 vdpu382_setup_statistic(&hw_regs->common, &hw_regs->statistic);
945
946 return ret;
947 }
948
hal_h265d_vdpu382_start(void * hal,HalTaskInfo * task)949 static MPP_RET hal_h265d_vdpu382_start(void *hal, HalTaskInfo *task)
950 {
951 MPP_RET ret = MPP_OK;
952 RK_U8* p = NULL;
953 Vdpu382H265dRegSet *hw_regs = NULL;
954 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
955 RK_S32 index = task->dec.reg_index;
956
957 RK_U32 i;
958
959 if (task->dec.flags.parse_err ||
960 task->dec.flags.ref_err) {
961 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
962 return MPP_OK;
963 }
964
965 if (reg_ctx->fast_mode) {
966 p = (RK_U8*)reg_ctx->g_buf[index].hw_regs;
967 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
968 } else {
969 p = (RK_U8*)reg_ctx->hw_regs;
970 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
971 }
972
973 if (hw_regs == NULL) {
974 mpp_err("hal_h265d_start hw_regs is NULL");
975 return MPP_ERR_NULL_PTR;
976 }
977 for (i = 0; i < 68; i++) {
978 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
979 i, *((RK_U32*)p));
980 //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
981 p += 4;
982 }
983
984 do {
985 MppDevRegWrCfg wr_cfg;
986 MppDevRegRdCfg rd_cfg;
987
988 wr_cfg.reg = &hw_regs->common;
989 wr_cfg.size = sizeof(hw_regs->common);
990 wr_cfg.offset = OFFSET_COMMON_REGS;
991
992 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
993 if (ret) {
994 mpp_err_f("set register write failed %d\n", ret);
995 break;
996 }
997
998 wr_cfg.reg = &hw_regs->h265d_param;
999 wr_cfg.size = sizeof(hw_regs->h265d_param);
1000 wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
1001
1002 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1003 if (ret) {
1004 mpp_err_f("set register write failed %d\n", ret);
1005 break;
1006 }
1007
1008 wr_cfg.reg = &hw_regs->common_addr;
1009 wr_cfg.size = sizeof(hw_regs->common_addr);
1010 wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1011
1012 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1013 if (ret) {
1014 mpp_err_f("set register write failed %d\n", ret);
1015 break;
1016 }
1017
1018 wr_cfg.reg = &hw_regs->h265d_addr;
1019 wr_cfg.size = sizeof(hw_regs->h265d_addr);
1020 wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1021
1022 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1023 if (ret) {
1024 mpp_err_f("set register write failed %d\n", ret);
1025 break;
1026 }
1027
1028 wr_cfg.reg = &hw_regs->statistic;
1029 wr_cfg.size = sizeof(hw_regs->statistic);
1030 wr_cfg.offset = OFFSET_STATISTIC_REGS;
1031
1032 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1033 if (ret) {
1034 mpp_err_f("set register write failed %d\n", ret);
1035 break;
1036 }
1037
1038 wr_cfg.reg = &hw_regs->highpoc;
1039 wr_cfg.size = sizeof(hw_regs->highpoc);
1040 wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
1041
1042 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
1043 if (ret) {
1044 mpp_err_f("set register write failed %d\n", ret);
1045 break;
1046 }
1047
1048 rd_cfg.reg = &hw_regs->irq_status;
1049 rd_cfg.size = sizeof(hw_regs->irq_status);
1050 rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1051
1052 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
1053 if (ret) {
1054 mpp_err_f("set register read failed %d\n", ret);
1055 break;
1056 }
1057 /* rcb info for sram */
1058 vdpu382_set_rcbinfo(reg_ctx->dev, (Vdpu382RcbInfo*)reg_ctx->rcb_info);
1059
1060 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL);
1061 if (ret) {
1062 mpp_err_f("send cmd failed %d\n", ret);
1063 break;
1064 }
1065 } while (0);
1066
1067 return ret;
1068 }
1069
1070
hal_h265d_vdpu382_wait(void * hal,HalTaskInfo * task)1071 static MPP_RET hal_h265d_vdpu382_wait(void *hal, HalTaskInfo *task)
1072 {
1073 MPP_RET ret = MPP_OK;
1074 RK_S32 index = task->dec.reg_index;
1075 HalH265dCtx *reg_ctx = (HalH265dCtx *)hal;
1076 RK_U8* p = NULL;
1077 Vdpu382H265dRegSet *hw_regs = NULL;
1078 RK_S32 i;
1079
1080 if (task->dec.flags.parse_err ||
1081 task->dec.flags.ref_err) {
1082 h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
1083 goto ERR_PROC;
1084 }
1085
1086 if (reg_ctx->fast_mode) {
1087 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->g_buf[index].hw_regs;
1088 } else {
1089 hw_regs = ( Vdpu382H265dRegSet *)reg_ctx->hw_regs;
1090 }
1091
1092 p = (RK_U8*)hw_regs;
1093
1094 ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL);
1095 if (ret)
1096 mpp_err_f("poll cmd failed %d\n", ret);
1097
1098 ERR_PROC:
1099 if (task->dec.flags.parse_err ||
1100 task->dec.flags.ref_err ||
1101 hw_regs->irq_status.reg224.dec_error_sta ||
1102 hw_regs->irq_status.reg224.buf_empty_sta ||
1103 hw_regs->irq_status.reg224.dec_bus_sta ||
1104 !hw_regs->irq_status.reg224.dec_rdy_sta) {
1105 if (!reg_ctx->fast_mode) {
1106 if (reg_ctx->dec_cb)
1107 mpp_callback(reg_ctx->dec_cb, &task->dec);
1108 } else {
1109 MppFrame mframe = NULL;
1110 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1111 SLOT_FRAME_PTR, &mframe);
1112 if (mframe) {
1113 reg_ctx->fast_mode_err_found = 1;
1114 mpp_frame_set_errinfo(mframe, 1);
1115 }
1116 }
1117 } else {
1118 if (reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) {
1119 for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
1120 if (task->dec.refer[i] >= 0) {
1121 MppFrame frame_ref = NULL;
1122
1123 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i],
1124 SLOT_FRAME_PTR, &frame_ref);
1125 h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
1126 i, task->dec.refer[i], frame_ref);
1127 if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
1128 MppFrame frame_out = NULL;
1129 mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output,
1130 SLOT_FRAME_PTR, &frame_out);
1131 mpp_frame_set_errinfo(frame_out, 1);
1132 break;
1133 }
1134 }
1135 }
1136 }
1137 }
1138
1139 for (i = 0; i < 68; i++) {
1140 if (i == 1) {
1141 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1142 i, *((RK_U32*)p));
1143 }
1144
1145 if (i == 45) {
1146 h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
1147 i, *((RK_U32*)p));
1148 }
1149 p += 4;
1150 }
1151
1152 if (reg_ctx->fast_mode) {
1153 reg_ctx->g_buf[index].use_flag = 0;
1154 }
1155
1156 return ret;
1157 }
1158
hal_h265d_vdpu382_reset(void * hal)1159 static MPP_RET hal_h265d_vdpu382_reset(void *hal)
1160 {
1161 MPP_RET ret = MPP_OK;
1162 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1163 p_hal->fast_mode_err_found = 0;
1164 (void)hal;
1165 return ret;
1166 }
1167
hal_h265d_vdpu382_flush(void * hal)1168 static MPP_RET hal_h265d_vdpu382_flush(void *hal)
1169 {
1170 MPP_RET ret = MPP_OK;
1171
1172 (void)hal;
1173 return ret;
1174 }
1175
hal_h265d_vdpu382_control(void * hal,MpiCmd cmd_type,void * param)1176 static MPP_RET hal_h265d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
1177 {
1178 MPP_RET ret = MPP_OK;
1179 HalH265dCtx *p_hal = (HalH265dCtx *)hal;
1180
1181 (void)hal;
1182 switch ((MpiCmd)cmd_type) {
1183 case MPP_DEC_SET_FRAME_INFO: {
1184 MppFrame frame = (MppFrame)param;
1185 MppFrameFormat fmt = mpp_frame_get_fmt(frame);
1186
1187 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1188 vdpu382_afbc_align_calc(p_hal->slots, frame, 16);
1189 }
1190 break;
1191 }
1192 case MPP_DEC_SET_OUTPUT_FORMAT: {
1193 } break;
1194 default:
1195 break;
1196 }
1197 return ret;
1198 }
1199
/* HAL operation table for H.265/HEVC decoding on the VDPU382 hardware,
 * registered with the mpp decoder framework. The gen_regs/start/wait
 * trio implements the per-frame decode cycle. */
const MppHalApi hal_h265d_vdpu382 = {
    .name = "h265d_vdpu382",
    .type = MPP_CTX_DEC,
    .coding = MPP_VIDEO_CodingHEVC,
    .ctx_size = sizeof(HalH265dCtx),
    .flag = 0,
    .init = hal_h265d_vdpu382_init,
    .deinit = hal_h265d_vdpu382_deinit,
    .reg_gen = hal_h265d_vdpu382_gen_regs,   /* build hw register set from syntax */
    .start = hal_h265d_vdpu382_start,        /* write registers, kick hardware */
    .wait = hal_h265d_vdpu382_wait,          /* poll completion, handle errors */
    .reset = hal_h265d_vdpu382_reset,
    .flush = hal_h265d_vdpu382_flush,
    .control = hal_h265d_vdpu382_control,
};
1215