1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3 * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4 */
5
6 #define MODULE_TAG "hal_vp9d_vdpu383"
7
8 #include <string.h>
9
10 #include "mpp_debug.h"
11 #include "mpp_env.h"
12 #include "mpp_mem.h"
13 #include "mpp_common.h"
14 #include "mpp_buffer_impl.h"
15 #include "mpp_bitput.h"
16 #include "mpp_compat_impl.h"
17
18 #include "hal_vp9d_debug.h"
19 #include "hal_vp9d_com.h"
20 #include "hal_vp9d_vdpu383.h"
21 #include "hal_vp9d_ctx.h"
22 #include "vdpu383_vp9d.h"
23 #include "vp9d_syntax.h"
24
25 #define HW_PROB 1
26 #define VP9_CONTEXT 4
27 #define VP9_CTU_SIZE 64
28
29 #define GBL_SIZE 2 * (MPP_ALIGN(1299, 128) / 8)
30
31 #define EIGHTTAP 0
32 #define EIGHTTAP_SMOOTH 1
33 #define EIGHTTAP_SHARP 2
34 #define BILINEAR 3
35
36 const RK_U8 literal_to_filter[] = { EIGHTTAP_SMOOTH, EIGHTTAP,
37 EIGHTTAP_SHARP, BILINEAR
38 };
39
/* Per-instance hardware context for the VDPU383 VP9 decoder HAL. */
typedef struct Vdpu383Vp9dCtx_t {
    /* per-task register/buffer sets, one per in-flight task in fast mode */
    Vp9dRegBuf g_buf[MAX_GEN_REG];
    /* packed uncompressed-header ("global config") buffer sent to hardware */
    MppBuffer global_base;
    /* key-frame probability table buffer */
    MppBuffer probe_base;
    /* symbol count buffer (written back by hardware) */
    MppBuffer count_base;
    /* segment id maps, double-buffered; see last_segid_flag */
    MppBuffer segid_cur_base;
    MppBuffer segid_last_base;
    /* default probability table, used when no valid frame context exists */
    MppBuffer prob_default_base;
    /* currently active register set (points into g_buf[] in fast mode) */
    void* hw_regs;
    /* colmv buffer fd of current / previous frame; -1 means not set yet */
    RK_S32 mv_base_addr;
    RK_S32 pre_mv_base_addr;
    /* info carried over from the previously decoded frame */
    Vp9dLastInfo ls_info;
    /*
     * swap between segid_cur_base & segid_last_base
     * 0 - use segid_cur_base as last
     * 1 - use segid_last_base as last
     */
    RK_U32 last_segid_flag;
    RK_S32 width;
    RK_S32 height;
    /* rcb buffers info */
    RK_S32 rcb_buf_size;
    Vdpu383RcbInfo rcb_info[RCB_BUF_COUNT];
    MppBuffer rcb_buf;
    /* change-detection keys for rcb buffer re-allocation */
    RK_U32 num_row_tiles;
    RK_U32 bit_depth;
    /* colmv buffers info */
    HalBufs cmv_bufs;
    RK_S32 mv_size;
    RK_S32 mv_count;
    /* original-size frame buffers used in thumbnail-only (8K scale) mode */
    HalBufs origin_bufs;
    /* per frame-context flag: nonzero when prob_loop_base[i] holds a valid
     * hardware-updated probability context */
    RK_U32 prob_ctx_valid[VP9_CONTEXT];
    MppBuffer prob_loop_base[VP9_CONTEXT];
    /* uncompress header data */
    RK_U8 header_data[168];
} Vdpu383Vp9dCtx;
76
#ifdef DUMP_VDPU383_DATAS
/* debug-dump bookkeeping: snapshots of the segid swap flag and the last
 * probability buffer taken while generating regs, consumed by dump helpers */
static RK_U32 cur_last_segid_flag;
static MppBuffer cur_last_prob_base;
#endif
81
vdpu383_setup_scale_origin_bufs(Vdpu383Vp9dCtx * ctx,MppFrame mframe)82 static MPP_RET vdpu383_setup_scale_origin_bufs(Vdpu383Vp9dCtx *ctx, MppFrame mframe)
83 {
84 /* for 8K FrameBuf scale mode */
85 size_t origin_buf_size = 0;
86
87 origin_buf_size = mpp_frame_get_buf_size(mframe);
88
89 if (!origin_buf_size) {
90 mpp_err_f("origin_bufs get buf size failed\n");
91 return MPP_NOK;
92 }
93 if (ctx->origin_bufs) {
94 hal_bufs_deinit(ctx->origin_bufs);
95 ctx->origin_bufs = NULL;
96 }
97 hal_bufs_init(&ctx->origin_bufs);
98 if (!ctx->origin_bufs) {
99 mpp_err_f("origin_bufs thumb init fail\n");
100 return MPP_ERR_NOMEM;
101 }
102 hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size);
103
104 return MPP_OK;
105 }
hal_vp9d_alloc_res(HalVp9dCtx * hal)106 static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
107 {
108 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
109 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
110 RK_S32 ret = 0;
111 RK_S32 i = 0;
112
113 /* alloc common buffer */
114 for (i = 0; i < VP9_CONTEXT; i++) {
115 ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_loop_base[i], PROB_SIZE);
116 if (ret) {
117 mpp_err("vp9 probe_loop_base get buffer failed\n");
118 return ret;
119 }
120 mpp_buffer_attach_dev(hw_ctx->prob_loop_base[i], p_hal->dev);
121 }
122 ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_default_base, PROB_SIZE);
123 if (ret) {
124 mpp_err("vp9 probe_default_base get buffer failed\n");
125 return ret;
126 }
127 mpp_buffer_attach_dev(hw_ctx->prob_default_base, p_hal->dev);
128
129 ret = mpp_buffer_get(p_hal->group, &hw_ctx->segid_cur_base, MAX_SEGMAP_SIZE);
130 if (ret) {
131 mpp_err("vp9 segid_cur_base get buffer failed\n");
132 return ret;
133 }
134 mpp_buffer_attach_dev(hw_ctx->segid_cur_base, p_hal->dev);
135 ret = mpp_buffer_get(p_hal->group, &hw_ctx->segid_last_base, MAX_SEGMAP_SIZE);
136 if (ret) {
137 mpp_err("vp9 segid_last_base get buffer failed\n");
138 return ret;
139 }
140 mpp_buffer_attach_dev(hw_ctx->segid_last_base, p_hal->dev);
141
142 /* alloc buffer for fast mode or normal */
143 if (p_hal->fast_mode) {
144 for (i = 0; i < MAX_GEN_REG; i++) {
145 hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu383Vp9dRegSet));
146 ret = mpp_buffer_get(p_hal->group,
147 &hw_ctx->g_buf[i].global_base, GBL_SIZE);
148 mpp_buffer_attach_dev(hw_ctx->g_buf[i].global_base, p_hal->dev);
149 if (ret) {
150 mpp_err("vp9 global_base get buffer failed\n");
151 return ret;
152 }
153 ret = mpp_buffer_get(p_hal->group,
154 &hw_ctx->g_buf[i].probe_base, PROB_KF_SIZE);
155 if (ret) {
156 mpp_err("vp9 probe_base get buffer failed\n");
157 return ret;
158 }
159 mpp_buffer_attach_dev(hw_ctx->g_buf[i].probe_base, p_hal->dev);
160 ret = mpp_buffer_get(p_hal->group,
161 &hw_ctx->g_buf[i].count_base, COUNT_SIZE);
162 if (ret) {
163 mpp_err("vp9 count_base get buffer failed\n");
164 return ret;
165 }
166 mpp_buffer_attach_dev(hw_ctx->g_buf[i].count_base, p_hal->dev);
167 }
168 } else {
169 hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(Vdpu383Vp9dRegSet));
170 ret = mpp_buffer_get(p_hal->group, &hw_ctx->global_base, PROB_SIZE);
171 if (ret) {
172 mpp_err("vp9 global_base get buffer failed\n");
173 return ret;
174 }
175 mpp_buffer_attach_dev(hw_ctx->global_base, p_hal->dev);
176
177 ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROB_KF_SIZE);
178 if (ret) {
179 mpp_err("vp9 probe_base get buffer failed\n");
180 return ret;
181 }
182 mpp_buffer_attach_dev(hw_ctx->probe_base, p_hal->dev);
183
184 ret = mpp_buffer_get(p_hal->group, &hw_ctx->count_base, COUNT_SIZE);
185 if (ret) {
186 mpp_err("vp9 count_base get buffer failed\n");
187 return ret;
188 }
189 mpp_buffer_attach_dev(hw_ctx->count_base, p_hal->dev);
190 }
191 return MPP_OK;
192 }
193
hal_vp9d_release_res(HalVp9dCtx * hal)194 static MPP_RET hal_vp9d_release_res(HalVp9dCtx *hal)
195 {
196 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
197 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
198 RK_S32 ret = 0;
199 RK_S32 i = 0;
200
201 if (hw_ctx->prob_default_base) {
202 ret = mpp_buffer_put(hw_ctx->prob_default_base);
203 if (ret) {
204 mpp_err("vp9 probe_wr_base get buffer failed\n");
205 return ret;
206 }
207 }
208 if (hw_ctx->segid_cur_base) {
209 ret = mpp_buffer_put(hw_ctx->segid_cur_base);
210 if (ret) {
211 mpp_err("vp9 segid_cur_base put buffer failed\n");
212 return ret;
213 }
214 }
215 if (hw_ctx->segid_last_base) {
216 ret = mpp_buffer_put(hw_ctx->segid_last_base);
217 if (ret) {
218 mpp_err("vp9 segid_last_base put buffer failed\n");
219 return ret;
220 }
221 }
222 for (i = 0; i < VP9_CONTEXT; i++) {
223 if (hw_ctx->prob_loop_base[i]) {
224 ret = mpp_buffer_put(hw_ctx->prob_loop_base[i]);
225 if (ret) {
226 mpp_err("vp9 prob_loop_base put buffer failed\n");
227 return ret;
228 }
229 }
230 }
231 if (p_hal->fast_mode) {
232 for (i = 0; i < MAX_GEN_REG; i++) {
233 if (hw_ctx->g_buf[i].global_base) {
234 ret = mpp_buffer_put(hw_ctx->g_buf[i].global_base);
235 if (ret) {
236 mpp_err("vp9 global_base put buffer failed\n");
237 return ret;
238 }
239 }
240 if (hw_ctx->g_buf[i].probe_base) {
241 ret = mpp_buffer_put(hw_ctx->g_buf[i].probe_base);
242 if (ret) {
243 mpp_err("vp9 probe_base put buffer failed\n");
244 return ret;
245 }
246 }
247 if (hw_ctx->g_buf[i].count_base) {
248 ret = mpp_buffer_put(hw_ctx->g_buf[i].count_base);
249 if (ret) {
250 mpp_err("vp9 count_base put buffer failed\n");
251 return ret;
252 }
253 }
254 if (hw_ctx->g_buf[i].hw_regs) {
255 mpp_free(hw_ctx->g_buf[i].hw_regs);
256 hw_ctx->g_buf[i].hw_regs = NULL;
257 }
258 if (hw_ctx->g_buf[i].rcb_buf) {
259 ret = mpp_buffer_put(hw_ctx->g_buf[i].rcb_buf);
260 if (ret) {
261 mpp_err("vp9 rcb_buf[%d] put buffer failed\n", i);
262 return ret;
263 }
264 }
265 }
266 } else {
267 if (hw_ctx->global_base) {
268 ret = mpp_buffer_put(hw_ctx->global_base);
269 if (ret) {
270 mpp_err("vp9 global_base get buffer failed\n");
271 return ret;
272 }
273 }
274 if (hw_ctx->probe_base) {
275 ret = mpp_buffer_put(hw_ctx->probe_base);
276 if (ret) {
277 mpp_err("vp9 probe_base get buffer failed\n");
278 return ret;
279 }
280 }
281 if (hw_ctx->count_base) {
282 ret = mpp_buffer_put(hw_ctx->count_base);
283 if (ret) {
284 mpp_err("vp9 count_base put buffer failed\n");
285 return ret;
286 }
287 }
288 if (hw_ctx->hw_regs) {
289 mpp_free(hw_ctx->hw_regs);
290 hw_ctx->hw_regs = NULL;
291 }
292 if (hw_ctx->rcb_buf) {
293 ret = mpp_buffer_put(hw_ctx->rcb_buf);
294 if (ret) {
295 mpp_err("vp9 rcb_buf put buffer failed\n");
296 return ret;
297 }
298 }
299 }
300
301 if (hw_ctx->cmv_bufs) {
302 ret = hal_bufs_deinit(hw_ctx->cmv_bufs);
303 if (ret) {
304 mpp_err("vp9 cmv bufs deinit buffer failed\n");
305 return ret;
306 }
307 }
308 if (hw_ctx->origin_bufs) {
309 ret = hal_bufs_deinit(hw_ctx->origin_bufs);
310 if (ret) {
311 mpp_err("thumb vp9 origin_bufs deinit buffer failed\n");
312 return ret;
313 }
314 hw_ctx->origin_bufs = NULL;
315 }
316
317 return MPP_OK;
318 }
319
hal_vp9d_vdpu383_deinit(void * hal)320 static MPP_RET hal_vp9d_vdpu383_deinit(void *hal)
321 {
322 HalVp9dCtx *p_hal = (HalVp9dCtx *)hal;
323 MPP_RET ret = MPP_OK;
324
325 hal_vp9d_release_res(p_hal);
326
327 if (p_hal->group) {
328 ret = mpp_buffer_group_put(p_hal->group);
329 if (ret) {
330 mpp_err("vp9d group free buffer failed\n");
331 return ret;
332 }
333 }
334 MPP_FREE(p_hal->hw_ctx);
335
336 return ret;
337 }
338
hal_vp9d_vdpu383_init(void * hal,MppHalCfg * cfg)339 static MPP_RET hal_vp9d_vdpu383_init(void *hal, MppHalCfg *cfg)
340 {
341 MPP_RET ret = MPP_OK;
342 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
343 MEM_CHECK(ret, p_hal->hw_ctx = mpp_calloc_size(void, sizeof(Vdpu383Vp9dCtx)));
344 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
345 (void) cfg;
346
347 hw_ctx->mv_base_addr = -1;
348 hw_ctx->pre_mv_base_addr = -1;
349 mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
350 mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);
351
352 if (p_hal->group == NULL) {
353 ret = mpp_buffer_group_get_internal(&p_hal->group, MPP_BUFFER_TYPE_ION);
354 if (ret) {
355 mpp_err("vp9 mpp_buffer_group_get failed\n");
356 goto __FAILED;
357 }
358 }
359
360 ret = hal_vp9d_alloc_res(p_hal);
361 if (ret) {
362 mpp_err("hal_vp9d_alloc_res failed\n");
363 goto __FAILED;
364 }
365
366 hw_ctx->last_segid_flag = 1;
367
368 if (cfg->hal_fbc_adj_cfg) {
369 cfg->hal_fbc_adj_cfg->func = vdpu383_afbc_align_calc;
370 cfg->hal_fbc_adj_cfg->expand = 0;
371 }
372
373 return ret;
374 __FAILED:
375 hal_vp9d_vdpu383_deinit(hal);
376 return ret;
377 }
378
/*
 * Refine the per-category row/column cache buffer (RCB) sizes for the
 * given frame geometry and tile layout. Sizes are computed in bits and
 * converted to bytes with MPP_RCB_BYTES; tile variants are only sized
 * when more than one tile row/column exists.
 */
static void vp9d_refine_rcb_size(Vdpu383RcbInfo *rcb_info,
                                 RK_S32 width, RK_S32 height, void* data)
{
    DXVA_PicParams_VP9 *pp = (DXVA_PicParams_VP9*)data;
    RK_U32 n_tile_rows = 1 << pp->log2_tile_rows;
    RK_U32 n_tile_cols = 1 << pp->log2_tile_cols;
    RK_U32 depth = pp->BitDepthMinus8Luma + 8;
    RK_U32 row_extra = n_tile_rows * 64 * 8;
    RK_U32 col_extra = n_tile_cols * 64 * 8;
    RK_U32 bits;

    width = MPP_ALIGN(width, VP9_CTU_SIZE);
    height = MPP_ALIGN(height, VP9_CTU_SIZE);

    /* RCB_STRMD_ROW && RCB_STRMD_TILE_ROW */
    bits = (width > 4096) ? ((width + 63) / 64) * 250 : 0;
    rcb_info[RCB_STRMD_ROW].size = 0;
    rcb_info[RCB_STRMD_TILE_ROW].size = MPP_RCB_BYTES(bits);

    /* RCB_INTER_ROW && RCB_INTER_TILE_ROW */
    bits = ((width + 63) / 64) * 2368;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(bits);
    bits += row_extra;
    rcb_info[RCB_INTER_TILE_ROW].size = (n_tile_rows > 1) ? MPP_RCB_BYTES(bits) : 0;

    /* RCB_INTRA_ROW && RCB_INTRA_TILE_ROW */
    bits = MPP_ALIGN(width, 512) * (depth + 2) * 3; //TODO: refine the x3 factor
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(bits);
    bits += row_extra;
    rcb_info[RCB_INTRA_TILE_ROW].size = (n_tile_rows > 1) ? MPP_RCB_BYTES(bits) : 0;

    /* RCB_FILTERD_ROW && RCB_FILTERD_TILE_ROW
     * save space mode: half for RCB_FILTERD_ROW, half for RCB_FILTERD_PROTECT_ROW */
    {
        RK_U32 append = (width > 4096) ? 27648 : 8192;

        bits = (RK_U32)(MPP_ALIGN(width, 64) * (41 * depth + 13));
        rcb_info[RCB_FILTERD_ROW].size = append + MPP_RCB_BYTES(bits / 2);
        rcb_info[RCB_FILTERD_PROTECT_ROW].size = append + MPP_RCB_BYTES(bits / 2);
        bits += row_extra;
        rcb_info[RCB_FILTERD_TILE_ROW].size = (n_tile_rows > 1) ? MPP_RCB_BYTES(bits) : 0;
    }

    /* RCB_FILTERD_TILE_COL */
    if (n_tile_cols > 1) {
        bits = (RK_U32)(MPP_ALIGN(height, 64) * (42 * depth + 13)) + col_extra;
        rcb_info[RCB_FILTERD_TILE_COL].size = MPP_RCB_BYTES(bits);
    } else {
        rcb_info[RCB_FILTERD_TILE_COL].size = 0;
    }
}
442
/*
 * Recompute RCB buffer sizes and (re)allocate the RCB buffers when the
 * frame geometry, bit depth or tile layout changed since the last frame.
 *
 * Fixes vs. previous version:
 *  - the change-detection key only tracked log2_tile_rows, but
 *    vp9d_refine_rcb_size() also sizes buffers from log2_tile_cols, so a
 *    change in tile columns alone skipped re-allocation; both dimensions
 *    are now folded into the key stored in hw_ctx->num_row_tiles;
 *  - mpp_buffer_get() failures are now reported instead of silently
 *    storing a NULL buffer.
 */
static void hal_vp9d_rcb_info_update(void *hal, Vdpu383Vp9dRegSet *hw_regs, void *data)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
    /* fold both tile dimensions into one key (log2 values are tiny) */
    RK_U32 num_tiles = (pic_param->log2_tile_rows << 8) | pic_param->log2_tile_cols;
    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
    /* NOTE(review): width is rounded with vp9_ver_align here — confirm this
     * matches the horizontal alignment used for the frame slots */
    RK_S32 height = vp9_ver_align(pic_param->height);
    RK_S32 width = vp9_ver_align(pic_param->width);
    (void) hw_regs;

    if (hw_ctx->num_row_tiles != num_tiles ||
        hw_ctx->bit_depth != bit_depth ||
        hw_ctx->width != width ||
        hw_ctx->height != height) {

        hw_ctx->rcb_buf_size = vdpu383_get_rcb_buf_size(hw_ctx->rcb_info, width, height);
        vp9d_refine_rcb_size(hw_ctx->rcb_info, width, height, pic_param);

        if (p_hal->fast_mode) {
            RK_U32 i;

            for (i = 0; i < MPP_ARRAY_ELEMS(hw_ctx->g_buf); i++) {
                MppBuffer rcb_buf = hw_ctx->g_buf[i].rcb_buf;

                if (rcb_buf) {
                    mpp_buffer_put(rcb_buf);
                    hw_ctx->g_buf[i].rcb_buf = NULL;
                }
                rcb_buf = NULL;
                if (mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size))
                    mpp_err("vp9 rcb_buf[%d] get buffer failed\n", i);
                hw_ctx->g_buf[i].rcb_buf = rcb_buf;
            }
        } else {
            MppBuffer rcb_buf = hw_ctx->rcb_buf;

            if (rcb_buf) {
                mpp_buffer_put(rcb_buf);
                rcb_buf = NULL;
            }
            if (mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size))
                mpp_err("vp9 rcb_buf get buffer failed\n");
            hw_ctx->rcb_buf = rcb_buf;
        }

        hw_ctx->num_row_tiles = num_tiles;
        hw_ctx->bit_depth = bit_depth;
        hw_ctx->width = width;
        hw_ctx->height = height;
    }
}
493
494 static void
set_tile_offset(RK_S32 * start,RK_S32 * end,RK_S32 idx,RK_S32 log2_n,RK_S32 n)495 set_tile_offset(RK_S32 *start, RK_S32 *end, RK_S32 idx, RK_S32 log2_n, RK_S32 n)
496 {
497 RK_S32 sb_start = ( idx * n) >> log2_n;
498 RK_S32 sb_end = ((idx + 1) * n) >> log2_n;
499
500 *start = MPP_MIN(sb_start, n) << 3;
501 *end = MPP_MIN(sb_end, n) << 3;
502 }
503
/*
 * Pack the VP9 uncompressed-header fields (plus last-frame and tile layout
 * info) into the fixed-layout bitstream that the VDPU383 hardware reads as
 * its "global config". The field order and bit widths form the hardware
 * contract — nothing in this function may be reordered.
 *
 * @p_hal : HAL instance (supplies hw_ctx->ls_info, the last-frame state).
 * @pp    : DXVA-style picture parameters for the current frame.
 * @data  : destination buffer, written as 64-bit words.
 * @len   : destination length in 64-bit words.
 * Returns MPP_OK unconditionally.
 */
static MPP_RET prepare_uncompress_header(HalVp9dCtx *p_hal, DXVA_PicParams_VP9 *pp,
                                         RK_U64 *data, RK_U32 len)
{
    Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
    BitputCtx_t bp;
    RK_S32 i, j;

    mpp_set_bitput_ctx(&bp, data, len);

    /* frame header basics */
    mpp_put_bits(&bp, pp->frame_type, 1);
    mpp_put_bits(&bp, pp->error_resilient_mode, 1);
    mpp_put_bits(&bp, pp->BitDepthMinus8Luma, 3);
    mpp_put_bits(&bp, 1, 2); // yuv420
    mpp_put_bits(&bp, pp->width, 16);
    mpp_put_bits(&bp, pp->height, 16);

    /* intra-frame flag, then sign bias for LAST/GOLDEN/ALTREF references */
    mpp_put_bits(&bp, (!pp->frame_type || pp->intra_only), 1);
    mpp_put_bits(&bp, pp->ref_frame_sign_bias[1], 1);
    mpp_put_bits(&bp, pp->ref_frame_sign_bias[2], 1);
    mpp_put_bits(&bp, pp->ref_frame_sign_bias[3], 1);

    mpp_put_bits(&bp, pp->allow_high_precision_mv, 1);
    /* sync with cmodel */
    if (!pp->frame_type || pp->intra_only)
        mpp_put_bits(&bp, 0, 3);
    else {
        if (pp->interp_filter == 4) /* FILTER_SWITCHABLE */
            mpp_put_bits(&bp, pp->interp_filter, 3);
        else
            mpp_put_bits(&bp, literal_to_filter[pp->interp_filter], 3);
    }
    mpp_put_bits(&bp, pp->parallelmode, 1);
    mpp_put_bits(&bp, pp->refresh_frame_context, 1);

    /* loop filter */
    mpp_put_bits(&bp, pp->filter_level, 6);
    mpp_put_bits(&bp, pp->sharpness_level, 3);
    mpp_put_bits(&bp, pp->mode_ref_delta_enabled, 1);
    mpp_put_bits(&bp, pp->mode_ref_delta_update, 1);

    mpp_put_bits(&bp, pp->ref_deltas[0], 7);
    mpp_put_bits(&bp, pp->ref_deltas[1], 7);
    mpp_put_bits(&bp, pp->ref_deltas[2], 7);
    mpp_put_bits(&bp, pp->ref_deltas[3], 7);
    mpp_put_bits(&bp, pp->mode_deltas[0], 7);
    mpp_put_bits(&bp, pp->mode_deltas[1], 7);

    /* quantization; the final flag marks a fully lossless frame */
    mpp_put_bits(&bp, pp->base_qindex, 8);
    mpp_put_bits(&bp, pp->y_dc_delta_q, 5);
    mpp_put_bits(&bp, pp->uv_dc_delta_q, 5);
    mpp_put_bits(&bp, pp->uv_ac_delta_q, 5);
    mpp_put_bits(&bp, (!pp->base_qindex && !pp->y_dc_delta_q && !pp->uv_dc_delta_q && !pp->uv_ac_delta_q), 1);

    /* segmentation probabilities and flags */
    for (i = 0; i < 3; i++) {
        mpp_put_bits(&bp, pp->stVP9Segments.pred_probs[i], 8);
    }
    for (i = 0; i < 7; i++) {
        mpp_put_bits(&bp, pp->stVP9Segments.tree_probs[i], 8);
    }
    mpp_put_bits(&bp, pp->stVP9Segments.enabled, 1);
    mpp_put_bits(&bp, pp->stVP9Segments.update_map, 1);
    mpp_put_bits(&bp, pp->stVP9Segments.temporal_update, 1);
    mpp_put_bits(&bp, pp->stVP9Segments.abs_delta, 1);

    {
        /* previous-frame MVs are only usable when geometry matched and the
         * last frame was a shown inter frame without error resilience */
        RK_U32 use_prev_frame_mvs = !pp->error_resilient_mode &&
                                    pp->width == hw_ctx->ls_info.last_width &&
                                    pp->height == hw_ctx->ls_info.last_height &&
                                    !hw_ctx->ls_info.last_intra_only &&
                                    hw_ctx->ls_info.last_show_frame;
        mpp_put_bits(&bp, use_prev_frame_mvs, 1);
    }

    /* per-segment feature enable bits (4 features x 8 segments) */
    for ( i = 0; i < 8; i++ )
        for ( j = 0; j < 4; j++ )
            mpp_put_bits(&bp, (pp->stVP9Segments.feature_mask[i] >> j) & 0x1, 1);

    /* per-segment feature values (qindex, filter level, ref frame) */
    for ( i = 0; i < 8; i++ ) {
        mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][0], 9);
        mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][1], 7);
        mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][2], 2);
    }

    mpp_put_bits(&bp, pp->first_partition_size, 16);

    /* refer frame width and height */
    {
        RK_S32 ref_idx = pp->frame_refs[0].Index7Bits;
        mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
        mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
        ref_idx = pp->frame_refs[1].Index7Bits;
        mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
        mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
        ref_idx = pp->frame_refs[2].Index7Bits;
        mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
        mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
    }

    /* last frame info */
    mpp_put_bits(&bp, hw_ctx->ls_info.last_mode_deltas[0], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.last_mode_deltas[1], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[0], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[1], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[2], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[3], 7);
    mpp_put_bits(&bp, hw_ctx->ls_info.segmentation_enable_flag_last, 1);

    mpp_put_bits(&bp, hw_ctx->ls_info.last_show_frame, 1);
    mpp_put_bits(&bp, pp->intra_only, 1);
    {
        RK_U32 last_widthheight_eqcur = pp->width == hw_ctx->ls_info.last_width &&
                                        pp->height == hw_ctx->ls_info.last_height;

        mpp_put_bits(&bp, last_widthheight_eqcur, 1);
    }
    mpp_put_bits(&bp, hw_ctx->ls_info.color_space_last, 3);

    /* last-frame-was-keyframe flag plus fixed pad bits expected by hw */
    mpp_put_bits(&bp, !hw_ctx->ls_info.last_frame_type, 1);
    mpp_put_bits(&bp, 0, 1);
    mpp_put_bits(&bp, 1, 1);
    mpp_put_bits(&bp, 1, 1);
    mpp_put_bits(&bp, 1, 1);

    /* per-reference MV scaling factors (x, y for 3 refs) */
    mpp_put_bits(&bp, pp->mvscale[0][0], 16);
    mpp_put_bits(&bp, pp->mvscale[0][1], 16);
    mpp_put_bits(&bp, pp->mvscale[1][0], 16);
    mpp_put_bits(&bp, pp->mvscale[1][1], 16);
    mpp_put_bits(&bp, pp->mvscale[2][0], 16);
    mpp_put_bits(&bp, pp->mvscale[2][1], 16);

    /* tile cols and rows */
    {
        RK_S32 tile_width[64] = {0};
        RK_S32 tile_height[4] = {0};
        RK_S32 tile_cols = 1 << pp->log2_tile_cols;
        RK_S32 tile_rows = 1 << pp->log2_tile_rows;

        mpp_put_bits(&bp, tile_cols, 7);
        mpp_put_bits(&bp, tile_rows, 3);

        for (i = 0; i < tile_cols; ++i) { // tile_col
            RK_S32 tile_col_start = 0;
            RK_S32 tile_col_end = 0;

            set_tile_offset(&tile_col_start, &tile_col_end,
                            i, pp->log2_tile_cols, MPP_ALIGN(pp->width, 64) / 64);
            /* offsets are in 8-pixel units; width is stored in 64-pixel sbs */
            tile_width[i] = (tile_col_end - tile_col_start + 7) / 8;
        }

        for (j = 0; j < tile_rows; ++j) { // tile_row
            RK_S32 tile_row_start = 0;
            RK_S32 tile_row_end = 0;

            set_tile_offset(&tile_row_start, &tile_row_end,
                            j, pp->log2_tile_rows, MPP_ALIGN(pp->height, 64) / 64);
            tile_height[j] = (tile_row_end - tile_row_start + 7) / 8;
        }

        /* always emit the full fixed-size tables; unused entries stay 0 */
        for (i = 0; i < 64; i++)
            mpp_put_bits(&bp, tile_width[i], 10);

        for (j = 0; j < 4; j++)
            mpp_put_bits(&bp, tile_height[j], 10);
    }

    mpp_put_align(&bp, 64, 0);//128

#ifdef DUMP_VDPU383_DATAS
    {
        char *cur_fname = "global_cfg.dat";
        memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
        sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
        dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * (bp.index - 1) + bp.bitpos, 64, 0);
    }
#endif

    return MPP_OK;
}
682
hal_vp9d_vdpu383_gen_regs(void * hal,HalTaskInfo * task)683 static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
684 {
685 RK_S32 i;
686 RK_U8 bit_depth = 0;
687 RK_U32 ref_frame_width_y;
688 RK_U32 ref_frame_height_y;
689 RK_S32 stream_len = 0, aglin_offset = 0;
690 RK_U32 y_hor_virstride, uv_hor_virstride, y_virstride;
691 RK_U8 *bitstream = NULL;
692 MppBuffer streambuf = NULL;
693 RK_U32 sw_y_hor_virstride;
694 RK_U32 sw_uv_hor_virstride;
695 RK_U32 sw_y_virstride;
696 RK_U32 sw_uv_virstride;
697 RK_U8 ref_idx = 0;
698 RK_U8 ref_frame_idx = 0;
699 RK_U32 *reg_ref_base = NULL;
700 RK_U32 *reg_payload_ref_base = NULL;
701 RK_S32 intraFlag = 0;
702 MppBuffer framebuf = NULL;
703 HalBuf *mv_buf = NULL;
704 RK_U32 fbc_en = 0;
705 HalBuf *origin_buf = NULL;
706
707 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
708 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
709 DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
710 Vdpu383Vp9dRegSet *vp9_hw_regs = NULL;
711 RK_S32 mv_size = pic_param->width * pic_param->height / 2;
712 RK_U32 frame_ctx_id = pic_param->frame_context_idx;
713 MppFrame mframe;
714 MppFrame ref_frame = NULL;
715
716 if (p_hal->fast_mode) {
717 for (i = 0; i < MAX_GEN_REG; i++) {
718 if (!hw_ctx->g_buf[i].use_flag) {
719 task->dec.reg_index = i;
720 hw_ctx->global_base = hw_ctx->g_buf[i].global_base;
721 hw_ctx->probe_base = hw_ctx->g_buf[i].probe_base;
722 hw_ctx->count_base = hw_ctx->g_buf[i].count_base;
723 hw_ctx->hw_regs = hw_ctx->g_buf[i].hw_regs;
724 hw_ctx->g_buf[i].use_flag = 1;
725 break;
726 }
727 }
728 if (i == MAX_GEN_REG) {
729 mpp_err("vp9 fast mode buf all used\n");
730 return MPP_ERR_NOMEM;
731 }
732 }
733 vp9_hw_regs = (Vdpu383Vp9dRegSet*)hw_ctx->hw_regs;
734 memset(vp9_hw_regs, 0, sizeof(Vdpu383Vp9dRegSet));
735
736 #ifdef DUMP_VDPU383_DATAS
737 {
738 memset(dump_cur_dir, 0, sizeof(dump_cur_dir));
739 sprintf(dump_cur_dir, "vp9/Frame%04d", dump_cur_frame);
740 if (access(dump_cur_dir, 0)) {
741 if (mkdir(dump_cur_dir))
742 mpp_err_f("error: mkdir %s\n", dump_cur_dir);
743 }
744 dump_cur_frame++;
745 }
746 #endif
747
748 /* uncompress header data */
749 prepare_uncompress_header(p_hal, pic_param, (RK_U64 *)hw_ctx->header_data, sizeof(hw_ctx->header_data) / 8);
750 memcpy(mpp_buffer_get_ptr(hw_ctx->global_base), hw_ctx->header_data, sizeof(hw_ctx->header_data));
751 mpp_buffer_sync_end(hw_ctx->global_base);
752 vp9_hw_regs->vp9d_paras.reg67_global_len = GBL_SIZE / 16;
753 vp9_hw_regs->common_addr.reg131_gbl_base = mpp_buffer_get_fd(hw_ctx->global_base);
754
755 if (hw_ctx->cmv_bufs == NULL || hw_ctx->mv_size < mv_size) {
756 size_t size = mv_size;
757
758 if (hw_ctx->cmv_bufs) {
759 hal_bufs_deinit(hw_ctx->cmv_bufs);
760 hw_ctx->cmv_bufs = NULL;
761 }
762
763 hal_bufs_init(&hw_ctx->cmv_bufs);
764 if (hw_ctx->cmv_bufs == NULL) {
765 mpp_err_f("colmv bufs init fail");
766 return MPP_NOK;
767 }
768 hw_ctx->mv_size = mv_size;
769 hw_ctx->mv_count = mpp_buf_slot_get_count(p_hal ->slots);
770 hal_bufs_setup(hw_ctx->cmv_bufs, hw_ctx->mv_count, 1, &size);
771 }
772
773 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
774 if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY &&
775 hw_ctx->origin_bufs == NULL) {
776 vdpu383_setup_scale_origin_bufs(hw_ctx, mframe);
777 }
778
779 stream_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
780
781 intraFlag = (!pic_param->frame_type || pic_param->intra_only);
782 #if HW_PROB
783 // hal_vp9d_prob_flag_delta(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
784 /* init kf_probe */
785 hal_vp9d_prob_kf(mpp_buffer_get_ptr(hw_ctx->probe_base));
786 mpp_buffer_sync_end(hw_ctx->probe_base);
787 if (intraFlag) {
788 hal_vp9d_prob_default(mpp_buffer_get_ptr(hw_ctx->prob_default_base), task->dec.syntax.data);
789 mpp_buffer_sync_end(hw_ctx->prob_default_base);
790 }
791
792 /* config last prob base and update write base */
793 {
794 if (intraFlag || pic_param->error_resilient_mode) {
795 if (intraFlag
796 || pic_param->error_resilient_mode
797 || (pic_param->reset_frame_context == 3)) {
798 memset(hw_ctx->prob_ctx_valid, 0, sizeof(hw_ctx->prob_ctx_valid));
799 } else if (pic_param->reset_frame_context == 2) {
800 hw_ctx->prob_ctx_valid[frame_ctx_id] = 0;
801 }
802 }
803
804 if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
805 vp9_hw_regs->vp9d_addrs.reg184_lastprob_base =
806 mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
807 #ifdef DUMP_VDPU383_DATAS
808 { cur_last_prob_base = hw_ctx->prob_loop_base[frame_ctx_id]; }
809 #endif
810 } else {
811 vp9_hw_regs->vp9d_addrs.reg184_lastprob_base = mpp_buffer_get_fd(hw_ctx->prob_default_base);
812 hw_ctx->prob_ctx_valid[frame_ctx_id] |= pic_param->refresh_frame_context;
813 #ifdef DUMP_VDPU383_DATAS
814 { cur_last_prob_base = hw_ctx->prob_default_base; }
815 #endif
816 }
817 vp9_hw_regs->vp9d_addrs.reg185_updateprob_base =
818 mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
819 }
820 vp9_hw_regs->vp9d_addrs.reg183_kfprob_base = mpp_buffer_get_fd(hw_ctx->probe_base);
821 #ifdef DUMP_VDPU383_DATAS
822 {
823 char *cur_fname = "cabac_last_probe.dat";
824 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
825 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
826 dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(cur_last_prob_base),
827 8 * 152 * 16, 128, 0);
828 }
829 {
830 char *cur_fname = "cabac_kf_probe.dat";
831 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
832 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
833 dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(hw_ctx->probe_base),
834 8 * PROB_KF_SIZE, 128, 0);
835 }
836 #endif
837 #else
838 #endif
839
840 vp9_hw_regs->vp9d_paras.reg66_stream_len = ((stream_len + 15) & (~15)) + 0x80;
841
842 mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf);
843 bitstream = mpp_buffer_get_ptr(streambuf);
844 aglin_offset = vp9_hw_regs->vp9d_paras.reg66_stream_len - stream_len;
845 if (aglin_offset > 0) {
846 memset((void *)(bitstream + stream_len), 0, aglin_offset);
847 }
848
849 //--- caculate the yuv_frame_size and mv_size
850 bit_depth = pic_param->BitDepthMinus8Luma + 8;
851
852 {
853 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
854 fbc_en = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
855
856 if (fbc_en) {
857 RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
858 RK_U32 h = MPP_ALIGN(mpp_frame_get_height(mframe), 64);
859 RK_U32 fbd_offset;
860
861 vp9_hw_regs->ctrl_regs.reg9.fbc_e = 1;
862 vp9_hw_regs->vp9d_paras.reg68_hor_virstride = fbc_hdr_stride / 64;
863 fbd_offset = vp9_hw_regs->vp9d_paras.reg68_hor_virstride * h * 4;
864 vp9_hw_regs->vp9d_addrs.reg193_fbc_payload_offset = fbd_offset;
865 /* error stride */
866 vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = fbc_hdr_stride / 64;
867 } else {
868 sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
869 sw_uv_hor_virstride = sw_y_hor_virstride;
870 sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;
871 sw_uv_virstride = sw_y_virstride / 2;
872
873 vp9_hw_regs->ctrl_regs.reg9.fbc_e = 0;
874 if (MPP_FRAME_FMT_IS_TILE(mpp_frame_get_fmt(mframe))) {
875 vp9_hw_regs->ctrl_regs.reg9.tile_e = 1;
876 vp9_hw_regs->vp9d_paras.reg68_hor_virstride = sw_y_hor_virstride * 6;
877 vp9_hw_regs->vp9d_paras.reg70_y_virstride = sw_y_virstride + sw_uv_virstride;
878 } else {
879 vp9_hw_regs->ctrl_regs.reg9.tile_e = 0;
880 vp9_hw_regs->vp9d_paras.reg68_hor_virstride = sw_y_hor_virstride;
881 vp9_hw_regs->vp9d_paras.reg69_raster_uv_hor_virstride = sw_uv_hor_virstride;
882 vp9_hw_regs->vp9d_paras.reg70_y_virstride = sw_y_virstride;
883 }
884 /* error stride */
885 vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = sw_y_hor_virstride;
886 vp9_hw_regs->vp9d_paras.reg81_error_ref_raster_uv_hor_virstride = sw_uv_hor_virstride;
887 vp9_hw_regs->vp9d_paras.reg82_error_ref_virstride = sw_y_virstride;
888 }
889 }
890 if (!pic_param->intra_only && pic_param->frame_type &&
891 !pic_param->error_resilient_mode && hw_ctx->ls_info.last_show_frame) {
892 hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
893 }
894
895 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
896 mpp_buf_slot_get_prop(p_hal ->slots, task->dec.output, SLOT_BUFFER, &framebuf);
897 if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
898 origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, task->dec.output);
899 framebuf = origin_buf->buf[0];
900 }
901 vp9_hw_regs->vp9d_addrs.reg168_decout_base = mpp_buffer_get_fd(framebuf);
902 vp9_hw_regs->vp9d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(framebuf);
903 vp9_hw_regs->vp9d_addrs.reg192_payload_st_cur_base = mpp_buffer_get_fd(framebuf);
904 vp9_hw_regs->vp9d_addrs.reg194_payload_st_error_ref_base = mpp_buffer_get_fd(framebuf);
905 vp9_hw_regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(streambuf);
906
907 {
908 RK_U32 strm_offset = pic_param->uncompressed_header_size_byte_aligned;
909
910 vp9_hw_regs->vp9d_paras.reg65_strm_start_bit = 8 * (strm_offset & 0xf);
911 mpp_dev_set_reg_offset(p_hal->dev, 128, strm_offset & 0xfffffff0);
912 }
913
914 if (hw_ctx->last_segid_flag) {
915 vp9_hw_regs->vp9d_addrs.reg181_segidlast_base = mpp_buffer_get_fd(hw_ctx->segid_last_base);
916 vp9_hw_regs->vp9d_addrs.reg182_segidcur_base = mpp_buffer_get_fd(hw_ctx->segid_cur_base);
917 } else {
918 vp9_hw_regs->vp9d_addrs.reg181_segidlast_base = mpp_buffer_get_fd(hw_ctx->segid_cur_base);
919 vp9_hw_regs->vp9d_addrs.reg182_segidcur_base = mpp_buffer_get_fd(hw_ctx->segid_last_base);
920 }
921 #ifdef DUMP_VDPU383_DATAS
922 cur_last_segid_flag = hw_ctx->last_segid_flag;
923 {
924 char *cur_fname = "stream_in.dat";
925 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
926 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
927 dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(streambuf)
928 + pic_param->uncompressed_header_size_byte_aligned,
929 8 * (((stream_len + 15) & (~15)) + 0x80), 128, 0);
930 }
931 #endif
932 /* set last segid flag */
933 if ((pic_param->stVP9Segments.enabled && pic_param->stVP9Segments.update_map) ||
934 (pic_param->width != hw_ctx->ls_info.last_width || pic_param->height != hw_ctx->ls_info.last_height) ||
935 intraFlag || pic_param->error_resilient_mode) {
936 hw_ctx->last_segid_flag = !hw_ctx->last_segid_flag;
937 }
938 //set cur colmv base
939 mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, task->dec.output);
940
941 vp9_hw_regs->vp9d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
942
943 hw_ctx->mv_base_addr = vp9_hw_regs->vp9d_addrs.reg216_colmv_cur_base;
944 if (hw_ctx->pre_mv_base_addr < 0)
945 hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
946
947 // vp9 only one colmv
948 vp9_hw_regs->vp9d_addrs.reg217_232_colmv_ref_base[0] = hw_ctx->pre_mv_base_addr;
949
950 reg_ref_base = vp9_hw_regs->vp9d_addrs.reg170_185_ref_base;
951 reg_payload_ref_base = vp9_hw_regs->vp9d_addrs.reg195_210_payload_st_ref_base;
952 for (i = 0; i < 3; i++) {
953 ref_idx = pic_param->frame_refs[i].Index7Bits;
954 ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
955 ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
956 ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
957 if (ref_frame_idx < 0x7f)
958 mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &ref_frame);
959 if (fbc_en) {
960 y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64;
961 if (*compat_ext_fbc_hdr_256_odd)
962 y_hor_virstride = uv_hor_virstride = (MPP_ALIGN(ref_frame_width_y, 256) | 256) / 64;
963 } else {
964 if (ref_frame)
965 y_hor_virstride = uv_hor_virstride = (mpp_frame_get_hor_stride(ref_frame) >> 4);
966 else
967 y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
968 }
969 if (ref_frame)
970 y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(ref_frame);
971 else
972 y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
973
974 if (ref_frame_idx < 0x7f) {
975 mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_BUFFER, &framebuf);
976 if (hw_ctx->origin_bufs && mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
977 origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, ref_frame_idx);
978 framebuf = origin_buf->buf[0];
979 }
980
981 switch (i) {
982 case 0: {
983 vp9_hw_regs->vp9d_paras.reg83_ref0_hor_virstride = y_hor_virstride;
984 vp9_hw_regs->vp9d_paras.reg84_ref0_raster_uv_hor_virstride = uv_hor_virstride;
985 vp9_hw_regs->vp9d_paras.reg85_ref0_virstride = y_virstride;
986 } break;
987 case 1: {
988 vp9_hw_regs->vp9d_paras.reg86_ref1_hor_virstride = y_hor_virstride;
989 vp9_hw_regs->vp9d_paras.reg87_ref1_raster_uv_hor_virstride = uv_hor_virstride;
990 vp9_hw_regs->vp9d_paras.reg88_ref1_virstride = y_virstride;
991 } break;
992 case 2: {
993 vp9_hw_regs->vp9d_paras.reg89_ref2_hor_virstride = y_hor_virstride;
994 vp9_hw_regs->vp9d_paras.reg90_ref2_raster_uv_hor_virstride = uv_hor_virstride;
995 vp9_hw_regs->vp9d_paras.reg91_ref2_virstride = y_virstride;
996 } break;
997 default:
998 break;
999 }
1000
1001 /*0 map to 11*/
1002 /*1 map to 12*/
1003 /*2 map to 13*/
1004 if (framebuf != NULL) {
1005 reg_ref_base[i] = mpp_buffer_get_fd(framebuf);
1006 reg_payload_ref_base[i] = mpp_buffer_get_fd(framebuf);
1007 } else {
1008 mpp_log("ref buff address is no valid used out as base slot index 0x%x", ref_frame_idx);
1009 reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1010 reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1011 }
1012 mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, ref_frame_idx);
1013 } else {
1014 reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1015 reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1016 }
1017 }
1018
1019 /* common register setting */
1020 vp9_hw_regs->ctrl_regs.reg8_dec_mode = 2; //set as vp9 dec
1021 vp9_hw_regs->ctrl_regs.reg9.buf_empty_en = 0;
1022
1023 vp9_hw_regs->ctrl_regs.reg10.strmd_auto_gating_e = 1;
1024 vp9_hw_regs->ctrl_regs.reg10.inter_auto_gating_e = 1;
1025 vp9_hw_regs->ctrl_regs.reg10.intra_auto_gating_e = 1;
1026 vp9_hw_regs->ctrl_regs.reg10.transd_auto_gating_e = 1;
1027 vp9_hw_regs->ctrl_regs.reg10.recon_auto_gating_e = 1;
1028 vp9_hw_regs->ctrl_regs.reg10.filterd_auto_gating_e = 1;
1029 vp9_hw_regs->ctrl_regs.reg10.bus_auto_gating_e = 1;
1030 vp9_hw_regs->ctrl_regs.reg10.ctrl_auto_gating_e = 1;
1031 vp9_hw_regs->ctrl_regs.reg10.rcb_auto_gating_e = 1;
1032 vp9_hw_regs->ctrl_regs.reg10.err_prc_auto_gating_e = 1;
1033
1034 vp9_hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1035 vp9_hw_regs->ctrl_regs.reg16.error_spread_disable = 0;
1036 vp9_hw_regs->ctrl_regs.reg16.roi_error_ctu_cal_en = 0;
1037
1038 vp9_hw_regs->ctrl_regs.reg20_cabac_error_en_lowbits = 0xffffffdf;
1039 vp9_hw_regs->ctrl_regs.reg21_cabac_error_en_highbits = 0x3fffffff;
1040
1041 vp9_hw_regs->ctrl_regs.reg13_core_timeout_threshold = 0x3ffff;
1042
1043 //last info update
1044 hw_ctx->ls_info.abs_delta_last = pic_param->stVP9Segments.abs_delta;
1045 for (i = 0 ; i < 4; i ++) {
1046 hw_ctx->ls_info.last_ref_deltas[i] = pic_param->ref_deltas[i];
1047 }
1048
1049 for (i = 0 ; i < 2; i ++) {
1050 hw_ctx->ls_info.last_mode_deltas[i] = pic_param->mode_deltas[i];
1051 }
1052
1053 for (i = 0; i < 8; i++) {
1054 hw_ctx->ls_info.feature_data[i][0] = pic_param->stVP9Segments.feature_data[i][0];
1055 hw_ctx->ls_info.feature_data[i][1] = pic_param->stVP9Segments.feature_data[i][1];
1056 hw_ctx->ls_info.feature_data[i][2] = pic_param->stVP9Segments.feature_data[i][2];
1057 hw_ctx->ls_info.feature_data[i][3] = pic_param->stVP9Segments.feature_data[i][3];
1058 hw_ctx->ls_info.feature_mask[i] = pic_param->stVP9Segments.feature_mask[i];
1059 }
1060 if (!hw_ctx->ls_info.segmentation_enable_flag_last)
1061 hw_ctx->ls_info.segmentation_enable_flag_last = pic_param->stVP9Segments.enabled;
1062
1063 hw_ctx->ls_info.last_show_frame = pic_param->show_frame;
1064 hw_ctx->ls_info.last_width = pic_param->width;
1065 hw_ctx->ls_info.last_height = pic_param->height;
1066 hw_ctx->ls_info.last_frame_type = pic_param->frame_type;
1067
1068 if (intraFlag)
1069 hw_ctx->ls_info.last_intra_only = 1;
1070
1071 hw_ctx->ls_info.last_intra_only = (!pic_param->frame_type || pic_param->intra_only);
1072 hal_vp9d_dbg_par("stVP9Segments.enabled %d show_frame %d width %d height %d last_intra_only %d",
1073 pic_param->stVP9Segments.enabled, pic_param->show_frame,
1074 pic_param->width, pic_param->height,
1075 hw_ctx->ls_info.last_intra_only);
1076
1077 hal_vp9d_rcb_info_update(hal, vp9_hw_regs, pic_param);
1078 {
1079 MppBuffer rcb_buf = NULL;
1080
1081 rcb_buf = p_hal->fast_mode ? hw_ctx->g_buf[task->dec.reg_index].rcb_buf : hw_ctx->rcb_buf;
1082 vdpu383_setup_rcb(&vp9_hw_regs->common_addr, p_hal->dev, rcb_buf, hw_ctx->rcb_info);
1083 }
1084 vdpu383_setup_statistic(&vp9_hw_regs->ctrl_regs);
1085 // whether need update counts
1086 if (pic_param->refresh_frame_context && !pic_param->parallelmode) {
1087 task->dec.flags.wait_done = 1;
1088 }
1089
1090 {
1091 //scale down config
1092 MppBuffer mbuffer = NULL;
1093 RK_S32 fd = -1;
1094 MppFrameThumbnailMode thumbnail_mode;
1095
1096 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output,
1097 SLOT_BUFFER, &mbuffer);
1098 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output,
1099 SLOT_FRAME_PTR, &mframe);
1100 thumbnail_mode = mpp_frame_get_thumbnail_en(mframe);
1101 switch (thumbnail_mode) {
1102 case MPP_FRAME_THUMBNAIL_ONLY:
1103 vp9_hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1104 origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, task->dec.output);
1105 fd = mpp_buffer_get_fd(origin_buf->buf[0]);
1106 vp9_hw_regs->vp9d_addrs.reg168_decout_base = fd;
1107 vp9_hw_regs->vp9d_addrs.reg169_error_ref_base = fd;
1108 vp9_hw_regs->vp9d_addrs.reg192_payload_st_cur_base = fd;
1109 vp9_hw_regs->vp9d_addrs.reg194_payload_st_error_ref_base = fd;
1110 vdpu383_setup_down_scale(mframe, p_hal->dev, &vp9_hw_regs->ctrl_regs,
1111 (void *)&vp9_hw_regs->vp9d_paras);
1112 break;
1113 case MPP_FRAME_THUMBNAIL_MIXED:
1114 vp9_hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1115 vdpu383_setup_down_scale(mframe, p_hal->dev, &vp9_hw_regs->ctrl_regs,
1116 (void *)&vp9_hw_regs->vp9d_paras);
1117 break;
1118 case MPP_FRAME_THUMBNAIL_NONE:
1119 default:
1120 vp9_hw_regs->ctrl_regs.reg9.scale_down_en = 0;
1121 break;
1122 }
1123 }
1124
1125 return MPP_OK;
1126 }
1127
hal_vp9d_vdpu383_start(void * hal,HalTaskInfo * task)1128 static MPP_RET hal_vp9d_vdpu383_start(void *hal, HalTaskInfo *task)
1129 {
1130 MPP_RET ret = MPP_OK;
1131 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1132 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1133 Vdpu383Vp9dRegSet *hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->hw_regs;
1134 MppDev dev = p_hal->dev;
1135
1136 if (p_hal->fast_mode) {
1137 RK_S32 index = task->dec.reg_index;
1138
1139 hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->g_buf[index].hw_regs;
1140 }
1141
1142 mpp_assert(hw_regs);
1143
1144 do {
1145 MppDevRegWrCfg wr_cfg;
1146 MppDevRegRdCfg rd_cfg;
1147
1148 wr_cfg.reg = &hw_regs->ctrl_regs;
1149 wr_cfg.size = sizeof(hw_regs->ctrl_regs);
1150 wr_cfg.offset = OFFSET_CTRL_REGS;
1151 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1152 if (ret) {
1153 mpp_err_f("set register write failed %d\n", ret);
1154 break;
1155 }
1156
1157 wr_cfg.reg = &hw_regs->common_addr;
1158 wr_cfg.size = sizeof(hw_regs->common_addr);
1159 wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1160 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1161 if (ret) {
1162 mpp_err_f("set register write failed %d\n", ret);
1163 break;
1164 }
1165
1166 wr_cfg.reg = &hw_regs->vp9d_paras;
1167 wr_cfg.size = sizeof(hw_regs->vp9d_paras);
1168 wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
1169 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1170 if (ret) {
1171 mpp_err_f("set register write failed %d\n", ret);
1172 break;
1173 }
1174
1175 wr_cfg.reg = &hw_regs->vp9d_addrs;
1176 wr_cfg.size = sizeof(hw_regs->vp9d_addrs);
1177 wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1178 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1179 if (ret) {
1180 mpp_err_f("set register write failed %d\n", ret);
1181 break;
1182 }
1183
1184 rd_cfg.reg = &hw_regs->ctrl_regs.reg15;
1185 rd_cfg.size = sizeof(hw_regs->ctrl_regs.reg15);
1186 rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1187 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
1188 if (ret) {
1189 mpp_err_f("set register read failed %d\n", ret);
1190 break;
1191 }
1192
1193 // rcb info for sram
1194 vdpu383_set_rcbinfo(dev, hw_ctx->rcb_info);
1195
1196 ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
1197 if (ret) {
1198 mpp_err_f("send cmd failed %d\n", ret);
1199 break;
1200 }
1201 } while (0);
1202
1203 return ret;
1204 }
1205
hal_vp9d_vdpu383_wait(void * hal,HalTaskInfo * task)1206 static MPP_RET hal_vp9d_vdpu383_wait(void *hal, HalTaskInfo *task)
1207 {
1208 MPP_RET ret = MPP_OK;
1209 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1210 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1211 Vdpu383Vp9dRegSet *hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->hw_regs;
1212
1213 if (p_hal->fast_mode)
1214 hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->g_buf[task->dec.reg_index].hw_regs;
1215
1216 mpp_assert(hw_regs);
1217
1218 ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
1219 if (ret)
1220 mpp_err_f("poll cmd failed %d\n", ret);
1221 #ifdef DUMP_VDPU383_DATAS
1222 {
1223 char *cur_fname = "cabac_update_probe.dat";
1224 DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
1225 RK_U32 frame_ctx_id = pic_param->frame_context_idx;
1226 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1227 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1228 dump_data_to_file(dump_cur_fname_path,
1229 (void *)mpp_buffer_get_ptr(hw_ctx->prob_loop_base[frame_ctx_id]),
1230 8 * 152 * 16, 128, 0);
1231 }
1232 {
1233 char *cur_fname = "segid_last.dat";
1234 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1235 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1236 if (!cur_last_segid_flag)
1237 dump_data_to_file(dump_cur_fname_path,
1238 (void *)mpp_buffer_get_ptr(hw_ctx->segid_cur_base),
1239 8 * 1559 * 8, 64, 0);
1240 else
1241 dump_data_to_file(dump_cur_fname_path,
1242 (void *)mpp_buffer_get_ptr(hw_ctx->segid_last_base),
1243 8 * 1559 * 8, 64, 0);
1244 }
1245 {
1246 char *cur_fname = "segid_cur.dat";
1247 memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1248 sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1249 if (cur_last_segid_flag)
1250 dump_data_to_file(dump_cur_fname_path,
1251 (void *)mpp_buffer_get_ptr(hw_ctx->segid_cur_base),
1252 8 * 1559 * 8, 64, 0);
1253 else
1254 dump_data_to_file(dump_cur_fname_path,
1255 (void *)mpp_buffer_get_ptr(hw_ctx->segid_last_base),
1256 8 * 1559 * 8, 64, 0);
1257 }
1258 #endif
1259
1260 if (hal_vp9d_debug & HAL_VP9D_DBG_REG) {
1261 RK_U32 *p = (RK_U32 *)hw_regs;
1262 RK_U32 i = 0;
1263
1264 for (i = 0; i < sizeof(Vdpu383Vp9dRegSet) / 4; i++)
1265 mpp_log("get regs[%02d]: %08X\n", i, *p++);
1266 }
1267
1268 if (task->dec.flags.parse_err ||
1269 task->dec.flags.ref_err ||
1270 (!hw_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
1271 hw_regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
1272 hw_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
1273 hw_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
1274 hw_regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
1275 hw_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
1276 hw_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) {
1277 MppFrame mframe = NULL;
1278
1279 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
1280 mpp_frame_set_errinfo(mframe, 1);
1281 }
1282
1283 #if !HW_PROB
1284 if (p_hal->dec_cb && task->dec.flags.wait_done) {
1285 DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
1286 hal_vp9d_update_counts(mpp_buffer_get_ptr(hw_ctx->count_base), task->dec.syntax.data);
1287 mpp_callback(p_hal->dec_cb, &pic_param->counts);
1288 }
1289 #endif
1290 if (p_hal->fast_mode) {
1291 hw_ctx->g_buf[task->dec.reg_index].use_flag = 0;
1292 }
1293
1294 (void)task;
1295 return ret;
1296 }
1297
hal_vp9d_vdpu383_reset(void * hal)1298 static MPP_RET hal_vp9d_vdpu383_reset(void *hal)
1299 {
1300 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1301 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1302
1303 hal_vp9d_enter();
1304
1305 memset(&hw_ctx->ls_info, 0, sizeof(hw_ctx->ls_info));
1306 hw_ctx->mv_base_addr = -1;
1307 hw_ctx->pre_mv_base_addr = -1;
1308 hw_ctx->last_segid_flag = 1;
1309
1310 hal_vp9d_leave();
1311
1312 return MPP_OK;
1313 }
1314
hal_vp9d_vdpu383_flush(void * hal)1315 static MPP_RET hal_vp9d_vdpu383_flush(void *hal)
1316 {
1317 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1318 Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1319
1320 hal_vp9d_enter();
1321
1322 hw_ctx->mv_base_addr = -1;
1323 hw_ctx->pre_mv_base_addr = -1;
1324
1325 hal_vp9d_leave();
1326
1327 return MPP_OK;
1328 }
1329
hal_vp9d_vdpu383_control(void * hal,MpiCmd cmd_type,void * param)1330 static MPP_RET hal_vp9d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param)
1331 {
1332 HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1333
1334 switch ((MpiCmd)cmd_type) {
1335 case MPP_DEC_SET_FRAME_INFO : {
1336 MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param);
1337
1338 if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1339 vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 0);
1340 } else {
1341 mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
1342 }
1343 } break;
1344 case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: {
1345 vdpu383_update_thumbnail_frame_info((MppFrame)param);
1346 } break;
1347 default : {
1348 } break;
1349 }
1350
1351 return MPP_OK;
1352 }
1353
/*
 * VDPU383 VP9 decoder HAL entry table, registered with the common
 * decoder HAL dispatcher. init/gen_regs are defined earlier in this
 * file; the remaining callbacks are the static functions above.
 */
const MppHalApi hal_vp9d_vdpu383 = {
    .name = "vp9d_vdpu383",
    .type = MPP_CTX_DEC,
    .coding = MPP_VIDEO_CodingVP9,
    .ctx_size = sizeof(Vdpu383Vp9dCtx),
    .flag = 0,
    .init = hal_vp9d_vdpu383_init,
    .deinit = hal_vp9d_vdpu383_deinit,
    .reg_gen = hal_vp9d_vdpu383_gen_regs,
    .start = hal_vp9d_vdpu383_start,
    .wait = hal_vp9d_vdpu383_wait,
    .reset = hal_vp9d_vdpu383_reset,
    .flush = hal_vp9d_vdpu383_flush,
    .control = hal_vp9d_vdpu383_control,
};
1369