/*
 * Copyright 2022 Rockchip Electronics Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define MODULE_TAG "hal_vp9d_vdpu382"

#include <stdio.h>
#include <string.h>

#include "mpp_env.h"
#include "mpp_mem.h"
#include "mpp_common.h"
#include "mpp_device.h"
#include "mpp_hal.h"

#include "hal_bufs.h"
#include "hal_vp9d_debug.h"
#include "hal_vp9d_com.h"
#include "hal_vp9d_vdpu382.h"
#include "hal_vp9d_ctx.h"
#include "vdpu382_vp9d.h"
#include "vp9d_syntax.h"

#define HW_PROB 1
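/*
 * HW_PROB selects the hardware probability-update path: the driver packs the
 * delta probabilities and lets the decoder adapt the contexts itself. With
 * HW_PROB set to 0 the driver instead outputs the full probability table and
 * updates it in software from the symbol counts read back after decode
 * (see the !HW_PROB path in hal_vp9d_vdpu382_wait).
 */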
#define VP9_CONTEXT 4
#define VP9_CTU_SIZE 64
#define PROB_SIZE_ALIGN_TO_4K MPP_ALIGN(PROB_SIZE, SZ_4K)
#define COUNT_SIZE_ALIGN_TO_4K MPP_ALIGN(COUNT_SIZE, SZ_4K)
#define MAX_SEGMAP_SIZE_ALIGN_TO_4K MPP_ALIGN(MAX_SEGMAP_SIZE, SZ_4K)

#define VDPU382_OFFSET_COUNT (PROB_SIZE_ALIGN_TO_4K)
#define VDPU382_PROBE_BUFFER_SIZE (PROB_SIZE_ALIGN_TO_4K + COUNT_SIZE_ALIGN_TO_4K)
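/*
 * Layout of the probe buffer: the probability table occupies the first
 * PROB_SIZE bytes (rounded up to 4K) and the symbol-count area follows at
 * VDPU382_OFFSET_COUNT, so a single allocation serves both the probability
 * read base and the count base programmed into reg167 with an offset.
 */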

typedef struct Vdpu382Vp9dCtx_t {
    Vp9dRegBuf g_buf[MAX_GEN_REG];
    MppBuffer probe_base;
    MppBuffer seg_base;
    RK_U32 offset_count;
    RK_U32 offset_segid_cur;
    RK_U32 offset_segid_last;
    MppBuffer prob_default_base;
    void* hw_regs;
    RK_S32 mv_base_addr;
    RK_S32 pre_mv_base_addr;
    Vp9dLastInfo ls_info;
    /*
     * swap between segid_cur_base & segid_last_base
     * 0 - use segid_cur_base as last
     * 1 - use segid_last_base as last
     */
    RK_U32 last_segid_flag;
    RK_S32 width;
    RK_S32 height;
    /* rcb buffers info */
    RK_S32 rcb_buf_size;
    Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT];
    MppBuffer rcb_buf;
    RK_U32 num_row_tiles;
    RK_U32 bit_depth;
    /* colmv buffers info */
    HalBufs cmv_bufs;
    RK_S32 mv_size;
    RK_S32 mv_count;
    RK_U32 prob_ctx_valid[VP9_CONTEXT];
    MppBuffer prob_loop_base[VP9_CONTEXT];
    RK_U32 prob_ref_poc[VP9_CONTEXT];
    RK_U32 col_ref_poc;
    RK_U32 segid_ref_poc;
} Vdpu382Vp9dCtx;

static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
{
    RK_S32 i = 0;
    RK_S32 ret = 0;
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    hw_ctx->offset_count = VDPU382_OFFSET_COUNT;
    hw_ctx->offset_segid_cur = 0;
    hw_ctx->offset_segid_last = MAX_SEGMAP_SIZE_ALIGN_TO_4K;
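    /*
     * seg_base is a single allocation holding two segment-id maps: one at
     * offset 0 and one at MAX_SEGMAP_SIZE_ALIGN_TO_4K. last_segid_flag
     * decides which half acts as the "last" map for a given frame.
     */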
    /* alloc common buffer */
    for (i = 0; i < VP9_CONTEXT; i++) {
        ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_loop_base[i], PROB_SIZE);
        if (ret) {
            mpp_err("vp9 probe_loop_base get buffer failed\n");
            return ret;
        }
    }
    ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_default_base, PROB_SIZE);
    if (ret) {
        mpp_err("vp9 probe_default_base get buffer failed\n");
        return ret;
    }
    /* alloc buffer for fast mode or normal */
    if (p_hal->fast_mode) {
        for (i = 0; i < MAX_GEN_REG; i++) {
            hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382Vp9dRegSet));
            ret = mpp_buffer_get(p_hal->group, &hw_ctx->g_buf[i].probe_base, VDPU382_PROBE_BUFFER_SIZE);
            if (ret) {
                mpp_err("vp9 probe_base get buffer failed\n");
                return ret;
            }
        }
    } else {
        hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(Vdpu382Vp9dRegSet));
        ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, VDPU382_PROBE_BUFFER_SIZE);
        if (ret) {
            mpp_err("vp9 probe_base get buffer failed\n");
            return ret;
        }
    }

    ret = mpp_buffer_get(p_hal->group, &hw_ctx->seg_base, MAX_SEGMAP_SIZE_ALIGN_TO_4K * 2);
    if (ret) {
        mpp_err("vp9 segid_base get buffer failed\n");
        return ret;
    }
    return MPP_OK;
}

static MPP_RET hal_vp9d_release_res(HalVp9dCtx *hal)
{
    RK_S32 i = 0;
    RK_S32 ret = 0;
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;

    if (hw_ctx->prob_default_base) {
        ret = mpp_buffer_put(hw_ctx->prob_default_base);
        if (ret) {
            mpp_err("vp9 probe_wr_base put buffer failed\n");
            return ret;
        }
    }
    for (i = 0; i < VP9_CONTEXT; i++) {
        if (hw_ctx->prob_loop_base[i]) {
            ret = mpp_buffer_put(hw_ctx->prob_loop_base[i]);
            if (ret) {
                mpp_err("vp9 probe_base put buffer failed\n");
                return ret;
            }
        }
    }
    if (p_hal->fast_mode) {
        for (i = 0; i < MAX_GEN_REG; i++) {
            if (hw_ctx->g_buf[i].probe_base) {
                ret = mpp_buffer_put(hw_ctx->g_buf[i].probe_base);
                if (ret) {
                    mpp_err("vp9 probe_base put buffer failed\n");
                    return ret;
                }
            }
            if (hw_ctx->g_buf[i].hw_regs) {
                mpp_free(hw_ctx->g_buf[i].hw_regs);
                hw_ctx->g_buf[i].hw_regs = NULL;
            }
            if (hw_ctx->g_buf[i].rcb_buf) {
                ret = mpp_buffer_put(hw_ctx->g_buf[i].rcb_buf);
                if (ret) {
                    mpp_err("vp9 rcb_buf[%d] put buffer failed\n", i);
                    return ret;
                }
            }
        }
    } else {
        if (hw_ctx->probe_base) {
            ret = mpp_buffer_put(hw_ctx->probe_base);
            if (ret) {
                mpp_err("vp9 probe_base put buffer failed\n");
                return ret;
            }
        }

        if (hw_ctx->hw_regs) {
            mpp_free(hw_ctx->hw_regs);
            hw_ctx->hw_regs = NULL;
        }
        if (hw_ctx->rcb_buf) {
            ret = mpp_buffer_put(hw_ctx->rcb_buf);
            if (ret) {
                mpp_err("vp9 rcb_buf put buffer failed\n");
                return ret;
            }
        }
    }

    if (hw_ctx->cmv_bufs) {
        ret = hal_bufs_deinit(hw_ctx->cmv_bufs);
        if (ret) {
            mpp_err("vp9 cmv bufs deinit buffer failed\n");
            return ret;
        }
    }

    if (hw_ctx->seg_base) {
        ret = mpp_buffer_put(hw_ctx->seg_base);
        if (ret) {
            mpp_err("vp9 seg_base put buffer failed\n");
            return ret;
        }
    }

    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_deinit(void *hal)
{
    MPP_RET ret = MPP_OK;
    HalVp9dCtx *p_hal = (HalVp9dCtx *)hal;

    hal_vp9d_release_res(p_hal);

    if (p_hal->group) {
        ret = mpp_buffer_group_put(p_hal->group);
        if (ret) {
            mpp_err("vp9d group free buffer failed\n");
            return ret;
        }
    }
    MPP_FREE(p_hal->hw_ctx);
    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_init(void *hal, MppHalCfg *cfg)
{
    MPP_RET ret = MPP_OK;
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    MEM_CHECK(ret, p_hal->hw_ctx = mpp_calloc_size(void, sizeof(Vdpu382Vp9dCtx)));
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;

    hw_ctx->mv_base_addr = -1;
    hw_ctx->pre_mv_base_addr = -1;
    mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align);
    mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);

    if (p_hal->group == NULL) {
        ret = mpp_buffer_group_get_internal(&p_hal->group, MPP_BUFFER_TYPE_ION);
        if (ret) {
            mpp_err("vp9 mpp_buffer_group_get failed\n");
            goto __FAILED;
        }
    }

    ret = hal_vp9d_alloc_res(p_hal);
    if (ret) {
        mpp_err("hal_vp9d_alloc_res failed\n");
        goto __FAILED;
    }

    hw_ctx->last_segid_flag = 1;

    if (cfg->hal_fbc_adj_cfg) {
        cfg->hal_fbc_adj_cfg->func = vdpu382_afbc_align_calc;
        cfg->hal_fbc_adj_cfg->expand = 0;
    }

    return ret;
__FAILED:
    hal_vp9d_vdpu382_deinit(hal);
    return ret;
}

static void vp9d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
                                 Vdpu382Vp9dRegSet *vp9_hw_regs,
                                 RK_S32 width, RK_S32 height, void* data)
{
    RK_U32 rcb_bits = 0;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
    RK_U32 num_tiles_col = 1 << pic_param->log2_tile_cols;
    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
    RK_U32 ext_align_size = num_tiles_col * 64 * 8;

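    /*
     * The RCB (row/column cache buffer) sizes below are computed in bits and
     * converted to bytes with MPP_RCB_BYTES. ext_align_size adds
     * num_tiles_col * 64 * 8 bits of headroom to the row buffers.
     */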
    width = MPP_ALIGN(width, VP9_CTU_SIZE);
    height = MPP_ALIGN(height, VP9_CTU_SIZE);
    /* RCB_STRMD_ROW */
    if (width >= 4096)
        rcb_bits = MPP_ALIGN(width, 64) * 232 + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_ROW */
    if (width >= 8192)
        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_TRANSD_COL */
    if ((height >= 8192) && (num_tiles_col > 1))
        rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1);
    else
        rcb_bits = 0;
    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_ROW */
    rcb_bits = width * 36 + ext_align_size;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_INTER_COL */
    rcb_info[RCB_INTER_COL].size = 0;

    /* RCB_INTRA_ROW */
    rcb_bits = width * 2 * 11 + ext_align_size;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_DBLK_ROW */
    rcb_bits = width * (0.5 + 16 * bit_depth) + num_tiles_col * 192 * bit_depth + ext_align_size;
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_SAO_ROW */
    rcb_info[RCB_SAO_ROW].size = 0;

    /* RCB_FBC_ROW */
    if (vp9_hw_regs->common.reg012.fbc_e) {
        rcb_bits = 8 * width * bit_depth + ext_align_size;
    } else
        rcb_bits = 0;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);

    /* RCB_FILT_COL */
    if (num_tiles_col > 1) {
        if (vp9_hw_regs->common.reg012.fbc_e) {
            rcb_bits = height * (4 + 24 * bit_depth);
        } else
            rcb_bits = height * (4 + 16 * bit_depth);
    } else
        rcb_bits = 0;
    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}

static void hal_vp9d_rcb_info_update(void *hal, Vdpu382Vp9dRegSet *hw_regs, void *data)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
    RK_U32 num_tiles = pic_param->log2_tile_rows;
    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
    RK_S32 height = vp9_ver_align(pic_param->height);
    RK_S32 width = vp9_ver_align(pic_param->width);

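    /*
     * RCB buffers are only re-sized and re-allocated when the tile layout,
     * bit depth or coded resolution changes; otherwise the buffers of the
     * previous frame are reused as-is.
     */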
    if (hw_ctx->num_row_tiles != num_tiles ||
        hw_ctx->bit_depth != bit_depth ||
        hw_ctx->width != width ||
        hw_ctx->height != height) {

        hw_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(hw_ctx->rcb_info, width, height);
        vp9d_refine_rcb_size(hw_ctx->rcb_info, hw_regs, width, height, pic_param);

        if (p_hal->fast_mode) {
            RK_U32 i;

            for (i = 0; i < MPP_ARRAY_ELEMS(hw_ctx->g_buf); i++) {
                MppBuffer rcb_buf = hw_ctx->g_buf[i].rcb_buf;

                if (rcb_buf) {
                    mpp_buffer_put(rcb_buf);
                    hw_ctx->g_buf[i].rcb_buf = NULL;
                }
                mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size);
                hw_ctx->g_buf[i].rcb_buf = rcb_buf;
            }
        } else {
            MppBuffer rcb_buf = hw_ctx->rcb_buf;

            if (rcb_buf) {
                mpp_buffer_put(rcb_buf);
                rcb_buf = NULL;
            }
            mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size);
            hw_ctx->rcb_buf = rcb_buf;
        }

        hw_ctx->num_row_tiles = num_tiles;
        hw_ctx->bit_depth = bit_depth;
        hw_ctx->width = width;
        hw_ctx->height = height;
    }
}


static MPP_RET hal_vp9d_vdpu382_setup_colmv_buf(void *hal, HalTaskInfo *task)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
    RK_U32 width = pic_param->width;
    RK_U32 height = pic_param->height;
    RK_S32 mv_size = 0, colmv_size = 8, colmv_byte = 16;
    RK_U32 compress = p_hal->hw_info ? p_hal->hw_info->cap_colmv_compress : 1;

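    /*
     * colmv_size / colmv_byte describe the per-block co-located MV storage
     * handed to vdpu382_get_colmv_size(); compression is requested whenever
     * the hardware reports the colmv compress capability.
     */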
    mv_size = vdpu382_get_colmv_size(width, height, VP9_CTU_SIZE, colmv_byte, colmv_size, compress);
    if (hw_ctx->cmv_bufs == NULL || hw_ctx->mv_size < mv_size) {
        size_t size = mv_size;

        if (hw_ctx->cmv_bufs) {
            hal_bufs_deinit(hw_ctx->cmv_bufs);
            hw_ctx->cmv_bufs = NULL;
        }

        hal_bufs_init(&hw_ctx->cmv_bufs);
        if (hw_ctx->cmv_bufs == NULL) {
            mpp_err_f("colmv bufs init fail");
            return MPP_ERR_NOMEM;
        }
        hw_ctx->mv_size = mv_size;
        hw_ctx->mv_count = mpp_buf_slot_get_count(p_hal->slots);
        hal_bufs_setup(hw_ctx->cmv_bufs, hw_ctx->mv_count, 1, &size);
    }

    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
{
    RK_S32 i;
    RK_U8 bit_depth = 0;
    RK_U32 ref_frame_width_y;
    RK_U32 ref_frame_height_y;
    RK_S32 stream_len = 0, align_offset = 0;
    RK_U32 y_hor_virstride, uv_hor_virstride, y_virstride;
    RK_U8 *bitstream = NULL;
    MppBuffer streambuf = NULL;
    RK_U32 sw_y_hor_virstride;
    RK_U32 sw_uv_hor_virstride;
    RK_U32 sw_y_virstride;
    RK_U8 ref_idx = 0;
    RK_U8 ref_frame_idx = 0;
    RK_U32 *reg_ref_base = 0;
    RK_S32 intraFlag = 0;
    MppBuffer framebuf = NULL;
    HalBuf *mv_buf = NULL;
    RK_U32 fbc_en = 0;

    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
    RK_U32 frame_ctx_id = pic_param->frame_context_idx;

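    /*
     * In fast mode each in-flight task grabs a free register/probe buffer
     * slot from g_buf so several frames can be generated and queued without
     * overwriting each other's state.
     */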
    if (p_hal->fast_mode) {
        for (i = 0; i < MAX_GEN_REG; i++) {
            if (!hw_ctx->g_buf[i].use_flag) {
                task->dec.reg_index = i;
                hw_ctx->probe_base = hw_ctx->g_buf[i].probe_base;

                hw_ctx->hw_regs = hw_ctx->g_buf[i].hw_regs;
                hw_ctx->g_buf[i].use_flag = 1;
                break;
            }
        }
        if (i == MAX_GEN_REG) {
            mpp_err("vp9 fast mode buf all used\n");
            return MPP_ERR_NOMEM;
        }
    }

    if (hal_vp9d_vdpu382_setup_colmv_buf(hal, task))
        return MPP_ERR_NOMEM;

    Vdpu382Vp9dRegSet *vp9_hw_regs = (Vdpu382Vp9dRegSet*)hw_ctx->hw_regs;
    intraFlag = (!pic_param->frame_type || pic_param->intra_only);
    stream_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
    memset(hw_ctx->hw_regs, 0, sizeof(Vdpu382Vp9dRegSet));
#if HW_PROB
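    /*
     * Hardware probability path: pack the delta probabilities of this frame
     * into probe_base, and for intra / key frames also prepare the default
     * probability table that the hardware starts from.
     */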
    hal_vp9d_prob_flag_delta(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
    mpp_buffer_sync_end(hw_ctx->probe_base);
    if (intraFlag) {
        hal_vp9d_prob_default(mpp_buffer_get_ptr(hw_ctx->prob_default_base), task->dec.syntax.data);
        mpp_buffer_sync_end(hw_ctx->prob_default_base);
    }

    /* config reg103 */
    vp9_hw_regs->vp9d_param.reg103.prob_update_en = 1;
    vp9_hw_regs->vp9d_param.reg103.intra_only_flag = intraFlag;
    if (!intraFlag) {
        vp9_hw_regs->vp9d_param.reg103.txfmmode_rfsh_en = (pic_param->txmode == 4) ? 1 : 0;
        vp9_hw_regs->vp9d_param.reg103.interp_filter_switch_en = pic_param->interp_filter == 4 ? 1 : 0;
    }
    vp9_hw_regs->vp9d_param.reg103.ref_mode_rfsh_en = 1;
    vp9_hw_regs->vp9d_param.reg103.single_ref_rfsh_en = 1;
    vp9_hw_regs->vp9d_param.reg103.comp_ref_rfsh_en = 1;
    vp9_hw_regs->vp9d_param.reg103.inter_coef_rfsh_flag = 0;
    vp9_hw_regs->vp9d_param.reg103.refresh_en =
        !pic_param->error_resilient_mode && !pic_param->parallelmode;
    vp9_hw_regs->vp9d_param.reg103.prob_save_en = pic_param->refresh_frame_context;
    vp9_hw_regs->vp9d_param.reg103.allow_high_precision_mv = pic_param->allow_high_precision_mv;
    vp9_hw_regs->vp9d_param.reg103.last_key_frame_flag = hw_ctx->ls_info.last_intra_only;

    /* set info for multi core */
    {
        MppFrame mframe = NULL;

        vp9_hw_regs->common.reg028.sw_poc_arb_flag = 1;
        mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
        vp9_hw_regs->vp9d_param.reg65.cur_poc = mframe ? mpp_frame_get_poc(mframe) : 0;
        // last poc
        ref_idx = pic_param->frame_refs[0].Index7Bits;
        ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
        if (ref_frame_idx < 0x7f) {
            mframe = NULL;
            mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe);
            vp9_hw_regs->vp9d_param.reg95.last_poc = mframe ? mpp_frame_get_poc(mframe) : 0;
        }
        // golden poc
        ref_idx = pic_param->frame_refs[1].Index7Bits;
        ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
        if (ref_frame_idx < 0x7f) {
            mframe = NULL;
            mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe);
            vp9_hw_regs->vp9d_param.reg96.golden_poc = mframe ? mpp_frame_get_poc(mframe) : 0;
        }
        // altref poc
        ref_idx = pic_param->frame_refs[2].Index7Bits;
        ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
        if (ref_frame_idx < 0x7f) {
            mframe = NULL;
            mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe);
            vp9_hw_regs->vp9d_param.reg97.altref_poc = mframe ? mpp_frame_get_poc(mframe) : 0;
        }
        // colref poc
        vp9_hw_regs->vp9d_param.reg98.col_ref_poc =
            hw_ctx->col_ref_poc ? hw_ctx->col_ref_poc : vp9_hw_regs->vp9d_param.reg65.cur_poc;
        if (pic_param->show_frame && !pic_param->show_existing_frame)
            hw_ctx->col_ref_poc = vp9_hw_regs->vp9d_param.reg65.cur_poc;
        // segment id ref poc
        vp9_hw_regs->vp9d_param.reg100.segid_ref_poc = hw_ctx->segid_ref_poc;

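        /*
         * reg168/reg169 both point into seg_base; the register offsets select
         * which half of the buffer is the last segment-id map and which one
         * receives the map of the current frame, ping-ponged through
         * last_segid_flag whenever the map is updated.
         */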
        vp9_hw_regs->vp9d_addr.reg169_segidcur_base = mpp_buffer_get_fd(hw_ctx->seg_base);
        vp9_hw_regs->vp9d_addr.reg168_segidlast_base = mpp_buffer_get_fd(hw_ctx->seg_base);
        if (hw_ctx->last_segid_flag) {
            mpp_dev_set_reg_offset(p_hal->dev, 168, hw_ctx->offset_segid_last);
            mpp_dev_set_reg_offset(p_hal->dev, 169, hw_ctx->offset_segid_cur);
        } else {
            mpp_dev_set_reg_offset(p_hal->dev, 168, hw_ctx->offset_segid_cur);
            mpp_dev_set_reg_offset(p_hal->dev, 169, hw_ctx->offset_segid_last);
        }

        if ((pic_param->stVP9Segments.enabled && pic_param->stVP9Segments.update_map) ||
            (hw_ctx->ls_info.last_width != pic_param->width) ||
            (hw_ctx->ls_info.last_height != pic_param->height) ||
            intraFlag || pic_param->error_resilient_mode) {
            hw_ctx->segid_ref_poc = vp9_hw_regs->vp9d_param.reg65.cur_poc;
            hw_ctx->last_segid_flag = !hw_ctx->last_segid_flag;
            vp9_hw_regs->vp9d_param.reg100.segid_ref_poc = 0;
            vp9_hw_regs->vp9d_param.reg75.vp9_segment_id_update = 1;
        } else
            vp9_hw_regs->vp9d_param.reg75.vp9_segment_id_update = 0;
    }

    /* config last prob base and update write base */
    {

        if (intraFlag || pic_param->error_resilient_mode) {
            if (intraFlag
                || pic_param->error_resilient_mode
                || (pic_param->reset_frame_context == 3)) {
                memset(hw_ctx->prob_ctx_valid, 0, sizeof(hw_ctx->prob_ctx_valid));
            } else if (pic_param->reset_frame_context == 2) {
                hw_ctx->prob_ctx_valid[frame_ctx_id] = 0;
            }
        }

#if VP9_DUMP
        {
            static RK_U32 file_cnt = 0;
            char file_name[128];
            RK_U32 i = 0;
            sprintf(file_name, "/data/vp9/prob_last_%d.txt", file_cnt);
            FILE *fp = fopen(file_name, "wb");
            RK_U32 *tmp = NULL;
            if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
                tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_loop_base[pic_param->frame_context_idx]);
            } else {
                tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_default_base);
            }
            for (i = 0; i < PROB_SIZE / 4; i += 2) {
                fprintf(fp, "%08x%08x\n", tmp[i + 1], tmp[i]);
            }
            file_cnt++;
            fflush(fp);
            fclose(fp);
        }
#endif

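        /*
         * If the selected frame context already holds adapted probabilities,
         * the hardware reads them from the matching prob_loop_base; otherwise
         * it starts from the default table, and refresh_frame_context marks
         * the context valid for the frames that follow.
         */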
        if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
            vp9_hw_regs->vp9d_addr.reg162_last_prob_base =
                mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
            vp9_hw_regs->common.reg028.swreg_vp9_rd_prob_idx = frame_ctx_id + 1;
            vp9_hw_regs->vp9d_param.reg99.prob_ref_poc = hw_ctx->prob_ref_poc[frame_ctx_id];
        } else {
            vp9_hw_regs->vp9d_addr.reg162_last_prob_base = mpp_buffer_get_fd(hw_ctx->prob_default_base);
            hw_ctx->prob_ctx_valid[frame_ctx_id] |= pic_param->refresh_frame_context;
            vp9_hw_regs->common.reg028.swreg_vp9_rd_prob_idx = 0;
            vp9_hw_regs->vp9d_param.reg99.prob_ref_poc = 0;
            if (pic_param->refresh_frame_context)
                hw_ctx->prob_ref_poc[frame_ctx_id] = vp9_hw_regs->vp9d_param.reg65.cur_poc;
        }
        vp9_hw_regs->vp9d_addr.reg172_update_prob_wr_base =
            mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
        vp9_hw_regs->common.reg028.swreg_vp9_wr_prob_idx = frame_ctx_id + 1;

    }
    vp9_hw_regs->vp9d_addr.reg160_delta_prob_base = mpp_buffer_get_fd(hw_ctx->probe_base);
#else
    hal_vp9d_output_probe(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
    mpp_buffer_sync_end(hw_ctx->probe_base);
#endif
    vp9_hw_regs->common.reg012.colmv_compress_en = p_hal->hw_info ? p_hal->hw_info->cap_colmv_compress : 1;
    vp9_hw_regs->common.reg013.cur_pic_is_idr = !pic_param->frame_type;
    vp9_hw_regs->common.reg009.dec_mode = 2; //set as vp9 dec
    vp9_hw_regs->common.reg016_str_len = ((stream_len + 15) & (~15)) + 0x80;
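    /* stream length is rounded up to 16 bytes plus a 0x80 byte guard area;
     * the gap between the real stream end and this length is zeroed below */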

    mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf);
    bitstream = mpp_buffer_get_ptr(streambuf);
    align_offset = vp9_hw_regs->common.reg016_str_len - stream_len;
    if (align_offset > 0) {
        memset((void *)(bitstream + stream_len), 0, align_offset);
    }

    //--- calculate the yuv_frame_size and mv_size
    bit_depth = pic_param->BitDepthMinus8Luma + 8;

    {
        MppFrame mframe = NULL;

        mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
        fbc_en = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));

        if (fbc_en) {
            RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
            RK_U32 h = MPP_ALIGN(mpp_frame_get_height(mframe), 64);
            RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (h + 16) / 16, SZ_4K);

            vp9_hw_regs->common.reg012.fbc_e = 1;
            vp9_hw_regs->common.reg018.y_hor_virstride = fbc_hdr_stride >> 4;
            vp9_hw_regs->common.reg019.uv_hor_virstride = fbc_hdr_stride >> 4;
            vp9_hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
        } else {
            sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
            sw_uv_hor_virstride = sw_y_hor_virstride;
            sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;

            vp9_hw_regs->common.reg012.fbc_e = 0;
            vp9_hw_regs->common.reg018.y_hor_virstride = sw_y_hor_virstride;
            vp9_hw_regs->common.reg019.uv_hor_virstride = sw_uv_hor_virstride;
            vp9_hw_regs->common.reg020_y_virstride.y_virstride = sw_y_virstride;
        }
    }
    if (!pic_param->intra_only && pic_param->frame_type &&
        !pic_param->error_resilient_mode && hw_ctx->ls_info.last_show_frame) {
        hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
    }
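    /*
     * The colmv buffer of the previous frame only becomes the temporal MV
     * reference for inter frames without error resilience whose last frame
     * was actually shown; otherwise the previous reference base is kept, or
     * falls back to the current colmv buffer below.
     */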

    mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_BUFFER, &framebuf);
    vp9_hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf);
    vp9_hw_regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(streambuf);
    vp9_hw_regs->common_addr.reg129_rlcwrite_base = mpp_buffer_get_fd(streambuf);

    vp9_hw_regs->vp9d_addr.reg167_count_prob_base = mpp_buffer_get_fd(hw_ctx->probe_base);
    mpp_dev_set_reg_offset(p_hal->dev, 167, hw_ctx->offset_count);

    //set cur colmv base
    mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, task->dec.output);
    vp9_hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
    hw_ctx->mv_base_addr = vp9_hw_regs->common_addr.reg131_colmv_cur_base;
    if (hw_ctx->pre_mv_base_addr < 0) {
        hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
    }
    vp9_hw_regs->vp9d_addr.reg170_ref_colmv_base = hw_ctx->pre_mv_base_addr;

    vp9_hw_regs->vp9d_param.reg64.cprheader_offset = 0;
    reg_ref_base = (RK_U32*)&vp9_hw_regs->vp9d_addr.reg164_ref_last_base;
    for (i = 0; i < 3; i++) {
        MppFrame frame = NULL;

        ref_idx = pic_param->frame_refs[i].Index7Bits;
        ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
        ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
        ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];

        if (ref_frame_idx < 0x7f)
            mpp_buf_slot_get_prop(p_hal->slots, ref_frame_idx, SLOT_FRAME_PTR, &frame);

        if (fbc_en && frame) {
            RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(frame);
            RK_U32 h = MPP_ALIGN(mpp_frame_get_height(frame), 64);
            RK_U32 fbd_offset = MPP_ALIGN(fbc_hdr_stride * (h + 16) / 16, SZ_4K);

            y_hor_virstride = uv_hor_virstride = fbc_hdr_stride >> 4;
            y_virstride = fbd_offset;
        } else {
            if (frame) {
                y_hor_virstride = uv_hor_virstride = mpp_frame_get_hor_stride(frame) >> 4;
                y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(frame);
            } else {
                y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
                y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
            }
        }

        if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {
            mpp_buf_slot_get_prop(p_hal->slots, pic_param->ref_frame_map[ref_idx].Index7Bits, SLOT_BUFFER, &framebuf);
        }

        if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) {
            switch (i) {
            case 0: {
                vp9_hw_regs->vp9d_param.reg106.framewidth_last = ref_frame_width_y;
                vp9_hw_regs->vp9d_param.reg107.frameheight_last = ref_frame_height_y;
                vp9_hw_regs->vp9d_param.reg79.lastfy_hor_virstride = y_hor_virstride;
                vp9_hw_regs->vp9d_param.reg80.lastfuv_hor_virstride = uv_hor_virstride;
                vp9_hw_regs->vp9d_param.reg85.lastfy_virstride = y_virstride;
            } break;
            case 1: {
                vp9_hw_regs->vp9d_param.reg108.framewidth_golden = ref_frame_width_y;
                vp9_hw_regs->vp9d_param.reg109.frameheight_golden = ref_frame_height_y;
                vp9_hw_regs->vp9d_param.reg81.goldenfy_hor_virstride = y_hor_virstride;
                vp9_hw_regs->vp9d_param.reg82.goldenfuv_hor_virstride = uv_hor_virstride;
                vp9_hw_regs->vp9d_param.reg86.goldeny_virstride = y_virstride;
            } break;
            case 2: {
                vp9_hw_regs->vp9d_param.reg110.framewidth_alfter = ref_frame_width_y;
                vp9_hw_regs->vp9d_param.reg111.frameheight_alfter = ref_frame_height_y;
                vp9_hw_regs->vp9d_param.reg83.altreffy_hor_virstride = y_hor_virstride;
                vp9_hw_regs->vp9d_param.reg84.altreffuv_hor_virstride = uv_hor_virstride;
                vp9_hw_regs->vp9d_param.reg87.altrefy_virstride = y_virstride;
            } break;
            default:
                break;
            }

            /* index 0/1/2 selects the last/golden/altref reference base starting at reg164 */
            if (framebuf != NULL) {
                reg_ref_base[i] = mpp_buffer_get_fd(framebuf);
            } else {
                mpp_log("ref buffer address is invalid, fall back to decout base, slot index 0x%x", pic_param->ref_frame_map[ref_idx].Index7Bits);
                reg_ref_base[i] = vp9_hw_regs->common_addr.reg130_decout_base;
            }
            mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, pic_param->ref_frame_map[ref_idx].Index7Bits);
            vp9_hw_regs->vp9d_addr.reg181_196_ref_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
        } else {
            reg_ref_base[i] = vp9_hw_regs->common_addr.reg130_decout_base;
            vp9_hw_regs->vp9d_addr.reg181_196_ref_colmv_base[i] = vp9_hw_regs->common_addr.reg131_colmv_cur_base;
        }
    }

    for (i = 0; i < 8; i++) {
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_qp_delta_en = (hw_ctx->ls_info.feature_mask[i]) & 0x1;
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_qp_delta = hw_ctx->ls_info.feature_data[i][0];
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_loopfitler_value_en = (hw_ctx->ls_info.feature_mask[i] >> 1) & 0x1;
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_loopfilter_value = hw_ctx->ls_info.feature_data[i][1];
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_referinfo_en = (hw_ctx->ls_info.feature_mask[i] >> 2) & 0x1;
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_referinfo = hw_ctx->ls_info.feature_data[i][2];
        vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_skip_en = (hw_ctx->ls_info.feature_mask[i] >> 3) & 0x1;
    }

    vp9_hw_regs->vp9d_param.reg67_74[0].segid_abs_delta = hw_ctx->ls_info.abs_delta_last;
    vp9_hw_regs->vp9d_param.reg76.tx_mode = pic_param->txmode;
    vp9_hw_regs->vp9d_param.reg76.frame_reference_mode = pic_param->refmode;
    vp9_hw_regs->vp9d_param.reg94.ref_deltas_lastframe = 0;

    if (!intraFlag) {
        for (i = 0; i < 4; i++)
            vp9_hw_regs->vp9d_param.reg94.ref_deltas_lastframe |= (hw_ctx->ls_info.last_ref_deltas[i] & 0x7f) << (7 * i);

        for (i = 0; i < 2; i++)
            vp9_hw_regs->vp9d_param.reg75.mode_deltas_lastframe |= (hw_ctx->ls_info.last_mode_deltas[i] & 0x7f) << (7 * i);
    } else {
        hw_ctx->ls_info.segmentation_enable_flag_last = 0;
        hw_ctx->ls_info.last_intra_only = 1;
    }

    vp9_hw_regs->vp9d_param.reg75.segmentation_enable_lstframe = hw_ctx->ls_info.segmentation_enable_flag_last;
    vp9_hw_regs->vp9d_param.reg75.last_show_frame = hw_ctx->ls_info.last_show_frame;
    vp9_hw_regs->vp9d_param.reg75.last_intra_only = hw_ctx->ls_info.last_intra_only;
    vp9_hw_regs->vp9d_param.reg75.last_widthheight_eqcur = (pic_param->width == hw_ctx->ls_info.last_width) && (pic_param->height == hw_ctx->ls_info.last_height);
    vp9_hw_regs->vp9d_param.reg78.lasttile_size = stream_len - pic_param->first_partition_size;


    if (!intraFlag) {
        vp9_hw_regs->vp9d_param.reg88.lref_hor_scale = pic_param->mvscale[0][0];
        vp9_hw_regs->vp9d_param.reg89.lref_ver_scale = pic_param->mvscale[0][1];
        vp9_hw_regs->vp9d_param.reg90.gref_hor_scale = pic_param->mvscale[1][0];
        vp9_hw_regs->vp9d_param.reg91.gref_ver_scale = pic_param->mvscale[1][1];
        vp9_hw_regs->vp9d_param.reg92.aref_hor_scale = pic_param->mvscale[2][0];
        vp9_hw_regs->vp9d_param.reg93.aref_ver_scale = pic_param->mvscale[2][1];
    }

    vp9_hw_regs->common.reg010.dec_e = 1;
    vp9_hw_regs->common.reg011.buf_empty_en = 1;
    vp9_hw_regs->common.reg011.dec_clkgate_e = 1;
    vp9_hw_regs->common.reg011.err_head_fill_e = 1;
    vp9_hw_regs->common.reg011.err_colmv_fill_e = 1;

    vp9_hw_regs->common.reg026.inter_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.filterd_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.strmd_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.mcp_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.busifd_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.dec_ctrl_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.intra_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.mc_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.transd_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.sram_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.cru_auto_gating_e = 1;
    vp9_hw_regs->common.reg026.reg_cfg_gating_en = 1;

    vp9_hw_regs->common.reg032_timeout_threshold = 0x3ffff;

    //last info update
    hw_ctx->ls_info.abs_delta_last = pic_param->stVP9Segments.abs_delta;
    for (i = 0; i < 4; i++) {
        hw_ctx->ls_info.last_ref_deltas[i] = pic_param->ref_deltas[i];
    }

    for (i = 0; i < 2; i++) {
        hw_ctx->ls_info.last_mode_deltas[i] = pic_param->mode_deltas[i];
    }

    for (i = 0; i < 8; i++) {
        hw_ctx->ls_info.feature_data[i][0] = pic_param->stVP9Segments.feature_data[i][0];
        hw_ctx->ls_info.feature_data[i][1] = pic_param->stVP9Segments.feature_data[i][1];
        hw_ctx->ls_info.feature_data[i][2] = pic_param->stVP9Segments.feature_data[i][2];
        hw_ctx->ls_info.feature_data[i][3] = pic_param->stVP9Segments.feature_data[i][3];
        hw_ctx->ls_info.feature_mask[i] = pic_param->stVP9Segments.feature_mask[i];
    }
    if (!hw_ctx->ls_info.segmentation_enable_flag_last)
        hw_ctx->ls_info.segmentation_enable_flag_last = pic_param->stVP9Segments.enabled;

    hw_ctx->ls_info.last_show_frame = pic_param->show_frame;
    hw_ctx->ls_info.last_width = pic_param->width;
    hw_ctx->ls_info.last_height = pic_param->height;
    hw_ctx->ls_info.last_intra_only = (!pic_param->frame_type || pic_param->intra_only);
    hal_vp9d_dbg_par("stVP9Segments.enabled %d show_frame %d width %d height %d last_intra_only %d",
                     pic_param->stVP9Segments.enabled, pic_param->show_frame,
                     pic_param->width, pic_param->height,
                     hw_ctx->ls_info.last_intra_only);

    hal_vp9d_rcb_info_update(hal, vp9_hw_regs, pic_param);
    {
        MppBuffer rcb_buf = NULL;

        rcb_buf = p_hal->fast_mode ? hw_ctx->g_buf[task->dec.reg_index].rcb_buf : hw_ctx->rcb_buf;
        vdpu382_setup_rcb(&vp9_hw_regs->common_addr, p_hal->dev, rcb_buf, hw_ctx->rcb_info);
    }

    {
        MppFrame mframe = NULL;

        mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
        if (mpp_frame_get_thumbnail_en(mframe)) {
            vp9_hw_regs->vp9d_addr.reg198_scale_down_luma_base =
                vp9_hw_regs->common_addr.reg130_decout_base;
            vp9_hw_regs->vp9d_addr.reg199_scale_down_chorme_base =
                vp9_hw_regs->common_addr.reg130_decout_base;
            vdpu382_setup_down_scale(mframe, p_hal->dev, &vp9_hw_regs->common);
        } else {
            vp9_hw_regs->vp9d_addr.reg198_scale_down_luma_base = 0;
            vp9_hw_regs->vp9d_addr.reg199_scale_down_chorme_base = 0;
            vp9_hw_regs->common.reg012.scale_down_en = 0;
        }
    }
    vdpu382_setup_statistic(&vp9_hw_regs->common, &vp9_hw_regs->statistic);

    // whether counts need to be updated
    if (pic_param->refresh_frame_context && !pic_param->parallelmode) {
        task->dec.flags.wait_done = 1;
    }

    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_start(void *hal, HalTaskInfo *task)
{
    MPP_RET ret = MPP_OK;
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    Vdpu382Vp9dRegSet *hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->hw_regs;
    MppDev dev = p_hal->dev;

    if (p_hal->fast_mode) {
        RK_S32 index = task->dec.reg_index;
        hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->g_buf[index].hw_regs;
    }

    mpp_assert(hw_regs);

#if VP9_DUMP
    {
        static RK_U32 file_cnt = 0;
        char file_name[128];
        sprintf(file_name, "/data/vp9_regs/reg_%d.txt", file_cnt);
        FILE *fp = fopen(file_name, "wb");
        RK_U32 i = 0;
        RK_U32 *tmp = NULL;
        tmp = (RK_U32 *)&hw_regs->common;
        for (i = 0; i < sizeof(hw_regs->common) / 4; i++) {
            fprintf(fp, "reg[%d] 0x%08x\n", i + 8, tmp[i]);
        }
        fprintf(fp, "\n");
        tmp = (RK_U32 *)&hw_regs->vp9d_param;
        for (i = 0; i < sizeof(hw_regs->vp9d_param) / 4; i++) {
            fprintf(fp, "reg[%d] 0x%08x\n", i + 64, tmp[i]);
        }
        fprintf(fp, "\n");
        tmp = (RK_U32 *)&hw_regs->common_addr;
        for (i = 0; i < sizeof(hw_regs->common_addr) / 4; i++) {
            fprintf(fp, "reg[%d] 0x%08x\n", i + 128, tmp[i]);
        }
        fprintf(fp, "\n");
        tmp = (RK_U32 *)&hw_regs->vp9d_addr;
        for (i = 0; i < sizeof(hw_regs->vp9d_addr) / 4; i++) {
            fprintf(fp, "reg[%d] 0x%08x\n", i + 160, tmp[i]);
        }
        file_cnt++;
        fflush(fp);
        fclose(fp);
    }
#endif

    do {
        MppDevRegWrCfg wr_cfg;
        MppDevRegRdCfg rd_cfg;

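        /*
         * The register set is pushed to the kernel in groups at their fixed
         * offsets (common, codec params, common addr, codec addr, statistic),
         * a read-back is registered for the interrupt/status registers, the
         * per-frame RCB info is programmed, and a single MPP_DEV_CMD_SEND
         * kicks the hardware.
         */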
        wr_cfg.reg = &hw_regs->common;
        wr_cfg.size = sizeof(hw_regs->common);
        wr_cfg.offset = OFFSET_COMMON_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
        if (ret) {
            mpp_err_f("set register write failed %d\n", ret);
            break;
        }

        wr_cfg.reg = &hw_regs->vp9d_param;
        wr_cfg.size = sizeof(hw_regs->vp9d_param);
        wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
        if (ret) {
            mpp_err_f("set register write failed %d\n", ret);
            break;
        }

        wr_cfg.reg = &hw_regs->common_addr;
        wr_cfg.size = sizeof(hw_regs->common_addr);
        wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
        if (ret) {
            mpp_err_f("set register write failed %d\n", ret);
            break;
        }

        wr_cfg.reg = &hw_regs->vp9d_addr;
        wr_cfg.size = sizeof(hw_regs->vp9d_addr);
        wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
        if (ret) {
            mpp_err_f("set register write failed %d\n", ret);
            break;
        }

        wr_cfg.reg = &hw_regs->statistic;
        wr_cfg.size = sizeof(hw_regs->statistic);
        wr_cfg.offset = OFFSET_STATISTIC_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
        if (ret) {
            mpp_err_f("set register write failed %d\n", ret);
            break;
        }

        rd_cfg.reg = &hw_regs->irq_status;
        rd_cfg.size = sizeof(hw_regs->irq_status);
        rd_cfg.offset = OFFSET_INTERRUPT_REGS;

        ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
        if (ret) {
            mpp_err_f("set register read failed %d\n", ret);
            break;
        }
        /* rcb info for sram */
        vdpu382_set_rcbinfo(dev, hw_ctx->rcb_info);
        ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
        if (ret) {
            mpp_err_f("send cmd failed %d\n", ret);
            break;
        }
    } while (0);

    (void)task;
    return ret;
}

static MPP_RET hal_vp9d_vdpu382_wait(void *hal, HalTaskInfo *task)
{
    MPP_RET ret = MPP_OK;
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
    Vdpu382Vp9dRegSet *hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->hw_regs;

    if (p_hal->fast_mode)
        hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->g_buf[task->dec.reg_index].hw_regs;

    mpp_assert(hw_regs);

    ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
    if (ret)
        mpp_err_f("poll cmd failed %d\n", ret);

    if (hal_vp9d_debug & HAL_VP9D_DBG_REG) {
        RK_U32 *p = (RK_U32 *)hw_regs;
        RK_U32 i = 0;

        for (i = 0; i < sizeof(Vdpu382Vp9dRegSet) / 4; i++)
            mpp_log("get regs[%02d]: %08X\n", i, *p++);
    }

    if (task->dec.flags.parse_err ||
        task->dec.flags.ref_err ||
        !hw_regs->irq_status.reg224.dec_rdy_sta) {
        MppFrame mframe = NULL;
        mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
        mpp_frame_set_errinfo(mframe, 1);
    }
#if !HW_PROB
    if (p_hal->dec_cb && task->dec.flags.wait_done) {
        DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;

        mpp_buffer_sync_end(hw_ctx->count_base);
        hal_vp9d_update_counts(mpp_buffer_get_ptr(hw_ctx->count_base), task->dec.syntax.data);
        mpp_callback(p_hal->dec_cb, &pic_param->counts);
    }
#endif
    if (p_hal->fast_mode) {
        hw_ctx->g_buf[task->dec.reg_index].use_flag = 0;
    }

    (void)task;
    return ret;
}

static MPP_RET hal_vp9d_vdpu382_reset(void *hal)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;

    hal_vp9d_enter();

    memset(&hw_ctx->ls_info, 0, sizeof(hw_ctx->ls_info));
    hw_ctx->mv_base_addr = -1;
    hw_ctx->pre_mv_base_addr = -1;
    hw_ctx->last_segid_flag = 1;
    memset(&hw_ctx->prob_ref_poc, 0, sizeof(hw_ctx->prob_ref_poc));
    hw_ctx->col_ref_poc = 0;
    hw_ctx->segid_ref_poc = 0;

    hal_vp9d_leave();

    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_flush(void *hal)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;

    hal_vp9d_enter();

    hw_ctx->mv_base_addr = -1;
    hw_ctx->pre_mv_base_addr = -1;

    hal_vp9d_leave();

    return MPP_OK;
}

static MPP_RET hal_vp9d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;

    switch ((MpiCmd)cmd_type) {
    case MPP_DEC_SET_FRAME_INFO : {
        MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param);

        if (MPP_FRAME_FMT_IS_FBC(fmt)) {
            vdpu382_afbc_align_calc(p_hal->slots, (MppFrame)param, 0);
        } else {
            mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align);
        }
    } break;
    default : {
    } break;
    }

    return MPP_OK;
}

const MppHalApi hal_vp9d_vdpu382 = {
    .name = "vp9d_vdpu382",
    .type = MPP_CTX_DEC,
    .coding = MPP_VIDEO_CodingVP9,
    .ctx_size = sizeof(Vdpu382Vp9dCtx),
    .flag = 0,
    .init = hal_vp9d_vdpu382_init,
    .deinit = hal_vp9d_vdpu382_deinit,
    .reg_gen = hal_vp9d_vdpu382_gen_regs,
    .start = hal_vp9d_vdpu382_start,
    .wait = hal_vp9d_vdpu382_wait,
    .reset = hal_vp9d_vdpu382_reset,
    .flush = hal_vp9d_vdpu382_flush,
    .control = hal_vp9d_vdpu382_control,
};