xref: /rockchip-linux_mpp/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu383.c (revision 437bfbeb9567cca9cd9080e3f6954aa9d6a94f18)
1 /* SPDX-License-Identifier: Apache-2.0 OR MIT */
2 /*
3  * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
4  */
5 
6 #define MODULE_TAG "hal_vp9d_vdpu383"
7 
8 #include <string.h>
9 
10 #include "mpp_debug.h"
11 #include "mpp_env.h"
12 #include "mpp_mem.h"
13 #include "mpp_common.h"
14 #include "mpp_buffer_impl.h"
15 #include "mpp_bitput.h"
16 #include "mpp_compat_impl.h"
17 
18 #include "hal_vp9d_debug.h"
19 #include "hal_vp9d_com.h"
20 #include "hal_vp9d_vdpu383.h"
21 #include "hal_vp9d_ctx.h"
22 #include "vdpu383_vp9d.h"
23 #include "vp9d_syntax.h"
24 
25 #define HW_PROB         1
26 #define VP9_CONTEXT     4
27 #define VP9_CTU_SIZE    64
28 
29 #define GBL_SIZE        2 * (MPP_ALIGN(1299, 128) / 8)
30 
31 #define EIGHTTAP        0
32 #define EIGHTTAP_SMOOTH 1
33 #define EIGHTTAP_SHARP  2
34 #define BILINEAR        3
35 
36 const RK_U8 literal_to_filter[] = { EIGHTTAP_SMOOTH, EIGHTTAP,
37                                     EIGHTTAP_SHARP, BILINEAR
38                                   };
39 
/*
 * Private context of the VDPU383 VP9 decoder HAL.
 * It is allocated by hal_vp9d_vdpu383_init() and reached through
 * HalVp9dCtx::hw_ctx.
 */
typedef struct Vdpu383Vp9dCtx_t {
    /* per-task register sets and buffers, used when fast_mode is enabled */
    Vp9dRegBuf      g_buf[MAX_GEN_REG];
    /* receives the packed uncompressed header; its fd goes to reg131_gbl_base */
    MppBuffer       global_base;
    /* key-frame probability table; its fd goes to reg183_kfprob_base */
    MppBuffer       probe_base;
    MppBuffer       count_base;
    MppBuffer       segid_cur_base;
    MppBuffer       segid_last_base;
    /* default probabilities, used as "last prob" when a frame context is not valid */
    MppBuffer       prob_default_base;
    /* register set being filled for the current task (a Vdpu383Vp9dRegSet) */
    void*           hw_regs;
    /* both initialised to -1 by hal_vp9d_vdpu383_init() */
    RK_S32          mv_base_addr;
    RK_S32          pre_mv_base_addr;
    /* values remembered from the previous frame, packed into the header */
    Vp9dLastInfo    ls_info;
    /*
     * swap between segid_cur_base & segid_last_base
     * 0  used segid_cur_base as last
     * 1  used segid_last_base as last
     *    (the original note was cut off after "as" - TODO confirm)
     */
    RK_U32          last_segid_flag;
    /* geometry the current rcb buffers were sized for */
    RK_S32          width;
    RK_S32          height;
    /* rcb buffers info */
    RK_S32          rcb_buf_size;
    Vdpu383RcbInfo  rcb_info[RCB_BUF_COUNT];
    MppBuffer       rcb_buf;
    /* holds log2_tile_rows of the frame the rcb buffers were sized for */
    RK_U32          num_row_tiles;
    RK_U32          bit_depth;
    /* colmv buffers info */
    HalBufs         cmv_bufs;
    RK_S32          mv_size;
    RK_S32          mv_count;
    /* pool created by vdpu383_setup_scale_origin_bufs() for thumbnail-only output */
    HalBufs         origin_bufs;
    /* non-zero when prob_loop_base[i] can be used as "last prob" for context i */
    RK_U32          prob_ctx_valid[VP9_CONTEXT];
    MppBuffer       prob_loop_base[VP9_CONTEXT];
    /* uncompress header data */
    RK_U8           header_data[168];
} Vdpu383Vp9dCtx;
76 
/* Debug-only state, compiled in only when DUMP_VDPU383_DATAS is defined. */
#ifdef DUMP_VDPU383_DATAS
static RK_U32 cur_last_segid_flag;
/* buffer chosen as "last prob" source for the current frame, dumped to file */
static MppBuffer cur_last_prob_base;
#endif
81 
vdpu383_setup_scale_origin_bufs(Vdpu383Vp9dCtx * ctx,MppFrame mframe)82 static MPP_RET vdpu383_setup_scale_origin_bufs(Vdpu383Vp9dCtx *ctx, MppFrame mframe)
83 {
84     /* for 8K FrameBuf scale mode */
85     size_t origin_buf_size = 0;
86 
87     origin_buf_size = mpp_frame_get_buf_size(mframe);
88 
89     if (!origin_buf_size) {
90         mpp_err_f("origin_bufs get buf size failed\n");
91         return MPP_NOK;
92     }
93     if (ctx->origin_bufs) {
94         hal_bufs_deinit(ctx->origin_bufs);
95         ctx->origin_bufs = NULL;
96     }
97     hal_bufs_init(&ctx->origin_bufs);
98     if (!ctx->origin_bufs) {
99         mpp_err_f("origin_bufs thumb init fail\n");
100         return MPP_ERR_NOMEM;
101     }
102     hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size);
103 
104     return MPP_OK;
105 }
hal_vp9d_alloc_res(HalVp9dCtx * hal)106 static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
107 {
108     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
109     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
110     RK_S32 ret = 0;
111     RK_S32 i = 0;
112 
113     /* alloc common buffer */
114     for (i = 0; i < VP9_CONTEXT; i++) {
115         ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_loop_base[i], PROB_SIZE);
116         if (ret) {
117             mpp_err("vp9 probe_loop_base get buffer failed\n");
118             return ret;
119         }
120         mpp_buffer_attach_dev(hw_ctx->prob_loop_base[i], p_hal->dev);
121     }
122     ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_default_base, PROB_SIZE);
123     if (ret) {
124         mpp_err("vp9 probe_default_base get buffer failed\n");
125         return ret;
126     }
127     mpp_buffer_attach_dev(hw_ctx->prob_default_base, p_hal->dev);
128 
129     ret = mpp_buffer_get(p_hal->group, &hw_ctx->segid_cur_base, MAX_SEGMAP_SIZE);
130     if (ret) {
131         mpp_err("vp9 segid_cur_base get buffer failed\n");
132         return ret;
133     }
134     mpp_buffer_attach_dev(hw_ctx->segid_cur_base, p_hal->dev);
135     ret = mpp_buffer_get(p_hal->group, &hw_ctx->segid_last_base, MAX_SEGMAP_SIZE);
136     if (ret) {
137         mpp_err("vp9 segid_last_base get buffer failed\n");
138         return ret;
139     }
140     mpp_buffer_attach_dev(hw_ctx->segid_last_base, p_hal->dev);
141 
142     /* alloc buffer for fast mode or normal */
143     if (p_hal->fast_mode) {
144         for (i = 0; i < MAX_GEN_REG; i++) {
145             hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu383Vp9dRegSet));
146             ret = mpp_buffer_get(p_hal->group,
147                                  &hw_ctx->g_buf[i].global_base, GBL_SIZE);
148             mpp_buffer_attach_dev(hw_ctx->g_buf[i].global_base, p_hal->dev);
149             if (ret) {
150                 mpp_err("vp9 global_base get buffer failed\n");
151                 return ret;
152             }
153             ret = mpp_buffer_get(p_hal->group,
154                                  &hw_ctx->g_buf[i].probe_base, PROB_KF_SIZE);
155             if (ret) {
156                 mpp_err("vp9 probe_base get buffer failed\n");
157                 return ret;
158             }
159             mpp_buffer_attach_dev(hw_ctx->g_buf[i].probe_base, p_hal->dev);
160             ret = mpp_buffer_get(p_hal->group,
161                                  &hw_ctx->g_buf[i].count_base, COUNT_SIZE);
162             if (ret) {
163                 mpp_err("vp9 count_base get buffer failed\n");
164                 return ret;
165             }
166             mpp_buffer_attach_dev(hw_ctx->g_buf[i].count_base, p_hal->dev);
167         }
168     } else {
169         hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(Vdpu383Vp9dRegSet));
170         ret = mpp_buffer_get(p_hal->group, &hw_ctx->global_base, PROB_SIZE);
171         if (ret) {
172             mpp_err("vp9 global_base get buffer failed\n");
173             return ret;
174         }
175         mpp_buffer_attach_dev(hw_ctx->global_base, p_hal->dev);
176 
177         ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROB_KF_SIZE);
178         if (ret) {
179             mpp_err("vp9 probe_base get buffer failed\n");
180             return ret;
181         }
182         mpp_buffer_attach_dev(hw_ctx->probe_base, p_hal->dev);
183 
184         ret = mpp_buffer_get(p_hal->group, &hw_ctx->count_base, COUNT_SIZE);
185         if (ret) {
186             mpp_err("vp9 count_base get buffer failed\n");
187             return ret;
188         }
189         mpp_buffer_attach_dev(hw_ctx->count_base, p_hal->dev);
190     }
191     return MPP_OK;
192 }
193 
hal_vp9d_release_res(HalVp9dCtx * hal)194 static MPP_RET hal_vp9d_release_res(HalVp9dCtx *hal)
195 {
196     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
197     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
198     RK_S32 ret = 0;
199     RK_S32 i = 0;
200 
201     if (hw_ctx->prob_default_base) {
202         ret = mpp_buffer_put(hw_ctx->prob_default_base);
203         if (ret) {
204             mpp_err("vp9 probe_wr_base get buffer failed\n");
205             return ret;
206         }
207     }
208     if (hw_ctx->segid_cur_base) {
209         ret = mpp_buffer_put(hw_ctx->segid_cur_base);
210         if (ret) {
211             mpp_err("vp9 segid_cur_base put buffer failed\n");
212             return ret;
213         }
214     }
215     if (hw_ctx->segid_last_base) {
216         ret = mpp_buffer_put(hw_ctx->segid_last_base);
217         if (ret) {
218             mpp_err("vp9 segid_last_base put buffer failed\n");
219             return ret;
220         }
221     }
222     for (i = 0; i < VP9_CONTEXT; i++) {
223         if (hw_ctx->prob_loop_base[i]) {
224             ret = mpp_buffer_put(hw_ctx->prob_loop_base[i]);
225             if (ret) {
226                 mpp_err("vp9 prob_loop_base put buffer failed\n");
227                 return ret;
228             }
229         }
230     }
231     if (p_hal->fast_mode) {
232         for (i = 0; i < MAX_GEN_REG; i++) {
233             if (hw_ctx->g_buf[i].global_base) {
234                 ret = mpp_buffer_put(hw_ctx->g_buf[i].global_base);
235                 if (ret) {
236                     mpp_err("vp9 global_base put buffer failed\n");
237                     return ret;
238                 }
239             }
240             if (hw_ctx->g_buf[i].probe_base) {
241                 ret = mpp_buffer_put(hw_ctx->g_buf[i].probe_base);
242                 if (ret) {
243                     mpp_err("vp9 probe_base put buffer failed\n");
244                     return ret;
245                 }
246             }
247             if (hw_ctx->g_buf[i].count_base) {
248                 ret = mpp_buffer_put(hw_ctx->g_buf[i].count_base);
249                 if (ret) {
250                     mpp_err("vp9 count_base put buffer failed\n");
251                     return ret;
252                 }
253             }
254             if (hw_ctx->g_buf[i].hw_regs) {
255                 mpp_free(hw_ctx->g_buf[i].hw_regs);
256                 hw_ctx->g_buf[i].hw_regs = NULL;
257             }
258             if (hw_ctx->g_buf[i].rcb_buf) {
259                 ret = mpp_buffer_put(hw_ctx->g_buf[i].rcb_buf);
260                 if (ret) {
261                     mpp_err("vp9 rcb_buf[%d] put buffer failed\n", i);
262                     return ret;
263                 }
264             }
265         }
266     } else {
267         if (hw_ctx->global_base) {
268             ret = mpp_buffer_put(hw_ctx->global_base);
269             if (ret) {
270                 mpp_err("vp9 global_base get buffer failed\n");
271                 return ret;
272             }
273         }
274         if (hw_ctx->probe_base) {
275             ret = mpp_buffer_put(hw_ctx->probe_base);
276             if (ret) {
277                 mpp_err("vp9 probe_base get buffer failed\n");
278                 return ret;
279             }
280         }
281         if (hw_ctx->count_base) {
282             ret = mpp_buffer_put(hw_ctx->count_base);
283             if (ret) {
284                 mpp_err("vp9 count_base put buffer failed\n");
285                 return ret;
286             }
287         }
288         if (hw_ctx->hw_regs) {
289             mpp_free(hw_ctx->hw_regs);
290             hw_ctx->hw_regs = NULL;
291         }
292         if (hw_ctx->rcb_buf) {
293             ret = mpp_buffer_put(hw_ctx->rcb_buf);
294             if (ret) {
295                 mpp_err("vp9 rcb_buf put buffer failed\n");
296                 return ret;
297             }
298         }
299     }
300 
301     if (hw_ctx->cmv_bufs) {
302         ret = hal_bufs_deinit(hw_ctx->cmv_bufs);
303         if (ret) {
304             mpp_err("vp9 cmv bufs deinit buffer failed\n");
305             return ret;
306         }
307     }
308     if (hw_ctx->origin_bufs) {
309         ret = hal_bufs_deinit(hw_ctx->origin_bufs);
310         if (ret) {
311             mpp_err("thumb vp9 origin_bufs deinit buffer failed\n");
312             return ret;
313         }
314         hw_ctx->origin_bufs = NULL;
315     }
316 
317     return MPP_OK;
318 }
319 
hal_vp9d_vdpu383_deinit(void * hal)320 static MPP_RET hal_vp9d_vdpu383_deinit(void *hal)
321 {
322     HalVp9dCtx *p_hal = (HalVp9dCtx *)hal;
323     MPP_RET ret = MPP_OK;
324 
325     hal_vp9d_release_res(p_hal);
326 
327     if (p_hal->group) {
328         ret = mpp_buffer_group_put(p_hal->group);
329         if (ret) {
330             mpp_err("vp9d group free buffer failed\n");
331             return ret;
332         }
333     }
334     MPP_FREE(p_hal->hw_ctx);
335 
336     return ret;
337 }
338 
hal_vp9d_vdpu383_init(void * hal,MppHalCfg * cfg)339 static MPP_RET hal_vp9d_vdpu383_init(void *hal, MppHalCfg *cfg)
340 {
341     MPP_RET ret = MPP_OK;
342     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
343     MEM_CHECK(ret, p_hal->hw_ctx = mpp_calloc_size(void, sizeof(Vdpu383Vp9dCtx)));
344     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
345     (void) cfg;
346 
347     hw_ctx->mv_base_addr = -1;
348     hw_ctx->pre_mv_base_addr = -1;
349     mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
350     mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);
351 
352     if (p_hal->group == NULL) {
353         ret = mpp_buffer_group_get_internal(&p_hal->group, MPP_BUFFER_TYPE_ION);
354         if (ret) {
355             mpp_err("vp9 mpp_buffer_group_get failed\n");
356             goto __FAILED;
357         }
358     }
359 
360     ret = hal_vp9d_alloc_res(p_hal);
361     if (ret) {
362         mpp_err("hal_vp9d_alloc_res failed\n");
363         goto __FAILED;
364     }
365 
366     hw_ctx->last_segid_flag = 1;
367 
368     if (cfg->hal_fbc_adj_cfg) {
369         cfg->hal_fbc_adj_cfg->func = vdpu383_afbc_align_calc;
370         cfg->hal_fbc_adj_cfg->expand = 0;
371     }
372 
373     return ret;
374 __FAILED:
375     hal_vp9d_vdpu383_deinit(hal);
376     return ret;
377 }
378 
/*
 * Overwrite the per-type sizes in rcb_info[] with VP9 specific values.
 *
 * rcb_info : table indexed by the RCB_* identifiers, only .size is written
 * width    : picture width, rounded up to VP9_CTU_SIZE here
 * height   : picture height, rounded up to VP9_CTU_SIZE here
 * data     : DXVA_PicParams_VP9 of the frame (tile layout and bit depth)
 *
 * The *_TILE_ROW entries are only given a size when the frame has more than
 * one tile row, RCB_FILTERD_TILE_COL only with more than one tile column.
 */
static void vp9d_refine_rcb_size(Vdpu383RcbInfo *rcb_info,
                                 RK_S32 width, RK_S32 height, void* data)
{
    DXVA_PicParams_VP9 *pp = (DXVA_PicParams_VP9*)data;
    RK_U32 n_tile_rows = 1 << pp->log2_tile_rows;
    RK_U32 n_tile_cols = 1 << pp->log2_tile_cols;
    RK_U32 depth = pp->BitDepthMinus8Luma + 8;
    /* extra bits added to the tile-row / tile-column variants */
    RK_U32 row_ext_bits = n_tile_rows * 64 * 8;
    RK_U32 col_ext_bits = n_tile_cols * 64 * 8;
    RK_U32 row_append;
    RK_U32 bits;

    width = MPP_ALIGN(width, VP9_CTU_SIZE);
    height = MPP_ALIGN(height, VP9_CTU_SIZE);

    /* stream data: only the tile-row variant, and only above 4096 wide */
    bits = (width > 4096) ? ((width + 63) / 64) * 250 : 0;
    rcb_info[RCB_STRMD_ROW].size = 0;
    rcb_info[RCB_STRMD_TILE_ROW].size = MPP_RCB_BYTES(bits);

    /* inter prediction */
    bits = ((width + 63) / 64) * 2368;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(bits);
    bits += row_ext_bits;
    if (n_tile_rows <= 1)
        rcb_info[RCB_INTER_TILE_ROW].size = 0;
    else
        rcb_info[RCB_INTER_TILE_ROW].size = MPP_RCB_BYTES(bits);

    /* intra prediction */
    bits = MPP_ALIGN(width, 512) * (depth + 2) * 3; //TODO:
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(bits);
    bits += row_ext_bits;
    if (n_tile_rows <= 1)
        rcb_info[RCB_INTRA_TILE_ROW].size = 0;
    else
        rcb_info[RCB_INTRA_TILE_ROW].size = MPP_RCB_BYTES(bits);

    /*
     * loop filter rows, save space mode: one half goes to RCB_FILTERD_ROW and
     * the other half to RCB_FILTERD_PROTECT_ROW
     */
    row_append = (width > 4096) ? 27648 : 8192;
    bits = (RK_U32)(MPP_ALIGN(width, 64) * (41 * depth + 13));
    rcb_info[RCB_FILTERD_ROW].size = row_append + MPP_RCB_BYTES(bits / 2);
    rcb_info[RCB_FILTERD_PROTECT_ROW].size = row_append + MPP_RCB_BYTES(bits / 2);
    bits += row_ext_bits;
    if (n_tile_rows <= 1)
        rcb_info[RCB_FILTERD_TILE_ROW].size = 0;
    else
        rcb_info[RCB_FILTERD_TILE_ROW].size = MPP_RCB_BYTES(bits);

    /* loop filter tile column */
    if (n_tile_cols <= 1) {
        rcb_info[RCB_FILTERD_TILE_COL].size = 0;
    } else {
        bits = (RK_U32)(MPP_ALIGN(height, 64) * (42 * depth + 13)) + col_ext_bits;
        rcb_info[RCB_FILTERD_TILE_COL].size = MPP_RCB_BYTES(bits);
    }
}
442 
/*
 * Re-create the rcb buffer(s) when the tile-row setting, bit depth or aligned
 * picture size differs from what the current buffers were created for.
 *
 * hal     : HalVp9dCtx of this decoder
 * hw_regs : unused
 * data    : DXVA_PicParams_VP9 of the frame
 *
 * In fast mode every g_buf[] entry gets its own buffer, otherwise a single
 * buffer is kept in the context. A failed allocation is logged, leaves the
 * corresponding handle NULL (never a handle that was already released) and
 * keeps the cached geometry untouched so the next call tries again.
 */
static void hal_vp9d_rcb_info_update(void *hal, Vdpu383Vp9dRegSet *hw_regs, void *data)
{
    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
    Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
    RK_U32 num_tiles = pic_param->log2_tile_rows;
    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
    RK_S32 height = vp9_ver_align(pic_param->height);
    RK_S32 width  = vp9_ver_align(pic_param->width);
    RK_U32 failed = 0;
    (void) hw_regs;

    if (hw_ctx->num_row_tiles == num_tiles &&
        hw_ctx->bit_depth == bit_depth &&
        hw_ctx->width == width &&
        hw_ctx->height == height)
        return;

    hw_ctx->rcb_buf_size = vdpu383_get_rcb_buf_size(hw_ctx->rcb_info, width, height);
    // TODO: refine rcb buffer size
    vp9d_refine_rcb_size(hw_ctx->rcb_info, width, height, pic_param);

    if (p_hal->fast_mode) {
        RK_U32 i;

        for (i = 0; i < MPP_ARRAY_ELEMS(hw_ctx->g_buf); i++) {
            /* start from NULL so a failed get can never hand back the old handle */
            MppBuffer rcb_buf = NULL;

            if (hw_ctx->g_buf[i].rcb_buf) {
                mpp_buffer_put(hw_ctx->g_buf[i].rcb_buf);
                hw_ctx->g_buf[i].rcb_buf = NULL;
            }
            if (mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size)) {
                mpp_err_f("rcb_buf[%u] get buffer failed size %d\n",
                          i, hw_ctx->rcb_buf_size);
                rcb_buf = NULL;
                failed = 1;
            }
            hw_ctx->g_buf[i].rcb_buf = rcb_buf;
        }
    } else {
        MppBuffer rcb_buf = NULL;

        if (hw_ctx->rcb_buf) {
            mpp_buffer_put(hw_ctx->rcb_buf);
            hw_ctx->rcb_buf = NULL;
        }
        if (mpp_buffer_get(p_hal->group, &rcb_buf, hw_ctx->rcb_buf_size)) {
            mpp_err_f("rcb_buf get buffer failed size %d\n", hw_ctx->rcb_buf_size);
            rcb_buf = NULL;
            failed = 1;
        }
        hw_ctx->rcb_buf = rcb_buf;
    }

    /* remember the geometry only when every buffer really exists */
    if (failed)
        return;

    hw_ctx->num_row_tiles  = num_tiles;
    hw_ctx->bit_depth      = bit_depth;
    hw_ctx->width          = width;
    hw_ctx->height         = height;
}
493 
494 static void
set_tile_offset(RK_S32 * start,RK_S32 * end,RK_S32 idx,RK_S32 log2_n,RK_S32 n)495 set_tile_offset(RK_S32 *start, RK_S32 *end, RK_S32 idx, RK_S32 log2_n, RK_S32 n)
496 {
497     RK_S32 sb_start = ( idx      * n) >> log2_n;
498     RK_S32 sb_end   = ((idx + 1) * n) >> log2_n;
499 
500     *start = MPP_MIN(sb_start, n) << 3;
501     *end   = MPP_MIN(sb_end,   n) << 3;
502 }
503 
prepare_uncompress_header(HalVp9dCtx * p_hal,DXVA_PicParams_VP9 * pp,RK_U64 * data,RK_U32 len)504 static MPP_RET prepare_uncompress_header(HalVp9dCtx *p_hal, DXVA_PicParams_VP9 *pp,
505                                          RK_U64 *data, RK_U32 len)
506 {
507     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
508     BitputCtx_t bp;
509     RK_S32 i, j;
510 
511     mpp_set_bitput_ctx(&bp, data, len);
512 
513     mpp_put_bits(&bp, pp->frame_type, 1);
514     mpp_put_bits(&bp, pp->error_resilient_mode, 1);
515     mpp_put_bits(&bp, pp->BitDepthMinus8Luma, 3);
516     mpp_put_bits(&bp, 1, 2); // yuv420
517     mpp_put_bits(&bp, pp->width, 16);
518     mpp_put_bits(&bp, pp->height, 16);
519 
520     mpp_put_bits(&bp, (!pp->frame_type || pp->intra_only), 1);
521     mpp_put_bits(&bp, pp->ref_frame_sign_bias[1], 1);
522     mpp_put_bits(&bp, pp->ref_frame_sign_bias[2], 1);
523     mpp_put_bits(&bp, pp->ref_frame_sign_bias[3], 1);
524 
525     mpp_put_bits(&bp, pp->allow_high_precision_mv, 1);
526     /* sync with cmodel */
527     if (!pp->frame_type || pp->intra_only)
528         mpp_put_bits(&bp, 0, 3);
529     else {
530         if (pp->interp_filter == 4) /* FILTER_SWITCHABLE */
531             mpp_put_bits(&bp, pp->interp_filter, 3);
532         else
533             mpp_put_bits(&bp, literal_to_filter[pp->interp_filter], 3);
534     }
535     mpp_put_bits(&bp, pp->parallelmode, 1);
536     mpp_put_bits(&bp, pp->refresh_frame_context, 1);
537 
538     /* loop filter */
539     mpp_put_bits(&bp, pp->filter_level, 6);
540     mpp_put_bits(&bp, pp->sharpness_level, 3);
541     mpp_put_bits(&bp, pp->mode_ref_delta_enabled, 1);
542     mpp_put_bits(&bp, pp->mode_ref_delta_update, 1);
543 
544     mpp_put_bits(&bp, pp->ref_deltas[0], 7);
545     mpp_put_bits(&bp, pp->ref_deltas[1], 7);
546     mpp_put_bits(&bp, pp->ref_deltas[2], 7);
547     mpp_put_bits(&bp, pp->ref_deltas[3], 7);
548     mpp_put_bits(&bp, pp->mode_deltas[0], 7);
549     mpp_put_bits(&bp, pp->mode_deltas[1], 7);
550 
551     mpp_put_bits(&bp, pp->base_qindex, 8);
552     mpp_put_bits(&bp, pp->y_dc_delta_q, 5);
553     mpp_put_bits(&bp, pp->uv_dc_delta_q, 5);
554     mpp_put_bits(&bp, pp->uv_ac_delta_q, 5);
555     mpp_put_bits(&bp, (!pp->base_qindex && !pp->y_dc_delta_q && !pp->uv_dc_delta_q && !pp->uv_ac_delta_q), 1);
556 
557     for (i = 0; i < 3; i++) {
558         mpp_put_bits(&bp, pp->stVP9Segments.pred_probs[i], 8);
559     }
560     for (i = 0; i < 7; i++) {
561         mpp_put_bits(&bp, pp->stVP9Segments.tree_probs[i], 8);
562     }
563     mpp_put_bits(&bp, pp->stVP9Segments.enabled, 1);
564     mpp_put_bits(&bp, pp->stVP9Segments.update_map, 1);
565     mpp_put_bits(&bp, pp->stVP9Segments.temporal_update, 1);
566     mpp_put_bits(&bp, pp->stVP9Segments.abs_delta, 1);
567 
568     {
569         RK_U32 use_prev_frame_mvs = !pp->error_resilient_mode &&
570                                     pp->width == hw_ctx->ls_info.last_width &&
571                                     pp->height == hw_ctx->ls_info.last_height &&
572                                     !hw_ctx->ls_info.last_intra_only &&
573                                     hw_ctx->ls_info.last_show_frame;
574         mpp_put_bits(&bp, use_prev_frame_mvs, 1);
575     }
576 
577     for ( i = 0; i < 8; i++ )
578         for ( j = 0; j < 4; j++ )
579             mpp_put_bits(&bp, (pp->stVP9Segments.feature_mask[i] >> j) & 0x1, 1);
580 
581     for ( i = 0; i < 8; i++ ) {
582         mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][0], 9);
583         mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][1], 7);
584         mpp_put_bits(&bp, pp->stVP9Segments.feature_data[i][2], 2);
585     }
586 
587     mpp_put_bits(&bp, pp->first_partition_size, 16);
588 
589     /* refer frame width and height */
590     {
591         RK_S32 ref_idx = pp->frame_refs[0].Index7Bits;
592         mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
593         mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
594         ref_idx = pp->frame_refs[1].Index7Bits;
595         mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
596         mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
597         ref_idx = pp->frame_refs[2].Index7Bits;
598         mpp_put_bits(&bp, pp->ref_frame_coded_width[ref_idx], 16);
599         mpp_put_bits(&bp, pp->ref_frame_coded_height[ref_idx], 16);
600     }
601 
602     /* last frame info */
603     mpp_put_bits(&bp, hw_ctx->ls_info.last_mode_deltas[0], 7);
604     mpp_put_bits(&bp, hw_ctx->ls_info.last_mode_deltas[1], 7);
605     mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[0], 7);
606     mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[1], 7);
607     mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[2], 7);
608     mpp_put_bits(&bp, hw_ctx->ls_info.last_ref_deltas[3], 7);
609     mpp_put_bits(&bp, hw_ctx->ls_info.segmentation_enable_flag_last, 1);
610 
611     mpp_put_bits(&bp, hw_ctx->ls_info.last_show_frame, 1);
612     mpp_put_bits(&bp, pp->intra_only, 1);
613     {
614         RK_U32 last_widthheight_eqcur = pp->width == hw_ctx->ls_info.last_width &&
615                                         pp->height == hw_ctx->ls_info.last_height;
616 
617         mpp_put_bits(&bp, last_widthheight_eqcur, 1);
618     }
619     mpp_put_bits(&bp, hw_ctx->ls_info.color_space_last, 3);
620 
621     mpp_put_bits(&bp, !hw_ctx->ls_info.last_frame_type, 1);
622     mpp_put_bits(&bp, 0, 1);
623     mpp_put_bits(&bp, 1, 1);
624     mpp_put_bits(&bp, 1, 1);
625     mpp_put_bits(&bp, 1, 1);
626 
627     mpp_put_bits(&bp, pp->mvscale[0][0], 16);
628     mpp_put_bits(&bp, pp->mvscale[0][1], 16);
629     mpp_put_bits(&bp, pp->mvscale[1][0], 16);
630     mpp_put_bits(&bp, pp->mvscale[1][1], 16);
631     mpp_put_bits(&bp, pp->mvscale[2][0], 16);
632     mpp_put_bits(&bp, pp->mvscale[2][1], 16);
633 
634     /* tile cols and rows */
635     {
636         RK_S32 tile_width[64] = {0};
637         RK_S32 tile_height[4] = {0};
638         RK_S32 tile_cols = 1 << pp->log2_tile_cols;
639         RK_S32 tile_rows = 1 << pp->log2_tile_rows;
640 
641         mpp_put_bits(&bp, tile_cols, 7);
642         mpp_put_bits(&bp, tile_rows, 3);
643 
644         for (i = 0; i < tile_cols; ++i) { // tile_col
645             RK_S32 tile_col_start = 0;
646             RK_S32 tile_col_end = 0;
647 
648             set_tile_offset(&tile_col_start, &tile_col_end,
649                             i, pp->log2_tile_cols, MPP_ALIGN(pp->width, 64) / 64);
650             tile_width[i] = (tile_col_end - tile_col_start + 7) / 8;
651         }
652 
653         for (j = 0; j < tile_rows; ++j) { // tile_row
654             RK_S32 tile_row_start = 0;
655             RK_S32 tile_row_end = 0;
656 
657             set_tile_offset(&tile_row_start, &tile_row_end,
658                             j, pp->log2_tile_rows, MPP_ALIGN(pp->height, 64) / 64);
659             tile_height[j] = (tile_row_end - tile_row_start + 7) / 8;
660         }
661 
662         for (i = 0; i < 64; i++)
663             mpp_put_bits(&bp, tile_width[i], 10);
664 
665         for (j = 0; j < 4; j++)
666             mpp_put_bits(&bp, tile_height[j], 10);
667     }
668 
669     mpp_put_align(&bp, 64, 0);//128
670 
671 #ifdef DUMP_VDPU383_DATAS
672     {
673         char *cur_fname = "global_cfg.dat";
674         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
675         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
676         dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * (bp.index - 1) + bp.bitpos, 64, 0);
677     }
678 #endif
679 
680     return MPP_OK;
681 }
682 
hal_vp9d_vdpu383_gen_regs(void * hal,HalTaskInfo * task)683 static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
684 {
685     RK_S32 i;
686     RK_U8  bit_depth = 0;
687     RK_U32 ref_frame_width_y;
688     RK_U32 ref_frame_height_y;
689     RK_S32 stream_len = 0, aglin_offset = 0;
690     RK_U32 y_hor_virstride, uv_hor_virstride, y_virstride;
691     RK_U8  *bitstream = NULL;
692     MppBuffer streambuf = NULL;
693     RK_U32 sw_y_hor_virstride;
694     RK_U32 sw_uv_hor_virstride;
695     RK_U32 sw_y_virstride;
696     RK_U32 sw_uv_virstride;
697     RK_U8  ref_idx = 0;
698     RK_U8  ref_frame_idx = 0;
699     RK_U32 *reg_ref_base = NULL;
700     RK_U32 *reg_payload_ref_base = NULL;
701     RK_S32 intraFlag = 0;
702     MppBuffer framebuf = NULL;
703     HalBuf *mv_buf = NULL;
704     RK_U32 fbc_en = 0;
705     HalBuf *origin_buf = NULL;
706 
707     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
708     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
709     DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
710     Vdpu383Vp9dRegSet *vp9_hw_regs = NULL;
711     RK_S32 mv_size = pic_param->width * pic_param->height / 2;
712     RK_U32 frame_ctx_id = pic_param->frame_context_idx;
713     MppFrame mframe;
714     MppFrame ref_frame = NULL;
715 
716     if (p_hal->fast_mode) {
717         for (i = 0; i < MAX_GEN_REG; i++) {
718             if (!hw_ctx->g_buf[i].use_flag) {
719                 task->dec.reg_index = i;
720                 hw_ctx->global_base = hw_ctx->g_buf[i].global_base;
721                 hw_ctx->probe_base = hw_ctx->g_buf[i].probe_base;
722                 hw_ctx->count_base = hw_ctx->g_buf[i].count_base;
723                 hw_ctx->hw_regs = hw_ctx->g_buf[i].hw_regs;
724                 hw_ctx->g_buf[i].use_flag = 1;
725                 break;
726             }
727         }
728         if (i == MAX_GEN_REG) {
729             mpp_err("vp9 fast mode buf all used\n");
730             return MPP_ERR_NOMEM;
731         }
732     }
733     vp9_hw_regs = (Vdpu383Vp9dRegSet*)hw_ctx->hw_regs;
734     memset(vp9_hw_regs, 0, sizeof(Vdpu383Vp9dRegSet));
735 
736 #ifdef DUMP_VDPU383_DATAS
737     {
738         memset(dump_cur_dir, 0, sizeof(dump_cur_dir));
739         sprintf(dump_cur_dir, "vp9/Frame%04d", dump_cur_frame);
740         if (access(dump_cur_dir, 0)) {
741             if (mkdir(dump_cur_dir))
742                 mpp_err_f("error: mkdir %s\n", dump_cur_dir);
743         }
744         dump_cur_frame++;
745     }
746 #endif
747 
748     /* uncompress header data */
749     prepare_uncompress_header(p_hal, pic_param, (RK_U64 *)hw_ctx->header_data, sizeof(hw_ctx->header_data) / 8);
750     memcpy(mpp_buffer_get_ptr(hw_ctx->global_base), hw_ctx->header_data, sizeof(hw_ctx->header_data));
751     mpp_buffer_sync_end(hw_ctx->global_base);
752     vp9_hw_regs->vp9d_paras.reg67_global_len = GBL_SIZE / 16;
753     vp9_hw_regs->common_addr.reg131_gbl_base = mpp_buffer_get_fd(hw_ctx->global_base);
754 
755     if (hw_ctx->cmv_bufs == NULL || hw_ctx->mv_size < mv_size) {
756         size_t size = mv_size;
757 
758         if (hw_ctx->cmv_bufs) {
759             hal_bufs_deinit(hw_ctx->cmv_bufs);
760             hw_ctx->cmv_bufs = NULL;
761         }
762 
763         hal_bufs_init(&hw_ctx->cmv_bufs);
764         if (hw_ctx->cmv_bufs == NULL) {
765             mpp_err_f("colmv bufs init fail");
766             return MPP_NOK;
767         }
768         hw_ctx->mv_size = mv_size;
769         hw_ctx->mv_count = mpp_buf_slot_get_count(p_hal ->slots);
770         hal_bufs_setup(hw_ctx->cmv_bufs, hw_ctx->mv_count, 1, &size);
771     }
772 
773     mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
774     if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY &&
775         hw_ctx->origin_bufs == NULL) {
776         vdpu383_setup_scale_origin_bufs(hw_ctx, mframe);
777     }
778 
779     stream_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
780 
781     intraFlag = (!pic_param->frame_type || pic_param->intra_only);
782 #if HW_PROB
783     // hal_vp9d_prob_flag_delta(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
784     /* init kf_probe */
785     hal_vp9d_prob_kf(mpp_buffer_get_ptr(hw_ctx->probe_base));
786     mpp_buffer_sync_end(hw_ctx->probe_base);
787     if (intraFlag) {
788         hal_vp9d_prob_default(mpp_buffer_get_ptr(hw_ctx->prob_default_base), task->dec.syntax.data);
789         mpp_buffer_sync_end(hw_ctx->prob_default_base);
790     }
791 
792     /* config last prob base and update write base */
793     {
794         if (intraFlag || pic_param->error_resilient_mode) {
795             if (intraFlag
796                 || pic_param->error_resilient_mode
797                 || (pic_param->reset_frame_context == 3)) {
798                 memset(hw_ctx->prob_ctx_valid, 0, sizeof(hw_ctx->prob_ctx_valid));
799             } else if (pic_param->reset_frame_context == 2) {
800                 hw_ctx->prob_ctx_valid[frame_ctx_id] = 0;
801             }
802         }
803 
804         if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
805             vp9_hw_regs->vp9d_addrs.reg184_lastprob_base =
806                 mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
807 #ifdef DUMP_VDPU383_DATAS
808             { cur_last_prob_base = hw_ctx->prob_loop_base[frame_ctx_id]; }
809 #endif
810         } else {
811             vp9_hw_regs->vp9d_addrs.reg184_lastprob_base = mpp_buffer_get_fd(hw_ctx->prob_default_base);
812             hw_ctx->prob_ctx_valid[frame_ctx_id] |= pic_param->refresh_frame_context;
813 #ifdef DUMP_VDPU383_DATAS
814             { cur_last_prob_base = hw_ctx->prob_default_base; }
815 #endif
816         }
817         vp9_hw_regs->vp9d_addrs.reg185_updateprob_base =
818             mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
819     }
820     vp9_hw_regs->vp9d_addrs.reg183_kfprob_base = mpp_buffer_get_fd(hw_ctx->probe_base);
821 #ifdef DUMP_VDPU383_DATAS
822     {
823         char *cur_fname = "cabac_last_probe.dat";
824         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
825         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
826         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(cur_last_prob_base),
827                           8 * 152 * 16, 128, 0);
828     }
829     {
830         char *cur_fname = "cabac_kf_probe.dat";
831         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
832         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
833         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(hw_ctx->probe_base),
834                           8 * PROB_KF_SIZE, 128, 0);
835     }
836 #endif
837 #else
838 #endif
839 
840     vp9_hw_regs->vp9d_paras.reg66_stream_len = ((stream_len + 15) & (~15)) + 0x80;
841 
842     mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf);
843     bitstream = mpp_buffer_get_ptr(streambuf);
844     aglin_offset = vp9_hw_regs->vp9d_paras.reg66_stream_len - stream_len;
845     if (aglin_offset > 0) {
846         memset((void *)(bitstream + stream_len), 0, aglin_offset);
847     }
848 
849     //--- caculate the yuv_frame_size and mv_size
850     bit_depth = pic_param->BitDepthMinus8Luma + 8;
851 
852     {
853         mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
854         fbc_en = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
855 
856         if (fbc_en) {
857             RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe);
858             RK_U32 h = MPP_ALIGN(mpp_frame_get_height(mframe), 64);
859             RK_U32 fbd_offset;
860 
861             vp9_hw_regs->ctrl_regs.reg9.fbc_e = 1;
862             vp9_hw_regs->vp9d_paras.reg68_hor_virstride = fbc_hdr_stride / 64;
863             fbd_offset = vp9_hw_regs->vp9d_paras.reg68_hor_virstride * h * 4;
864             vp9_hw_regs->vp9d_addrs.reg193_fbc_payload_offset = fbd_offset;
865             /* error stride */
866             vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = fbc_hdr_stride / 64;
867         } else {
868             sw_y_hor_virstride = mpp_frame_get_hor_stride(mframe) >> 4;
869             sw_uv_hor_virstride = sw_y_hor_virstride;
870             sw_y_virstride = mpp_frame_get_ver_stride(mframe) * sw_y_hor_virstride;
871             sw_uv_virstride = sw_y_virstride / 2;
872 
873             vp9_hw_regs->ctrl_regs.reg9.fbc_e = 0;
874             if (MPP_FRAME_FMT_IS_TILE(mpp_frame_get_fmt(mframe))) {
875                 vp9_hw_regs->ctrl_regs.reg9.tile_e = 1;
876                 vp9_hw_regs->vp9d_paras.reg68_hor_virstride = sw_y_hor_virstride * 6;
877                 vp9_hw_regs->vp9d_paras.reg70_y_virstride = sw_y_virstride + sw_uv_virstride;
878             } else {
879                 vp9_hw_regs->ctrl_regs.reg9.tile_e = 0;
880                 vp9_hw_regs->vp9d_paras.reg68_hor_virstride = sw_y_hor_virstride;
881                 vp9_hw_regs->vp9d_paras.reg69_raster_uv_hor_virstride = sw_uv_hor_virstride;
882                 vp9_hw_regs->vp9d_paras.reg70_y_virstride = sw_y_virstride;
883             }
884             /* error stride */
885             vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = sw_y_hor_virstride;
886             vp9_hw_regs->vp9d_paras.reg81_error_ref_raster_uv_hor_virstride = sw_uv_hor_virstride;
887             vp9_hw_regs->vp9d_paras.reg82_error_ref_virstride = sw_y_virstride;
888         }
889     }
890     if (!pic_param->intra_only && pic_param->frame_type &&
891         !pic_param->error_resilient_mode && hw_ctx->ls_info.last_show_frame) {
892         hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
893     }
894 
895     mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
896     mpp_buf_slot_get_prop(p_hal ->slots, task->dec.output, SLOT_BUFFER, &framebuf);
897     if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
898         origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, task->dec.output);
899         framebuf = origin_buf->buf[0];
900     }
901     vp9_hw_regs->vp9d_addrs.reg168_decout_base = mpp_buffer_get_fd(framebuf);
902     vp9_hw_regs->vp9d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(framebuf);
903     vp9_hw_regs->vp9d_addrs.reg192_payload_st_cur_base = mpp_buffer_get_fd(framebuf);
904     vp9_hw_regs->vp9d_addrs.reg194_payload_st_error_ref_base = mpp_buffer_get_fd(framebuf);
905     vp9_hw_regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(streambuf);
906 
907     {
908         RK_U32 strm_offset = pic_param->uncompressed_header_size_byte_aligned;
909 
910         vp9_hw_regs->vp9d_paras.reg65_strm_start_bit = 8 * (strm_offset & 0xf);
911         mpp_dev_set_reg_offset(p_hal->dev, 128, strm_offset & 0xfffffff0);
912     }
913 
914     if (hw_ctx->last_segid_flag) {
915         vp9_hw_regs->vp9d_addrs.reg181_segidlast_base = mpp_buffer_get_fd(hw_ctx->segid_last_base);
916         vp9_hw_regs->vp9d_addrs.reg182_segidcur_base = mpp_buffer_get_fd(hw_ctx->segid_cur_base);
917     } else {
918         vp9_hw_regs->vp9d_addrs.reg181_segidlast_base = mpp_buffer_get_fd(hw_ctx->segid_cur_base);
919         vp9_hw_regs->vp9d_addrs.reg182_segidcur_base = mpp_buffer_get_fd(hw_ctx->segid_last_base);
920     }
921 #ifdef DUMP_VDPU383_DATAS
922     cur_last_segid_flag = hw_ctx->last_segid_flag;
923     {
924         char *cur_fname = "stream_in.dat";
925         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
926         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
927         dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(streambuf)
928                           + pic_param->uncompressed_header_size_byte_aligned,
929                           8 * (((stream_len + 15) & (~15)) + 0x80), 128, 0);
930     }
931 #endif
932     /* set last segid flag */
933     if ((pic_param->stVP9Segments.enabled && pic_param->stVP9Segments.update_map) ||
934         (pic_param->width != hw_ctx->ls_info.last_width || pic_param->height != hw_ctx->ls_info.last_height) ||
935         intraFlag || pic_param->error_resilient_mode) {
936         hw_ctx->last_segid_flag = !hw_ctx->last_segid_flag;
937     }
938     //set cur colmv base
939     mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, task->dec.output);
940 
941     vp9_hw_regs->vp9d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
942 
943     hw_ctx->mv_base_addr = vp9_hw_regs->vp9d_addrs.reg216_colmv_cur_base;
944     if (hw_ctx->pre_mv_base_addr < 0)
945         hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr;
946 
947     // vp9 only one colmv
948     vp9_hw_regs->vp9d_addrs.reg217_232_colmv_ref_base[0] = hw_ctx->pre_mv_base_addr;
949 
950     reg_ref_base = vp9_hw_regs->vp9d_addrs.reg170_185_ref_base;
951     reg_payload_ref_base = vp9_hw_regs->vp9d_addrs.reg195_210_payload_st_ref_base;
952     for (i = 0; i < 3; i++) {
953         ref_idx = pic_param->frame_refs[i].Index7Bits;
954         ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits;
955         ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx];
956         ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx];
957         if (ref_frame_idx < 0x7f)
958             mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &ref_frame);
959         if (fbc_en) {
960             y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64;
961             if (*compat_ext_fbc_hdr_256_odd)
962                 y_hor_virstride = uv_hor_virstride = (MPP_ALIGN(ref_frame_width_y, 256) | 256) / 64;
963         } else {
964             if (ref_frame)
965                 y_hor_virstride = uv_hor_virstride = (mpp_frame_get_hor_stride(ref_frame) >> 4);
966             else
967                 y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
968         }
969         if (ref_frame)
970             y_virstride = y_hor_virstride * mpp_frame_get_ver_stride(ref_frame);
971         else
972             y_virstride = y_hor_virstride * vp9_ver_align(ref_frame_height_y);
973 
974         if (ref_frame_idx < 0x7f) {
975             mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_BUFFER, &framebuf);
976             if (hw_ctx->origin_bufs && mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) {
977                 origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, ref_frame_idx);
978                 framebuf = origin_buf->buf[0];
979             }
980 
981             switch (i) {
982             case 0: {
983                 vp9_hw_regs->vp9d_paras.reg83_ref0_hor_virstride = y_hor_virstride;
984                 vp9_hw_regs->vp9d_paras.reg84_ref0_raster_uv_hor_virstride = uv_hor_virstride;
985                 vp9_hw_regs->vp9d_paras.reg85_ref0_virstride = y_virstride;
986             } break;
987             case 1: {
988                 vp9_hw_regs->vp9d_paras.reg86_ref1_hor_virstride = y_hor_virstride;
989                 vp9_hw_regs->vp9d_paras.reg87_ref1_raster_uv_hor_virstride = uv_hor_virstride;
990                 vp9_hw_regs->vp9d_paras.reg88_ref1_virstride = y_virstride;
991             } break;
992             case 2: {
993                 vp9_hw_regs->vp9d_paras.reg89_ref2_hor_virstride = y_hor_virstride;
994                 vp9_hw_regs->vp9d_paras.reg90_ref2_raster_uv_hor_virstride = uv_hor_virstride;
995                 vp9_hw_regs->vp9d_paras.reg91_ref2_virstride = y_virstride;
996             } break;
997             default:
998                 break;
999             }
1000 
1001             /*0 map to 11*/
1002             /*1 map to 12*/
1003             /*2 map to 13*/
1004             if (framebuf != NULL) {
1005                 reg_ref_base[i] = mpp_buffer_get_fd(framebuf);
1006                 reg_payload_ref_base[i] = mpp_buffer_get_fd(framebuf);
1007             } else {
1008                 mpp_log("ref buff address is no valid used out as base slot index 0x%x", ref_frame_idx);
1009                 reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1010                 reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1011             }
1012             mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, ref_frame_idx);
1013         } else {
1014             reg_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1015             reg_payload_ref_base[i] = vp9_hw_regs->vp9d_addrs.reg168_decout_base;
1016         }
1017     }
1018 
1019     /* common register setting */
1020     vp9_hw_regs->ctrl_regs.reg8_dec_mode = 2; //set as vp9 dec
1021     vp9_hw_regs->ctrl_regs.reg9.buf_empty_en = 0;
1022 
1023     vp9_hw_regs->ctrl_regs.reg10.strmd_auto_gating_e      = 1;
1024     vp9_hw_regs->ctrl_regs.reg10.inter_auto_gating_e      = 1;
1025     vp9_hw_regs->ctrl_regs.reg10.intra_auto_gating_e      = 1;
1026     vp9_hw_regs->ctrl_regs.reg10.transd_auto_gating_e     = 1;
1027     vp9_hw_regs->ctrl_regs.reg10.recon_auto_gating_e      = 1;
1028     vp9_hw_regs->ctrl_regs.reg10.filterd_auto_gating_e    = 1;
1029     vp9_hw_regs->ctrl_regs.reg10.bus_auto_gating_e        = 1;
1030     vp9_hw_regs->ctrl_regs.reg10.ctrl_auto_gating_e       = 1;
1031     vp9_hw_regs->ctrl_regs.reg10.rcb_auto_gating_e        = 1;
1032     vp9_hw_regs->ctrl_regs.reg10.err_prc_auto_gating_e    = 1;
1033 
1034     vp9_hw_regs->ctrl_regs.reg16.error_proc_disable = 1;
1035     vp9_hw_regs->ctrl_regs.reg16.error_spread_disable = 0;
1036     vp9_hw_regs->ctrl_regs.reg16.roi_error_ctu_cal_en = 0;
1037 
1038     vp9_hw_regs->ctrl_regs.reg20_cabac_error_en_lowbits = 0xffffffdf;
1039     vp9_hw_regs->ctrl_regs.reg21_cabac_error_en_highbits = 0x3fffffff;
1040 
1041     vp9_hw_regs->ctrl_regs.reg13_core_timeout_threshold = 0x3ffff;
1042 
1043     //last info update
1044     hw_ctx->ls_info.abs_delta_last = pic_param->stVP9Segments.abs_delta;
1045     for (i = 0 ; i < 4; i ++) {
1046         hw_ctx->ls_info.last_ref_deltas[i] = pic_param->ref_deltas[i];
1047     }
1048 
1049     for (i = 0 ; i < 2; i ++) {
1050         hw_ctx->ls_info.last_mode_deltas[i] = pic_param->mode_deltas[i];
1051     }
1052 
1053     for (i = 0; i < 8; i++) {
1054         hw_ctx->ls_info.feature_data[i][0] = pic_param->stVP9Segments.feature_data[i][0];
1055         hw_ctx->ls_info.feature_data[i][1] = pic_param->stVP9Segments.feature_data[i][1];
1056         hw_ctx->ls_info.feature_data[i][2] = pic_param->stVP9Segments.feature_data[i][2];
1057         hw_ctx->ls_info.feature_data[i][3] = pic_param->stVP9Segments.feature_data[i][3];
1058         hw_ctx->ls_info.feature_mask[i] = pic_param->stVP9Segments.feature_mask[i];
1059     }
1060     if (!hw_ctx->ls_info.segmentation_enable_flag_last)
1061         hw_ctx->ls_info.segmentation_enable_flag_last = pic_param->stVP9Segments.enabled;
1062 
1063     hw_ctx->ls_info.last_show_frame = pic_param->show_frame;
1064     hw_ctx->ls_info.last_width = pic_param->width;
1065     hw_ctx->ls_info.last_height = pic_param->height;
1066     hw_ctx->ls_info.last_frame_type = pic_param->frame_type;
1067 
1068     if (intraFlag)
1069         hw_ctx->ls_info.last_intra_only = 1;
1070 
1071     hw_ctx->ls_info.last_intra_only = (!pic_param->frame_type || pic_param->intra_only);
1072     hal_vp9d_dbg_par("stVP9Segments.enabled %d show_frame %d width %d height %d last_intra_only %d",
1073                      pic_param->stVP9Segments.enabled, pic_param->show_frame,
1074                      pic_param->width, pic_param->height,
1075                      hw_ctx->ls_info.last_intra_only);
1076 
1077     hal_vp9d_rcb_info_update(hal, vp9_hw_regs, pic_param);
1078     {
1079         MppBuffer rcb_buf = NULL;
1080 
1081         rcb_buf = p_hal->fast_mode ? hw_ctx->g_buf[task->dec.reg_index].rcb_buf : hw_ctx->rcb_buf;
1082         vdpu383_setup_rcb(&vp9_hw_regs->common_addr, p_hal->dev, rcb_buf, hw_ctx->rcb_info);
1083     }
1084     vdpu383_setup_statistic(&vp9_hw_regs->ctrl_regs);
1085     // whether need update counts
1086     if (pic_param->refresh_frame_context && !pic_param->parallelmode) {
1087         task->dec.flags.wait_done = 1;
1088     }
1089 
1090     {
1091         //scale down config
1092         MppBuffer mbuffer = NULL;
1093         RK_S32 fd = -1;
1094         MppFrameThumbnailMode thumbnail_mode;
1095 
1096         mpp_buf_slot_get_prop(p_hal->slots, task->dec.output,
1097                               SLOT_BUFFER, &mbuffer);
1098         mpp_buf_slot_get_prop(p_hal->slots, task->dec.output,
1099                               SLOT_FRAME_PTR, &mframe);
1100         thumbnail_mode = mpp_frame_get_thumbnail_en(mframe);
1101         switch (thumbnail_mode) {
1102         case MPP_FRAME_THUMBNAIL_ONLY:
1103             vp9_hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1104             origin_buf = hal_bufs_get_buf(hw_ctx->origin_bufs, task->dec.output);
1105             fd = mpp_buffer_get_fd(origin_buf->buf[0]);
1106             vp9_hw_regs->vp9d_addrs.reg168_decout_base = fd;
1107             vp9_hw_regs->vp9d_addrs.reg169_error_ref_base = fd;
1108             vp9_hw_regs->vp9d_addrs.reg192_payload_st_cur_base = fd;
1109             vp9_hw_regs->vp9d_addrs.reg194_payload_st_error_ref_base = fd;
1110             vdpu383_setup_down_scale(mframe, p_hal->dev, &vp9_hw_regs->ctrl_regs,
1111                                      (void *)&vp9_hw_regs->vp9d_paras);
1112             break;
1113         case MPP_FRAME_THUMBNAIL_MIXED:
1114             vp9_hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer);
1115             vdpu383_setup_down_scale(mframe, p_hal->dev, &vp9_hw_regs->ctrl_regs,
1116                                      (void *)&vp9_hw_regs->vp9d_paras);
1117             break;
1118         case MPP_FRAME_THUMBNAIL_NONE:
1119         default:
1120             vp9_hw_regs->ctrl_regs.reg9.scale_down_en = 0;
1121             break;
1122         }
1123     }
1124 
1125     return MPP_OK;
1126 }
1127 
hal_vp9d_vdpu383_start(void * hal,HalTaskInfo * task)1128 static MPP_RET hal_vp9d_vdpu383_start(void *hal, HalTaskInfo *task)
1129 {
1130     MPP_RET ret = MPP_OK;
1131     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1132     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1133     Vdpu383Vp9dRegSet *hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->hw_regs;
1134     MppDev dev = p_hal->dev;
1135 
1136     if (p_hal->fast_mode) {
1137         RK_S32 index = task->dec.reg_index;
1138 
1139         hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->g_buf[index].hw_regs;
1140     }
1141 
1142     mpp_assert(hw_regs);
1143 
1144     do {
1145         MppDevRegWrCfg wr_cfg;
1146         MppDevRegRdCfg rd_cfg;
1147 
1148         wr_cfg.reg = &hw_regs->ctrl_regs;
1149         wr_cfg.size = sizeof(hw_regs->ctrl_regs);
1150         wr_cfg.offset = OFFSET_CTRL_REGS;
1151         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1152         if (ret) {
1153             mpp_err_f("set register write failed %d\n", ret);
1154             break;
1155         }
1156 
1157         wr_cfg.reg = &hw_regs->common_addr;
1158         wr_cfg.size = sizeof(hw_regs->common_addr);
1159         wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
1160         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1161         if (ret) {
1162             mpp_err_f("set register write failed %d\n", ret);
1163             break;
1164         }
1165 
1166         wr_cfg.reg = &hw_regs->vp9d_paras;
1167         wr_cfg.size = sizeof(hw_regs->vp9d_paras);
1168         wr_cfg.offset = OFFSET_CODEC_PARAS_REGS;
1169         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1170         if (ret) {
1171             mpp_err_f("set register write failed %d\n", ret);
1172             break;
1173         }
1174 
1175         wr_cfg.reg = &hw_regs->vp9d_addrs;
1176         wr_cfg.size = sizeof(hw_regs->vp9d_addrs);
1177         wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
1178         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
1179         if (ret) {
1180             mpp_err_f("set register write failed %d\n", ret);
1181             break;
1182         }
1183 
1184         rd_cfg.reg = &hw_regs->ctrl_regs.reg15;
1185         rd_cfg.size = sizeof(hw_regs->ctrl_regs.reg15);
1186         rd_cfg.offset = OFFSET_INTERRUPT_REGS;
1187         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
1188         if (ret) {
1189             mpp_err_f("set register read failed %d\n", ret);
1190             break;
1191         }
1192 
1193         // rcb info for sram
1194         vdpu383_set_rcbinfo(dev, hw_ctx->rcb_info);
1195 
1196         ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
1197         if (ret) {
1198             mpp_err_f("send cmd failed %d\n", ret);
1199             break;
1200         }
1201     } while (0);
1202 
1203     return ret;
1204 }
1205 
hal_vp9d_vdpu383_wait(void * hal,HalTaskInfo * task)1206 static MPP_RET hal_vp9d_vdpu383_wait(void *hal, HalTaskInfo *task)
1207 {
1208     MPP_RET ret = MPP_OK;
1209     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1210     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1211     Vdpu383Vp9dRegSet *hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->hw_regs;
1212 
1213     if (p_hal->fast_mode)
1214         hw_regs = (Vdpu383Vp9dRegSet *)hw_ctx->g_buf[task->dec.reg_index].hw_regs;
1215 
1216     mpp_assert(hw_regs);
1217 
1218     ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
1219     if (ret)
1220         mpp_err_f("poll cmd failed %d\n", ret);
1221 #ifdef DUMP_VDPU383_DATAS
1222     {
1223         char *cur_fname = "cabac_update_probe.dat";
1224         DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
1225         RK_U32 frame_ctx_id = pic_param->frame_context_idx;
1226         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1227         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1228         dump_data_to_file(dump_cur_fname_path,
1229                           (void *)mpp_buffer_get_ptr(hw_ctx->prob_loop_base[frame_ctx_id]),
1230                           8 * 152 * 16, 128, 0);
1231     }
1232     {
1233         char *cur_fname = "segid_last.dat";
1234         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1235         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1236         if (!cur_last_segid_flag)
1237             dump_data_to_file(dump_cur_fname_path,
1238                               (void *)mpp_buffer_get_ptr(hw_ctx->segid_cur_base),
1239                               8 * 1559 * 8, 64, 0);
1240         else
1241             dump_data_to_file(dump_cur_fname_path,
1242                               (void *)mpp_buffer_get_ptr(hw_ctx->segid_last_base),
1243                               8 * 1559 * 8, 64, 0);
1244     }
1245     {
1246         char *cur_fname = "segid_cur.dat";
1247         memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path));
1248         sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname);
1249         if (cur_last_segid_flag)
1250             dump_data_to_file(dump_cur_fname_path,
1251                               (void *)mpp_buffer_get_ptr(hw_ctx->segid_cur_base),
1252                               8 * 1559 * 8, 64, 0);
1253         else
1254             dump_data_to_file(dump_cur_fname_path,
1255                               (void *)mpp_buffer_get_ptr(hw_ctx->segid_last_base),
1256                               8 * 1559 * 8, 64, 0);
1257     }
1258 #endif
1259 
1260     if (hal_vp9d_debug & HAL_VP9D_DBG_REG) {
1261         RK_U32 *p = (RK_U32 *)hw_regs;
1262         RK_U32 i = 0;
1263 
1264         for (i = 0; i < sizeof(Vdpu383Vp9dRegSet) / 4; i++)
1265             mpp_log("get regs[%02d]: %08X\n", i, *p++);
1266     }
1267 
1268     if (task->dec.flags.parse_err ||
1269         task->dec.flags.ref_err ||
1270         (!hw_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) ||
1271         hw_regs->ctrl_regs.reg15.rkvdec_strm_error_sta ||
1272         hw_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta ||
1273         hw_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta ||
1274         hw_regs->ctrl_regs.reg15.rkvdec_bus_error_sta ||
1275         hw_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta ||
1276         hw_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) {
1277         MppFrame mframe = NULL;
1278 
1279         mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
1280         mpp_frame_set_errinfo(mframe, 1);
1281     }
1282 
1283 #if !HW_PROB
1284     if (p_hal->dec_cb && task->dec.flags.wait_done) {
1285         DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
1286         hal_vp9d_update_counts(mpp_buffer_get_ptr(hw_ctx->count_base), task->dec.syntax.data);
1287         mpp_callback(p_hal->dec_cb, &pic_param->counts);
1288     }
1289 #endif
1290     if (p_hal->fast_mode) {
1291         hw_ctx->g_buf[task->dec.reg_index].use_flag = 0;
1292     }
1293 
1294     (void)task;
1295     return ret;
1296 }
1297 
hal_vp9d_vdpu383_reset(void * hal)1298 static MPP_RET hal_vp9d_vdpu383_reset(void *hal)
1299 {
1300     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1301     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1302 
1303     hal_vp9d_enter();
1304 
1305     memset(&hw_ctx->ls_info, 0, sizeof(hw_ctx->ls_info));
1306     hw_ctx->mv_base_addr = -1;
1307     hw_ctx->pre_mv_base_addr = -1;
1308     hw_ctx->last_segid_flag = 1;
1309 
1310     hal_vp9d_leave();
1311 
1312     return MPP_OK;
1313 }
1314 
hal_vp9d_vdpu383_flush(void * hal)1315 static MPP_RET hal_vp9d_vdpu383_flush(void *hal)
1316 {
1317     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1318     Vdpu383Vp9dCtx *hw_ctx = (Vdpu383Vp9dCtx*)p_hal->hw_ctx;
1319 
1320     hal_vp9d_enter();
1321 
1322     hw_ctx->mv_base_addr = -1;
1323     hw_ctx->pre_mv_base_addr = -1;
1324 
1325     hal_vp9d_leave();
1326 
1327     return MPP_OK;
1328 }
1329 
hal_vp9d_vdpu383_control(void * hal,MpiCmd cmd_type,void * param)1330 static MPP_RET hal_vp9d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param)
1331 {
1332     HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
1333 
1334     switch ((MpiCmd)cmd_type) {
1335     case MPP_DEC_SET_FRAME_INFO : {
1336         MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param);
1337 
1338         if (MPP_FRAME_FMT_IS_FBC(fmt)) {
1339             vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 0);
1340         } else {
1341             mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
1342         }
1343     } break;
1344     case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: {
1345         vdpu383_update_thumbnail_frame_info((MppFrame)param);
1346     } break;
1347     default : {
1348     } break;
1349     }
1350 
1351     return MPP_OK;
1352 }
1353 
1354 const MppHalApi hal_vp9d_vdpu383 = {
1355     .name = "vp9d_vdpu383",
1356     .type = MPP_CTX_DEC,
1357     .coding = MPP_VIDEO_CodingVP9,
1358     .ctx_size = sizeof(Vdpu383Vp9dCtx),
1359     .flag = 0,
1360     .init = hal_vp9d_vdpu383_init,
1361     .deinit = hal_vp9d_vdpu383_deinit,
1362     .reg_gen = hal_vp9d_vdpu383_gen_regs,
1363     .start = hal_vp9d_vdpu383_start,
1364     .wait = hal_vp9d_vdpu383_wait,
1365     .reset = hal_vp9d_vdpu383_reset,
1366     .flush = hal_vp9d_vdpu383_flush,
1367     .control = hal_vp9d_vdpu383_control,
1368 };
1369