xref: /OK3568_Linux_fs/external/mpp/mpp/hal/vpu/av1d/hal_av1d_vdpu.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright 2020 Rockchip Electronics Co. LTD
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define MODULE_TAG "hal_av1d_vdpu"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "rk_type.h"
24 #include "mpp_err.h"
25 #include "mpp_mem.h"
26 #include "mpp_common.h"
27 #include "mpp_bitput.h"
28 #include "mpp_hal.h"
29 #include "mpp_dec_cb_param.h"
30 #include "mpp_device.h"
31 #include "hal_bufs.h"
32 
33 #include "hal_av1d_vdpu_reg.h"
34 #include "hal_av1d_common.h"
35 #include "av1d_syntax.h"
36 #include "film_grain_noise_table.h"
37 #include "av1d_common.h"
38 #include "rk_hdr_meta_com.h"
39 
/* Number of register sets allocated when the HAL runs in fast mode. */
#define VDPU_FAST_REG_SET_CNT    3
#define AV1_MAX_TILES 128
/*
 * Size passed to mpp_buffer_get() for the tile info buffer.
 * Parenthesized so the product survives embedding in larger expressions
 * (e.g. "x / AV1_TILE_INFO_SIZE").
 */
#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
#define GM_GLOBAL_MODELS_PER_FRAME 7
#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
/* Size of the global model buffer before alignment; parenthesized for the same reason. */
#define GLOBAL_MODEL_SIZE (GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE)
/* Same value as AV1_MAX_TILES; used to size the tile_buf allocation. */
#define MaxTiles 128

#define DUMP_AV1_DATAS 0
49 
/*
 * Indices into VdpuAv1dRegCtx::filt_info. Each entry describes one region
 * of the shared filter buffer laid out by vdpu_av1d_filtermem_alloc().
 */
typedef enum AV1D_FILT_TYPE_E {
    DB_DATA_COL,    /* db tile col data buffer */
    DB_CTRL_COL,    /* db tile col ctrl buffer */
    CDEF_COL,       /* cdef tile col buffer */
    SR_COL,         /* sr tile col buffer */
    LR_COL,         /* lr tile col buffer */
    RFC_COL,        /* not filled in by the visible allocation code */
    FILT_TYPE_BUT,  /* number of entries; sizes the filt_info[] array */
} Av1dFiltType_e;
59 
/* Placement of one filter region inside the shared filter buffer. */
typedef struct filt_info_t {
    RK_U32 size;    /* region size for a single tile column */
    RK_U32 offset;  /* running total at which this region starts */
} filtInfo;
64 
/* One register-set slot; hal_av1d_alloc_res() allocates regs for each slot in use. */
typedef struct av1d_rkv_buf_t {
    RK_U32              valid;  /* NOTE(review): usage not visible in this chunk */
    VdpuAv1dRegSet  *regs;
} av1dVdpuBuf;
69 
/*
 * Per-instance HAL state: register sets, hardware-side buffers and
 * bookkeeping carried between frames. Allocated zero-filled by
 * hal_av1d_alloc_res() and freed by hal_av1d_release_res().
 */
typedef struct VdpuAv1dRegCtx_t {
    av1dVdpuBuf     reg_buf[VDPU_FAST_REG_SET_CNT];
    /* Buffers obtained from p_hal->buf_group in hal_av1d_alloc_res(). */
    MppBuffer       prob_tbl_base;
    MppBuffer       prob_tbl_out_base;
    MppBuffer       tile_info;
    MppBuffer       film_grain_mem;
    MppBuffer       global_model;
    /* Allocated separately by vdpu_av1d_filtermem_alloc(); layout in filt_info. */
    MppBuffer       filter_mem;
    MppBuffer       tile_buf;
    filtInfo        filt_info[FILT_TYPE_BUT];

    /* cdfs/cdfs_ndvc point at default_cdfs/default_cdfs_ndvc after init. */
    AV1CDFs         *cdfs;
    MvCDFs          *cdfs_ndvc;
    AV1CDFs         default_cdfs;
    MvCDFs          default_cdfs_ndvc;
    AV1CDFs         cdfs_last[NUM_REF_FRAMES];
    MvCDFs          cdfs_last_ndvc[NUM_REF_FRAMES];
    RK_U32          refresh_frame_flags;

    RK_U32          width;
    RK_U32          height;
    RK_S32          hor_stride;
    RK_S32          ver_stride;
    RK_U32          luma_size ;
    RK_U32          chroma_size;

    FilmGrainMemory fgsmem;

    RK_S8           prev_out_buffer_i;
    RK_U8           fbc_en;
    RK_U8           resolution_change;
    RK_U8           tile_transpose;     /* set to 1 by vdpu_av1d_init() */
    /* Written by set_frame_sign_bias(); entry i + 1 corresponds to frame_refs[i]. */
    RK_U32          ref_frame_sign_bias[AV1_REF_LIST_SIZE];

    /* Active register set; equals reg_buf[0].regs when not in fast mode. */
    VdpuAv1dRegSet  *regs;
    HalBufs         tile_out_bufs;
    RK_U32          tile_out_count;
    size_t          tile_out_size;

    RK_U32          num_tile_cols;
} VdpuAv1dRegCtx;
111 
/* Slot vertical-alignment hook (SLOTS_VER_ALIGN): returns MPP_ALIGN(val, 8). */
static RK_U32 rkv_ver_align(RK_U32 val)
{
    const RK_U32 aligned = MPP_ALIGN(val, 8);

    return aligned;
}
116 
/* Slot horizontal-alignment hook (SLOTS_HOR_ALIGN): returns MPP_ALIGN(val, 8). */
static RK_U32 rkv_hor_align(RK_U32 val)
{
    const RK_U32 aligned = MPP_ALIGN(val, 8);

    return aligned;
}
121 
/* Slot length hook (SLOTS_LEN_ALIGN): twice val aligned via MPP_ALIGN(val, 128). */
static RK_U32 rkv_len_align(RK_U32 val)
{
    const RK_U32 aligned = MPP_ALIGN(val, 128);

    return 2 * aligned;
}
126 
/* Length helper: five halves of val aligned via MPP_ALIGN(val, 64). */
static RK_U32 rkv_len_align_422(RK_U32 val)
{
    const RK_U32 aligned = MPP_ALIGN(val, 64);

    return (5 * aligned) / 2;
}
131 
hal_av1d_alloc_res(void * hal)132 static MPP_RET hal_av1d_alloc_res(void *hal)
133 {
134     MPP_RET ret = MPP_OK;
135     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
136     RK_U32 max_cnt = p_hal->fast_mode ? VDPU_FAST_REG_SET_CNT : 1;
137     RK_U32 i = 0;
138     INP_CHECK(ret, NULL == p_hal);
139 
140     MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(VdpuAv1dRegCtx)));
141     VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
142 
143     //!< malloc buffers
144     for (i = 0; i < max_cnt; i++) {
145         reg_ctx->reg_buf[i].regs = mpp_calloc(VdpuAv1dRegSet, 1);
146         memset(reg_ctx->reg_buf[i].regs, 0, sizeof(VdpuAv1dRegSet));
147     }
148 
149     if (!p_hal->fast_mode) {
150         reg_ctx->regs = reg_ctx->reg_buf[0].regs;
151     }
152 
153     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->prob_tbl_base, MPP_ALIGN(sizeof(AV1CDFs), 2048)));
154     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->prob_tbl_out_base, MPP_ALIGN(sizeof(AV1CDFs), 2048)));
155     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->tile_info, AV1_TILE_INFO_SIZE));
156     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->film_grain_mem, MPP_ALIGN(sizeof(AV1FilmGrainMemory), 2048)));
157     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->global_model, MPP_ALIGN(GLOBAL_MODEL_SIZE, 2048)));
158     BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->tile_buf, MPP_ALIGN(32 * MaxTiles, 4096)));
159 
160 __RETURN:
161     return ret;
162 __FAILED:
163     return ret;
164 }
165 
/* Hand ctx->filter_mem to the BUF_PUT macro to give the filter buffer back. */
static void vdpu_av1d_filtermem_release(VdpuAv1dRegCtx *ctx)
{
    BUF_PUT(ctx->filter_mem);
}
170 
vdpu_av1d_filtermem_alloc(Av1dHalCtx * p_hal,VdpuAv1dRegCtx * ctx,DXVA_PicParams_AV1 * dxva)171 static MPP_RET vdpu_av1d_filtermem_alloc(Av1dHalCtx *p_hal, VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
172 {
173     RK_U32 size = 0;
174     RK_U32 pic_height = MPP_ALIGN(dxva->height, 64);
175     RK_U32 height_in_sb = pic_height / 64;
176     RK_U32 stripe_num = ((pic_height + 8) + 63) / 64;
177     RK_U32 max_bit_depth = 10;
178     RK_U32 num_tile_cols = 1 << dxva->tile_cols_log2;//dxva->tiles.cols;
179     filtInfo *filt_info = ctx->filt_info;
180 
181     /* db tile col data buffer */
182     // asic_buff->db_data_col_offset = 0;
183     // asic_buff->db_data_col_tsize = NEXT_MULTIPLE(pic_height * 12 * max_bit_depth / 8, 128);
184     // size = asic_buff->db_data_col_tsize * num_tile_cols;
185     // asic_buff->db_ctrl_col_offset = size;
186 
187     filt_info[DB_DATA_COL].offset = 0;
188     filt_info[DB_DATA_COL].size = MPP_ALIGN(pic_height * 12 * max_bit_depth / 8, 128);
189     size += filt_info[DB_DATA_COL].size * num_tile_cols;
190 
191 
192     /* db tile col ctrl buffer */
193     filt_info[DB_CTRL_COL].offset = size;
194     filt_info[DB_CTRL_COL].size = MPP_ALIGN(pic_height * 2 * 16 / 4, 128);
195     size += filt_info[DB_CTRL_COL].size * num_tile_cols;
196 
197     // size += asic_buff->db_ctrl_col_tsize * num_tile_cols;
198     // asic_buff->cdef_col_offset = size;
199 
200     /* cdef tile col buffer */
201     filt_info[CDEF_COL].offset = size;
202     filt_info[CDEF_COL].size = MPP_ALIGN(height_in_sb * 44 * max_bit_depth * 16 / 8, 128);
203     size += filt_info[CDEF_COL].size * num_tile_cols;
204     // asic_buff->cdef_col_tsize = NEXT_MULTIPLE(height_in_sb * 44 * max_bit_depth * 16 / 8, 128);
205     // size += asic_buff->cdef_col_tsize * num_tile_cols;
206     // asic_buff->sr_col_offset = size;
207 
208     /* sr tile col buffer */
209     filt_info[SR_COL].offset = size;
210     filt_info[SR_COL].size = MPP_ALIGN(height_in_sb * (3040 + 1280), 128);
211     size += filt_info[SR_COL].size * num_tile_cols;
212     // asic_buff->sr_col_tsize = NEXT_MULTIPLE(height_in_sb * (3040 + 1280), 128);
213     // size += asic_buff->sr_col_tsize * num_tile_cols;
214     // asic_buff->lr_col_offset = size;
215 
216     /* lr tile col buffer */
217     filt_info[LR_COL].offset = size;
218     filt_info[LR_COL].size = MPP_ALIGN(stripe_num * 1536 * max_bit_depth / 8, 128);
219     size += filt_info[LR_COL].size * num_tile_cols;
220     // asic_buff->lr_col_tsize = NEXT_MULTIPLE(stripe_num * 1536 * max_bit_depth / 8, 128);
221     // size += asic_buff->lr_col_tsize * num_tile_cols;
222     // if (dec_cont->use_multicore) {
223     //     asic_buff->rfc_col_offset = size;
224     //     asic_buff->rfc_col_size = NEXT_MULTIPLE(asic_buff->height, 8) / 8 * 16 * 2;
225     //     size += asic_buff->rfc_col_size * num_tile_cols;
226     // }
227     if (!mpp_buffer_get(p_hal->buf_group, &ctx->filter_mem, MPP_ALIGN(size, SZ_4K)))
228         return MPP_NOK;
229 
230     return MPP_OK;
231 }
232 
hal_av1d_release_res(void * hal)233 static void hal_av1d_release_res(void *hal)
234 {
235     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
236     VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
237     RK_U32 i = 0;
238     RK_U32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
239 
240     for (i = 0; i < loop; i++)
241         MPP_FREE(reg_ctx->reg_buf[i].regs);
242 
243     BUF_PUT(reg_ctx->prob_tbl_base);
244     BUF_PUT(reg_ctx->prob_tbl_out_base);
245     BUF_PUT(reg_ctx->tile_info);
246     BUF_PUT(reg_ctx->film_grain_mem);
247     BUF_PUT(reg_ctx->global_model);
248     BUF_PUT(reg_ctx->tile_buf);
249     vdpu_av1d_filtermem_release(reg_ctx);
250     hal_bufs_deinit(reg_ctx->tile_out_bufs);
251 
252     MPP_FREE(p_hal->reg_ctx);
253 }
254 
vdpu_av1d_deinit(void * hal)255 MPP_RET vdpu_av1d_deinit(void *hal)
256 {
257     hal_av1d_release_res(hal);
258 
259     return MPP_OK;
260 }
261 
vdpu_av1d_init(void * hal,MppHalCfg * cfg)262 MPP_RET vdpu_av1d_init(void *hal, MppHalCfg *cfg)
263 {
264     MPP_RET ret = MPP_OK;
265     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
266     INP_CHECK(ret, NULL == p_hal);
267 
268     FUN_CHECK(hal_av1d_alloc_res(hal));
269 
270     {
271         VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
272 
273         reg_ctx->cdfs = &reg_ctx->default_cdfs;
274         reg_ctx->cdfs_ndvc = &reg_ctx->default_cdfs_ndvc;
275         reg_ctx->tile_transpose = 1;
276     }
277 
278     mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, rkv_hor_align);
279     mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, rkv_ver_align);
280     mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align);
281 
282     (void)cfg;
283 __RETURN:
284     return MPP_OK;
285 __FAILED:
286     vdpu_av1d_deinit(hal);
287 
288     return ret;
289 }
290 
/* Write val to the sw_ref<i>_width field for reference i (0..6); other indices only log an error. */
static void set_ref_width(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg33.sw_ref0_width = val;
        break;
    case 1:
        regs->swreg34.sw_ref1_width = val;
        break;
    case 2:
        regs->swreg35.sw_ref2_width = val;
        break;
    case 3:
        regs->swreg43.sw_ref3_width = val;
        break;
    case 4:
        regs->swreg44.sw_ref4_width = val;
        break;
    case 5:
        regs->swreg45.sw_ref5_width = val;
        break;
    case 6:
        regs->swreg46.sw_ref6_width = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
311 
/* Write val to the sw_ref<i>_height field for reference i (0..6); other indices only log an error. */
static void set_ref_height(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg33.sw_ref0_height = val;
        break;
    case 1:
        regs->swreg34.sw_ref1_height = val;
        break;
    case 2:
        regs->swreg35.sw_ref2_height = val;
        break;
    case 3:
        regs->swreg43.sw_ref3_height = val;
        break;
    case 4:
        regs->swreg44.sw_ref4_height = val;
        break;
    case 5:
        regs->swreg45.sw_ref5_height = val;
        break;
    case 6:
        regs->swreg46.sw_ref6_height = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
332 
/* Write val to the sw_ref<i>_hor_scale field for reference i (0..6); other indices only log an error. */
static void set_ref_hor_scale(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg36.sw_ref0_hor_scale = val;
        break;
    case 1:
        regs->swreg37.sw_ref1_hor_scale = val;
        break;
    case 2:
        regs->swreg38.sw_ref2_hor_scale = val;
        break;
    case 3:
        regs->swreg39.sw_ref3_hor_scale = val;
        break;
    case 4:
        regs->swreg40.sw_ref4_hor_scale = val;
        break;
    case 5:
        regs->swreg41.sw_ref5_hor_scale = val;
        break;
    case 6:
        regs->swreg42.sw_ref6_hor_scale = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
353 
/* Write val to the sw_ref<i>_ver_scale field for reference i (0..6); other indices only log an error. */
static void set_ref_ver_scale(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg36.sw_ref0_ver_scale = val;
        break;
    case 1:
        regs->swreg37.sw_ref1_ver_scale = val;
        break;
    case 2:
        regs->swreg38.sw_ref2_ver_scale = val;
        break;
    case 3:
        regs->swreg39.sw_ref3_ver_scale = val;
        break;
    case 4:
        regs->swreg40.sw_ref4_ver_scale = val;
        break;
    case 5:
        regs->swreg41.sw_ref5_ver_scale = val;
        break;
    case 6:
        regs->swreg42.sw_ref6_ver_scale = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
374 
/*
 * Look up buffer slot val in bufs and write the fd of its first buffer to
 * the sw_refer<i>_ybase_lsb field for reference i (0..6). Returns without
 * touching regs when the lookup fails; other indices only log an error.
 */
static void set_ref_lum_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
{
    HalBuf *ref_buf = hal_bufs_get_buf(bufs, val);
    RK_S32 fd = 0;

    if (ref_buf == NULL) {
        mpp_err_f("get slots frame buff fail");
        return;
    }

    fd = mpp_buffer_get_fd(ref_buf->buf[0]);

    switch (i) {
    case 0:
        regs->addr_cfg.swreg67.sw_refer0_ybase_lsb = fd;
        break;
    case 1:
        regs->addr_cfg.swreg69.sw_refer1_ybase_lsb = fd;
        break;
    case 2:
        regs->addr_cfg.swreg71.sw_refer2_ybase_lsb = fd;
        break;
    case 3:
        regs->addr_cfg.swreg73.sw_refer3_ybase_lsb = fd;
        break;
    case 4:
        regs->addr_cfg.swreg75.sw_refer4_ybase_lsb = fd;
        break;
    case 5:
        regs->addr_cfg.swreg77.sw_refer5_ybase_lsb = fd;
        break;
    case 6:
        regs->addr_cfg.swreg79.sw_refer6_ybase_lsb = fd;
        break;
    default:
        mpp_err( "Error: trying to set invalid reference index.");
        break;
    }
}
404 
/* Write val to the sw_refer<i>_ybase_msb field for reference i (0..6); other indices only log an error. */
static void set_ref_lum_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->addr_cfg.swreg66.sw_refer0_ybase_msb = val;
        break;
    case 1:
        regs->addr_cfg.swreg68.sw_refer1_ybase_msb = val;
        break;
    case 2:
        regs->addr_cfg.swreg70.sw_refer2_ybase_msb = val;
        break;
    case 3:
        regs->addr_cfg.swreg72.sw_refer3_ybase_msb = val;
        break;
    case 4:
        regs->addr_cfg.swreg74.sw_refer4_ybase_msb = val;
        break;
    case 5:
        regs->addr_cfg.swreg76.sw_refer5_ybase_msb = val;
        break;
    case 6:
        regs->addr_cfg.swreg78.sw_refer6_ybase_msb = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
425 
/*
 * Look up buffer slot val in bufs and, for reference i (0..6), register
 * `offset` for the matching cbase register (101, 103, ... 113) and write
 * the fd of the slot's first buffer to sw_refer<i>_cbase_lsb. Returns
 * early when the lookup fails; other indices only log an error.
 */
static void set_ref_cb_base(Av1dHalCtx *p_hal, RK_S32 i, RK_S32 val, HalBufs bufs, RK_U32 offset)
{
    VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
    VdpuAv1dRegSet *regs = reg_ctx->regs;
    HalBuf *ref_buf = hal_bufs_get_buf(bufs, val);
    RK_S32 fd = 0;

    if (ref_buf == NULL) {
        mpp_err_f("get slots frame buff fail");
        return;
    }

    fd = mpp_buffer_get_fd(ref_buf->buf[0]);

    switch (i) {
    case 0:
        mpp_dev_set_reg_offset(p_hal->dev, 101, offset);
        regs->addr_cfg.swreg101.sw_refer0_cbase_lsb  = fd;
        break;
    case 1:
        mpp_dev_set_reg_offset(p_hal->dev, 103, offset);
        regs->addr_cfg.swreg103.sw_refer1_cbase_lsb  = fd;
        break;
    case 2:
        mpp_dev_set_reg_offset(p_hal->dev, 105, offset);
        regs->addr_cfg.swreg105.sw_refer2_cbase_lsb  = fd;
        break;
    case 3:
        mpp_dev_set_reg_offset(p_hal->dev, 107, offset);
        regs->addr_cfg.swreg107.sw_refer3_cbase_lsb  = fd;
        break;
    case 4:
        mpp_dev_set_reg_offset(p_hal->dev, 109, offset);
        regs->addr_cfg.swreg109.sw_refer4_cbase_lsb  = fd;
        break;
    case 5:
        mpp_dev_set_reg_offset(p_hal->dev, 111, offset);
        regs->addr_cfg.swreg111.sw_refer5_cbase_lsb  = fd;
        break;
    case 6:
        mpp_dev_set_reg_offset(p_hal->dev, 113, offset);
        regs->addr_cfg.swreg113.sw_refer6_cbase_lsb  = fd;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
466 
/* Write val to the sw_refer<i>_cbase_msb field for reference i (0..6); other indices only log an error. */
static void set_ref_cb_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->addr_cfg.swreg100.sw_refer0_cbase_msb = val;
        break;
    case 1:
        regs->addr_cfg.swreg102.sw_refer1_cbase_msb = val;
        break;
    case 2:
        regs->addr_cfg.swreg104.sw_refer2_cbase_msb = val;
        break;
    case 3:
        regs->addr_cfg.swreg106.sw_refer3_cbase_msb = val;
        break;
    case 4:
        regs->addr_cfg.swreg108.sw_refer4_cbase_msb = val;
        break;
    case 5:
        regs->addr_cfg.swreg110.sw_refer5_cbase_msb = val;
        break;
    case 6:
        regs->addr_cfg.swreg112.sw_refer6_cbase_msb = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
487 
488 
/*
 * Look up buffer slot val in bufs and, for reference i (0..6), register
 * `offset` for the matching dbase register (135, 137, ... 147) and write
 * the fd of the slot's first buffer to sw_refer<i>_dbase_lsb. Returns
 * early when the lookup fails; other indices only log an error.
 */
static void set_ref_dbase(Av1dHalCtx *p_hal, RK_S32 i, RK_S32 val,  HalBufs bufs, RK_U32 offset)
{
    VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
    VdpuAv1dRegSet *regs = reg_ctx->regs;
    HalBuf *ref_buf = hal_bufs_get_buf(bufs, val);
    RK_S32 fd = 0;

    if (ref_buf == NULL) {
        mpp_err_f("get slots frame buff fail");
        return;
    }

    fd = mpp_buffer_get_fd(ref_buf->buf[0]);

    switch (i) {
    case 0:
        mpp_dev_set_reg_offset(p_hal->dev, 135, offset);
        regs->addr_cfg.swreg135.sw_refer0_dbase_lsb = fd;
        break;
    case 1:
        mpp_dev_set_reg_offset(p_hal->dev, 137, offset);
        regs->addr_cfg.swreg137.sw_refer1_dbase_lsb = fd;
        break;
    case 2:
        mpp_dev_set_reg_offset(p_hal->dev, 139, offset);
        regs->addr_cfg.swreg139.sw_refer2_dbase_lsb = fd;
        break;
    case 3:
        mpp_dev_set_reg_offset(p_hal->dev, 141, offset);
        regs->addr_cfg.swreg141.sw_refer3_dbase_lsb = fd;
        break;
    case 4:
        mpp_dev_set_reg_offset(p_hal->dev, 143, offset);
        regs->addr_cfg.swreg143.sw_refer4_dbase_lsb = fd;
        break;
    case 5:
        mpp_dev_set_reg_offset(p_hal->dev, 145, offset);
        regs->addr_cfg.swreg145.sw_refer5_dbase_lsb = fd;
        break;
    case 6:
        mpp_dev_set_reg_offset(p_hal->dev, 147, offset);
        regs->addr_cfg.swreg147.sw_refer6_dbase_lsb = fd;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
528 
/* Write val to the sw_refer<i>_dbase_msb field for reference i (0..6); other indices only log an error. */
static void set_ref_dbase_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->addr_cfg.swreg134.sw_refer0_dbase_msb = val;
        break;
    case 1:
        regs->addr_cfg.swreg136.sw_refer1_dbase_msb = val;
        break;
    case 2:
        regs->addr_cfg.swreg138.sw_refer2_dbase_msb = val;
        break;
    case 3:
        regs->addr_cfg.swreg140.sw_refer3_dbase_msb = val;
        break;
    case 4:
        regs->addr_cfg.swreg142.sw_refer4_dbase_msb = val;
        break;
    case 5:
        regs->addr_cfg.swreg144.sw_refer5_dbase_msb = val;
        break;
    case 6:
        regs->addr_cfg.swreg146.sw_refer6_dbase_msb = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
549 
/*
 * Look up buffer slot val in bufs and write the fd of its first buffer to
 * the sw_refer<i>_tybase_lsb field for reference i (0..6).
 *
 * Returns without touching regs when the lookup fails, matching
 * set_ref_lum_base(); previously the NULL buffer was dereferenced right
 * after the error was logged. Other indices only log an error.
 */
static void set_ref_ty_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
{
    HalBuf *tile_out_buf;
    tile_out_buf = hal_bufs_get_buf(bufs, val);

    if (tile_out_buf == NULL) {
        mpp_err_f("get slots frame buff fail");
        return;
    }
    val =  mpp_buffer_get_fd(tile_out_buf->buf[0]);

    if (i == 0) {
        regs->swreg192.sw_refer0_tybase_lsb = val;
    } else if (i == 1) {
        regs->swreg194.sw_refer1_tybase_lsb = val;
    } else if (i == 2) {
        regs->swreg196.sw_refer2_tybase_lsb = val;
    } else if (i == 3) {
        regs->swreg198.sw_refer3_tybase_lsb = val;
    } else if (i == 4) {
        regs->swreg200.sw_refer4_tybase_lsb = val;
    } else if (i == 5) {
        regs->swreg202.sw_refer5_tybase_lsb = val;
    } else if (i == 6) {
        regs->swreg204.sw_refer6_tybase_lsb = val;
    } else {
        mpp_err("Error: trying to set invalid reference index.");
    }
}
580 
/* Write val to the sw_refer<i>_tybase_msb field for reference i (0..6); other indices only log an error. */
static void set_ref_ty_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg191.sw_refer0_tybase_msb = val;
        break;
    case 1:
        regs->swreg193.sw_refer1_tybase_msb = val;
        break;
    case 2:
        regs->swreg195.sw_refer2_tybase_msb = val;
        break;
    case 3:
        regs->swreg197.sw_refer3_tybase_msb = val;
        break;
    case 4:
        regs->swreg199.sw_refer4_tybase_msb = val;
        break;
    case 5:
        regs->swreg201.sw_refer5_tybase_msb = val;
        break;
    case 6:
        regs->swreg203.sw_refer6_tybase_msb = val;
        break;
    default:
        mpp_err(" trying to set invalid reference index.");
        break;
    }
}
601 
/*
 * Look up buffer slot val in bufs and write the fd of its first buffer to
 * the sw_refer<i>_tcbase_lsb field for reference i (0..6).
 *
 * Returns without touching regs when the lookup fails, matching
 * set_ref_lum_base(); previously the NULL buffer was dereferenced right
 * after the error was logged. Other indices only log an error.
 */
static void set_ref_tc_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
{
    HalBuf *tile_out_buf;
    tile_out_buf = hal_bufs_get_buf(bufs, val);

    if (tile_out_buf == NULL) {
        mpp_err_f("get slots frame buff fail");
        return;
    }
    val =  mpp_buffer_get_fd(tile_out_buf->buf[0]);

    if (i == 0) {
        regs->swreg226.sw_refer0_tcbase_lsb = val;
    } else if (i == 1) {
        regs->swreg228.sw_refer1_tcbase_lsb = val;
    } else if (i == 2) {
        regs->swreg230.sw_refer2_tcbase_lsb = val;
    } else if (i == 3) {
        regs->swreg232.sw_refer3_tcbase_lsb = val;
    } else if (i == 4) {
        regs->swreg234.sw_refer4_tcbase_lsb = val;
    } else if (i == 5) {
        regs->swreg236.sw_refer5_tcbase_lsb = val;
    } else if (i == 6) {
        regs->swreg238.sw_refer6_tcbase_lsb = val;
    } else {
        mpp_err("Error: trying to set invalid reference index.");
    }
}
632 
633 
/* Write val to the sw_refer<i>_tcbase_msb field for reference i (0..6); other indices only log an error. */
static void set_ref_tc_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg225.sw_refer0_tcbase_msb = val;
        break;
    case 1:
        regs->swreg227.sw_refer1_tcbase_msb = val;
        break;
    case 2:
        regs->swreg229.sw_refer2_tcbase_msb = val;
        break;
    case 3:
        regs->swreg231.sw_refer3_tcbase_msb = val;
        break;
    case 4:
        regs->swreg233.sw_refer4_tcbase_msb = val;
        break;
    case 5:
        regs->swreg235.sw_refer5_tcbase_msb = val;
        break;
    case 6:
        regs->swreg237.sw_refer6_tcbase_msb = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
654 
/*
 * Write val to the sw_ref<i>_sign_bias field for reference i (0..6).
 * References 0-3 live in swreg59, references 4-6 in swreg9; other indices
 * only log an error.
 */
static void set_ref_sign_bias(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
{
    switch (i) {
    case 0:
        regs->swreg59.sw_ref0_sign_bias = val;
        break;
    case 1:
        regs->swreg59.sw_ref1_sign_bias = val;
        break;
    case 2:
        regs->swreg59.sw_ref2_sign_bias = val;
        break;
    case 3:
        regs->swreg59.sw_ref3_sign_bias = val;
        break;
    case 4:
        regs->swreg9.sw_ref4_sign_bias = val;
        break;
    case 5:
        regs->swreg9.sw_ref5_sign_bias = val;
        break;
    case 6:
        regs->swreg9.sw_ref6_sign_bias = val;
        break;
    default:
        mpp_err("Error: trying to set invalid reference index.");
        break;
    }
}
675 
/* NOTE(review): no use of MAX_FRAME_DISTANCE is visible in this chunk. */
#define MAX_FRAME_DISTANCE 31
/* Local alias for AV1_ACTIVE_REFS_EX. */
#define MAX_ACTIVE_REFS AV1_ACTIVE_REFS_EX
678 
/*
 * Signed distance from b to a, wrapped to dxva->order_hint_bits bits:
 * a - b is masked to that width and its top bit is treated as the sign.
 * Returns 0 when order_hint_bits is 0.
 */
RK_S32 GetRelativeDist(DXVA_PicParams_AV1 *dxva, RK_S32 a, RK_S32 b)
{
    RK_S32 sign_bit = 0;
    RK_S32 delta = 0;

    if (dxva->order_hint_bits == 0)
        return 0;

    sign_bit = 1 << (dxva->order_hint_bits - 1);
    delta = a - b;

    /* Low bits keep their value; the sign bit contributes negatively. */
    return (delta & (sign_bit - 1)) - (delta & sign_bit);
}
689 
/*
 * Expands to a brace block that, for the reference selected by
 * refs_selected[index - 1], computes the seven GetRelativeDist() values
 * between that reference's order_hint and its stored lst/lst2/lst3/gld/
 * bwd/alt2/alt frame offsets, then writes them to the sw_mf1_* fields
 * (index == 1), the sw_mf2_* fields (index == 2) or the sw_mf3_* fields
 * (any other index).
 *
 * The expansion site must have `dxva`, `regs` and `refs_selected` in scope.
 *
 * NOTE(review): `index` is not parenthesized in the comparisons and the
 * expansion is a bare block rather than do { } while (0); pass only simple
 * expressions and take care when using it as the body of an if/else.
 */
#define POPULATE_REF_OFFSET(index)                                               \
 {                                                                               \
    RK_S32 ref_offset[MAX_REF_FRAMES_EX - 1];                                    \
    RK_S32 idx = refs_selected[(index) - 1];                                     \
    ref_offset[0] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].lst_frame_offset);     \
    ref_offset[1] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].lst2_frame_offset);    \
    ref_offset[2] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].lst3_frame_offset);    \
    ref_offset[3] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].gld_frame_offset);     \
    ref_offset[4] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].bwd_frame_offset);     \
    ref_offset[5] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].alt2_frame_offset);    \
    ref_offset[6] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint,      \
                                    dxva->frame_refs[idx].alt_frame_offset);     \
    if(index == 1) {                                                             \
        regs->swreg20.sw_mf1_last_offset = ref_offset[0];                        \
        regs->swreg21.sw_mf1_last2_offset = ref_offset[1];                       \
        regs->swreg22.sw_mf1_last3_offset = ref_offset[2];                       \
        regs->swreg23.sw_mf1_golden_offset = ref_offset[3];                      \
        regs->swreg24.sw_mf1_bwdref_offset = ref_offset[4];                      \
        regs->swreg25.sw_mf1_altref2_offset = ref_offset[5];                     \
        regs->swreg26.sw_mf1_altref_offset = ref_offset[6];                      \
    }else if(index == 2) {                                                       \
        regs->swreg27.sw_mf2_last_offset = ref_offset[0];                        \
        regs->swreg47.sw_mf2_last2_offset = ref_offset[1];                       \
        regs->swreg47.sw_mf2_last3_offset = ref_offset[2];                       \
        regs->swreg47.sw_mf2_golden_offset = ref_offset[3];                      \
        regs->swreg48.sw_mf2_bwdref_offset = ref_offset[4];                      \
        regs->swreg48.sw_mf2_altref2_offset = ref_offset[5];                     \
        regs->swreg48.sw_mf2_altref_offset = ref_offset[6];                      \
    }else {                                                                      \
        regs->swreg184.sw_mf3_last_offset = ref_offset[0];                       \
        regs->swreg185.sw_mf3_last2_offset = ref_offset[1];                      \
        regs->swreg186.sw_mf3_last3_offset = ref_offset[2];                      \
        regs->swreg187.sw_mf3_golden_offset = ref_offset[3];                     \
        regs->swreg188.sw_mf3_bwdref_offset = ref_offset[4];                     \
        regs->swreg257.sw_mf3_altref2_offset = ref_offset[5];                    \
        regs->swreg262.sw_mf3_altref_offset = ref_offset[6];                     \
    }                                                                            \
}
734 
735 
/*
 * Fill reg_ctx->ref_frame_sign_bias for the current frame.
 *
 * When order hints are disabled, or the frame is a key / intra-only frame,
 * every entry is cleared. Otherwise, for each frame_refs[i] with a
 * non-negative Index, entry i + 1 becomes 1 if that reference's order hint
 * is ahead of the current frame's (relative distance > 0) and 0 if not.
 * Entries for references with a negative Index are left untouched.
 */
void set_frame_sign_bias(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
    RK_U32 i = 0;

    if (!dxva->order_hint_bits ||
        dxva->format.frame_type == AV1_FRAME_INTRA_ONLY ||
        dxva->format.frame_type == AV1_FRAME_KEY) {
        memset(ctx->ref_frame_sign_bias, 0, sizeof(ctx->ref_frame_sign_bias));
        return;
    }

    for (i = 0; i < AV1_ACTIVE_REFS_EX; i++) {
        RK_S32 ref_frame_offset;
        RK_S32 rel_off;

        if (dxva->frame_refs[i].Index < 0)
            continue;

        ref_frame_offset = dxva->frame_refs[i].order_hint;
        rel_off = GetRelativeDist(dxva, ref_frame_offset, dxva->order_hint);
        ctx->ref_frame_sign_bias[i + 1] = (rel_off > 0) ? 1 : 0;
        AV1D_DBG(AV1D_DBG_LOG, "frame_refs[%d] order_hint %d ref_frame_offset %d\n",
                 i, dxva->order_hint, ref_frame_offset);
    }
}
759 
/*
 * Copy the frame's CDF tables into the probability-table buffer and point
 * the hardware at the input and output probability buffers.
 *
 * For key and intra-only frames the mv_cdf area of the copied table is
 * overwritten with dxva->cdfs_ndvc (intrabc MV context).
 *
 * The buffer is addressed through a byte pointer so the offset arithmetic
 * is valid ISO C; arithmetic on void * is only a compiler extension.
 */
void vdpu_av1d_set_prob(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
    const size_t mv_cdf_offset = offsetof(AV1CDFs, mv_cdf);
    RK_U8 *prob_base = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->prob_tbl_base);
    VdpuAv1dRegSet *regs = reg_ctx->regs;

    memcpy(prob_base, dxva->cdfs, sizeof(AV1CDFs));
    if (dxva->format.frame_type == AV1_FRAME_INTRA_ONLY ||
        dxva->format.frame_type == AV1_FRAME_KEY) {
        // Overwrite MV context area with intrabc MV context
        memcpy(prob_base + mv_cdf_offset, dxva->cdfs_ndvc, sizeof(MvCDFs));
    }

    regs->addr_cfg.swreg171.sw_prob_tab_out_base_lsb    = mpp_buffer_get_fd(reg_ctx->prob_tbl_out_base);
    regs->addr_cfg.swreg173.sw_prob_tab_base_lsb        = mpp_buffer_get_fd(reg_ctx->prob_tbl_base);
}
777 
/*
 * Program every reference-frame related register for the current picture.
 *
 * p_hal : HAL context (device handle used for register offsets).
 * ctx   : register context; ctx->regs is the register set being filled and
 *         ctx->tile_out_bufs the buffer group the references are taken from.
 * dxva  : picture parameters of the frame being decoded.
 *
 * Steps, in order:
 *   1. count the distinct reference buffers in use (sw_ref_frames);
 *   2. per reference: size, scale factors, buffer bases and sign bias;
 *   3. global-motion mode per reference;
 *   4. choose up to three references for temporal MV projection (mf_types /
 *      refs_selected) and program their offsets;
 *   5. segment-map read base and resolution_change relative to the primary
 *      reference frame;
 *   6. order-hint distances between the current frame and each reference.
 *
 * NOTE: local names are kept stable on purpose; POPULATE_REF_OFFSET() is a
 * macro expanded inside this function and picks up locals by name.
 */
void vdpu_av1d_set_reference_frames(Av1dHalCtx *p_hal, VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
{
    RK_U32 tmp1, tmp2, i;
    RK_U32 cur_height, cur_width;
    RK_U8  max_ref_frames = MAX_REF_FRAMES_EX;
    RK_U8 prev_valid = 0;

    VdpuAv1dRegSet *regs = ctx->regs;
    RK_S32 ref_count[AV1DEC_MAX_PIC_BUFFERS] = {0};

    RK_U32 ref_scale_e = 0;
    RK_U32 y_stride = ctx->luma_size;
    RK_U32 uv_stride = y_stride / 2;
    RK_U32 mv_offset = ctx->luma_size + ctx->chroma_size + 64;

    if (!dxva->coding.intrabc) {
        RK_U32 ref_frames = 0;

        for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
            RK_S32 ref_buf_idx = dxva->frame_refs[i].Index;

            /* Index comes from the stream side; never index ref_count[]
             * outside its bounds. */
            if (ref_buf_idx >= 0 && ref_buf_idx < (RK_S32)AV1DEC_MAX_PIC_BUFFERS)
                ref_count[ref_buf_idx]++;
        }

        for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
            if (ref_count[i])
                ref_frames++;
        }
        /* Assign instead of incrementing the register field so the result
         * does not depend on the field having been zeroed beforehand. */
        regs->swreg4.sw_ref_frames = ref_frames;
    } else
        regs->swreg4.sw_ref_frames = 1;

    cur_width = dxva->width;
    cur_height = dxva->height;

    set_frame_sign_bias(p_hal, dxva);

    for (i = LAST_FRAME; i < max_ref_frames; i++) {
        RK_U32 ref = i - 1;
        RK_S32 idx = 0;
        if (dxva->coding.intrabc) {
            /* With intrabc the current picture is used for every reference. */
            idx = dxva->CurrPicTextureIndex;
            tmp1 = cur_width;
            tmp2 = cur_height;
        } else {
            tmp1 =  dxva->frame_refs[ref].width;
            tmp2 =  dxva->frame_refs[ref].height;
            /* idx stays 0 when the reference has no positive index. */
            if (dxva->frame_refs[ref].Index > 0) {
                idx = dxva->frame_refs[ref].Index;
            }
        }

        set_ref_width(regs, ref, tmp1);
        set_ref_height(regs, ref, tmp2);
        /* Rounded fixed-point ratio reference size / current size. */
        tmp1 = ((tmp1 << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
        tmp2 = ((tmp2 << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

        set_ref_hor_scale(regs, ref, tmp1);
        set_ref_ver_scale(regs, ref, tmp2);
        if (tmp1 != (1 << AV1_REF_SCALE_SHIFT) ||
            tmp2 != (1 << AV1_REF_SCALE_SHIFT)) {
            ref_scale_e = 1;
        }

        if (idx == ctx->prev_out_buffer_i) {
            prev_valid = 1;
        }

        set_ref_lum_base(regs, ref, idx, ctx->tile_out_bufs);
        set_ref_cb_base(p_hal,  ref, idx, ctx->tile_out_bufs, y_stride);
        set_ref_dbase  (p_hal,  ref, idx, ctx->tile_out_bufs, mv_offset);

        set_ref_lum_base_msb(regs, ref, 0);
        set_ref_cb_base_msb(regs,  ref, 0);
        set_ref_dbase_msb  (regs,  ref, 0);

        /* Deliberately disabled; kept so the helpers stay referenced. */
        if (0) {
            set_ref_ty_base(regs,  ref, idx, ctx->tile_out_bufs);
            set_ref_tc_base(regs,  ref, idx, ctx->tile_out_bufs);
            set_ref_ty_base_msb(regs,  ref, 0);
            set_ref_tc_base_msb(regs,  ref, 0);
        }
        set_ref_sign_bias(regs, ref, ctx->ref_frame_sign_bias[i]);
    }

    regs->swreg184.sw_ref0_gm_mode                 = dxva->frame_refs[0].wmtype;
    regs->swreg185.sw_ref1_gm_mode                 = dxva->frame_refs[1].wmtype;
    regs->swreg186.sw_ref2_gm_mode                 = dxva->frame_refs[2].wmtype;
    regs->swreg187.sw_ref3_gm_mode                 = dxva->frame_refs[3].wmtype;
    regs->swreg188.sw_ref4_gm_mode                 = dxva->frame_refs[4].wmtype;
    regs->swreg257.sw_ref5_gm_mode                 = dxva->frame_refs[5].wmtype;
    regs->swreg262.sw_ref6_gm_mode                 = dxva->frame_refs[6].wmtype;


    /* Remember which buffer counts as the "previous output": the current
     * picture for intrabc, otherwise LAST when none of the references
     * matched the remembered buffer. */
    if (dxva->coding.intrabc) {
        ctx->prev_out_buffer_i = dxva->CurrPicTextureIndex;
    } else if (!prev_valid) {
        ctx->prev_out_buffer_i = dxva->frame_refs[0].Index;  // LAST
    }

    {
        RK_S32 gld_buf_idx = GOLDEN_FRAME_EX - LAST_FRAME;
        RK_S32 alt_buf_idx = ALTREF_FRAME_EX - LAST_FRAME;
        RK_S32 lst_buf_idx = LAST_FRAME - LAST_FRAME;
        RK_S32 bwd_buf_idx = BWDREF_FRAME_EX - LAST_FRAME;
        RK_S32 alt2_buf_idx = ALTREF2_FRAME_EX - LAST_FRAME;
        RK_S32 lst2_buf_idx = LAST2_FRAME_EX - LAST_FRAME;

        RK_S32 cur_frame_offset = dxva->order_hint;
        RK_S32 alt_frame_offset = 0;
        RK_S32 gld_frame_offset = 0;
        RK_S32 bwd_frame_offset = 0;
        RK_S32 alt2_frame_offset = 0;
        RK_S32 refs_selected[3] = {0, 0, 0};
        RK_S32 cur_mi_cols = (dxva->width + 7) >> 3;
        RK_S32 cur_mi_rows = (dxva->height + 7) >> 3;
        RK_U8 mf_types[3] = {0, 0, 0};
        /* Budget for motion-field candidates; only the ALTREF and LAST2
         * candidates below check it before being considered. */
        RK_S32 ref_stamp = 2;
        RK_S32 ref_ind = 0;
        RK_S32 rf;

        if (dxva->frame_refs[alt_buf_idx].Index >= 0)
            alt_frame_offset = dxva->frame_refs[alt_buf_idx].order_hint;
        if (dxva->frame_refs[gld_buf_idx].Index >= 0)
            gld_frame_offset = dxva->frame_refs[gld_buf_idx].order_hint;
        if (dxva->frame_refs[bwd_buf_idx].Index >= 0)
            bwd_frame_offset = dxva->frame_refs[bwd_buf_idx].order_hint;
        if (dxva->frame_refs[alt2_buf_idx].Index >= 0)
            alt2_frame_offset = dxva->frame_refs[alt2_buf_idx].order_hint;

        AV1D_DBG(AV1D_DBG_LOG, "frame_offset[%d %d %d %d] lst_idx %d alt_off %d\n",
                 alt_frame_offset,
                 gld_frame_offset,
                 bwd_frame_offset,
                 alt2_frame_offset,
                 dxva->frame_refs[lst_buf_idx].Index,
                 dxva->frame_refs[lst_buf_idx].alt_frame_offset);

        /* Candidate 1: LAST, unless its own ALTREF offset equals the current
         * GOLDEN offset (treated as an overlay).  A candidate is only taken
         * when it has the same size in 8x8 units as the current frame and is
         * not an intra frame. */
        if (dxva->frame_refs[lst_buf_idx].Index >= 0) {
            const RK_S32 alt_frame_offset_in_lst =
                dxva->frame_refs[lst_buf_idx].alt_frame_offset;

            const RK_S32 is_lst_overlay = (alt_frame_offset_in_lst == gld_frame_offset);
            if (!is_lst_overlay) {
                RK_S32 lst_mi_cols =
                    (dxva->frame_refs[lst_buf_idx].width + 7) >> 3;
                RK_S32 lst_mi_rows =
                    (dxva->frame_refs[lst_buf_idx].height + 7) >> 3;
                // TODO(stan): what's the difference btw key_frame and intra_only?
                RK_S32 lst_intra_only =
                    dxva->frame_refs[lst_buf_idx].intra_only ||
                    dxva->frame_refs[lst_buf_idx].is_intra_frame;
                if (lst_mi_cols == cur_mi_cols && lst_mi_rows == cur_mi_rows &&
                    !lst_intra_only) {
                    mf_types[ref_ind] = LAST_FRAME;
                    refs_selected[ref_ind++] = lst_buf_idx;
                }
            }
            /* The budget is consumed whenever LAST exists, selected or not. */
            ref_stamp--;
        }

        /* Candidate 2: BWDREF, when it lies after the current frame. */
        if (GetRelativeDist(dxva, bwd_frame_offset, cur_frame_offset) > 0) {
            RK_S32 bwd_mi_cols =
                (dxva->frame_refs[bwd_buf_idx].width + 7) >> 3;
            RK_S32 bwd_mi_rows =
                (dxva->frame_refs[bwd_buf_idx].height + 7) >> 3;
            RK_S32 bwd_intra_only = dxva->frame_refs[bwd_buf_idx].intra_only ||
                                    dxva->frame_refs[bwd_buf_idx].is_intra_frame;
            if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
                !bwd_intra_only) {
                mf_types[ref_ind] = BWDREF_FRAME_EX;
                refs_selected[ref_ind++] = bwd_buf_idx;
                ref_stamp--;
            }
        }

        /* Candidate 3: ALTREF2, when it lies after the current frame. */
        if (GetRelativeDist(dxva, alt2_frame_offset, cur_frame_offset) > 0) {
            RK_S32 alt2_mi_cols =
                (dxva->frame_refs[alt2_buf_idx].width + 7) >> 3;
            RK_S32 alt2_mi_rows =
                (dxva->frame_refs[alt2_buf_idx].height + 7) >> 3;
            RK_S32 alt2_intra_only =
                dxva->frame_refs[alt2_buf_idx].intra_only ||
                dxva->frame_refs[alt2_buf_idx].is_intra_frame;
            if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
                !alt2_intra_only) {
                mf_types[ref_ind] = ALTREF2_FRAME_EX;
                refs_selected[ref_ind++] = alt2_buf_idx;
                ref_stamp--;
            }
        }

        /* Candidate 4: ALTREF, only while the budget is not exhausted. */
        if (GetRelativeDist(dxva, alt_frame_offset, cur_frame_offset) > 0 &&
            ref_stamp >= 0) {
            RK_S32 alt_mi_cols =
                (dxva->frame_refs[alt_buf_idx].width + 7) >> 3;
            RK_S32 alt_mi_rows =
                (dxva->frame_refs[alt_buf_idx].height + 7) >> 3;
            RK_S32 alt_intra_only = dxva->frame_refs[alt_buf_idx].intra_only ||
                                    dxva->frame_refs[alt_buf_idx].is_intra_frame;
            if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
                !alt_intra_only) {
                mf_types[ref_ind] = ALTREF_FRAME_EX;
                refs_selected[ref_ind++] = alt_buf_idx;
                ref_stamp--;
            }
        }

        /* Candidate 5: LAST2, only while the budget is not exhausted. */
        if (ref_stamp >= 0 && dxva->frame_refs[lst2_buf_idx].Index >= 0) {
            RK_S32 lst2_mi_cols =
                (dxva->frame_refs[lst2_buf_idx].width + 7) >> 3;
            RK_S32 lst2_mi_rows =
                (dxva->frame_refs[lst2_buf_idx].height + 7) >> 3;
            RK_S32 lst2_intra_only =
                dxva->frame_refs[lst2_buf_idx].intra_only ||
                dxva->frame_refs[lst2_buf_idx].is_intra_frame;
            if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
                !lst2_intra_only) {
                mf_types[ref_ind] = LAST2_FRAME_EX;
                refs_selected[ref_ind++] = lst2_buf_idx;
                ref_stamp--;
            }
        }

        /* Order-hint distance current -> reference (cur_offset) and
         * reference -> current (cur_roffset); 0 for missing references. */
        RK_S32 cur_offset[MAX_REF_FRAMES_EX - 1];
        RK_S32 cur_roffset[MAX_REF_FRAMES_EX - 1];
        for ( rf = 0; rf < MAX_REF_FRAMES_EX - 1; ++rf) {
            RK_S32 buf_idx = dxva->frame_refs[rf].Index;
            if (buf_idx >= 0) {
                cur_offset[rf] =
                    GetRelativeDist(dxva, cur_frame_offset,
                                    dxva->frame_refs[rf].order_hint);
                cur_roffset[rf] =
                    GetRelativeDist(dxva, dxva->frame_refs[rf].order_hint,
                                    cur_frame_offset);
                AV1D_DBG(AV1D_DBG_LOG, "buf_idx[%d]=%d offset[%d : %d] hin %d\n", rf, buf_idx, cur_offset[rf], cur_roffset[rf], dxva->frame_refs[rf].order_hint);
            } else {
                cur_offset[rf] = 0;
                cur_roffset[rf] = 0;
            }
        }

        regs->swreg11.sw_use_temporal0_mvs = 0;
        regs->swreg11.sw_use_temporal1_mvs = 0;
        regs->swreg11.sw_use_temporal2_mvs = 0;
        regs->swreg11.sw_use_temporal3_mvs = 0;

        /* Enable temporal MVs for each selected candidate whose distance to
         * the current frame is within +/- MAX_FRAME_DISTANCE.  The ref_ind
         * test guarantees mf_types[n] was filled before it is used as an
         * index. */
        if (dxva->coding.use_ref_frame_mvs && ref_ind > 0 &&
            cur_offset[mf_types[0] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
            cur_offset[mf_types[0] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
            regs->swreg11.sw_use_temporal0_mvs = 1;
            POPULATE_REF_OFFSET(1)
        }

        if (dxva->coding.use_ref_frame_mvs && ref_ind > 1 &&
            cur_offset[mf_types[1] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
            cur_offset[mf_types[1] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
            regs->swreg11.sw_use_temporal1_mvs = 1;
            POPULATE_REF_OFFSET(2)
        }

        if (dxva->coding.use_ref_frame_mvs && ref_ind > 2 &&
            cur_offset[mf_types[2] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
            cur_offset[mf_types[2] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
            regs->swreg11.sw_use_temporal2_mvs = 1;
            POPULATE_REF_OFFSET(3)
        }

        // Pass one additional frame that will contain the segment information
        if (dxva->segmentation.enabled &&
            dxva->primary_ref_frame < ALLOWED_REFS_PER_FRAME_EX) {
            // Primary ref frame is zero based
            RK_S32 prim_buf_idx = dxva->frame_refs[dxva->primary_ref_frame].Index;

            if (prim_buf_idx >= 0) {
                HalBuf *tile_out_buf;

                /* NOTE(review): this offset uses luma_size / 2 as the chroma
                 * size, while the one at the top of the function uses
                 * ctx->chroma_size - confirm both are meant to agree. */
                y_stride = ctx->luma_size ;
                uv_stride = y_stride / 2;
                mv_offset = y_stride + uv_stride + 64;

                tile_out_buf = hal_bufs_get_buf(ctx->tile_out_bufs, prim_buf_idx);
                regs->addr_cfg.swreg80.sw_segment_read_base_msb = 0;
                regs->addr_cfg.swreg81.sw_segment_read_base_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
                mpp_dev_set_reg_offset(p_hal->dev, 81, mv_offset);
                regs->swreg11.sw_use_temporal3_mvs = 1;
            }
        }
        /* resolution_change: the primary reference differs from the current
         * frame in 8x8-unit size.  Left untouched when there is no primary
         * reference. */
        if (dxva->primary_ref_frame < ALLOWED_REFS_PER_FRAME_EX) {
            RK_S32 prim_buf_idx = dxva->primary_ref_frame;
            ctx->resolution_change =
                cur_mi_cols !=
                (RK_S32)((dxva->frame_refs[prim_buf_idx].width + 7) >>
                         3) ||
                cur_mi_rows !=
                (RK_S32)((dxva->frame_refs[prim_buf_idx].height + 7) >>
                         3);
        }

        regs->swreg184.sw_cur_last_offset              = cur_offset[0];
        regs->swreg185.sw_cur_last2_offset             = cur_offset[1];
        regs->swreg186.sw_cur_last3_offset             = cur_offset[2];
        regs->swreg187.sw_cur_golden_offset            = cur_offset[3];
        regs->swreg188.sw_cur_bwdref_offset            = cur_offset[4];
        regs->swreg257.sw_cur_altref2_offset           = cur_offset[5];
        regs->swreg262.sw_cur_altref_offset            = cur_offset[6];

        regs->swreg184.sw_cur_last_roffset             = cur_roffset[0];
        regs->swreg185.sw_cur_last2_roffset            = cur_roffset[1];
        regs->swreg186.sw_cur_last3_roffset            = cur_roffset[2];
        regs->swreg187.sw_cur_golden_roffset           = cur_roffset[3];
        regs->swreg188.sw_cur_bwdref_roffset           = cur_roffset[4];
        regs->swreg257.sw_cur_altref2_roffset          = cur_roffset[5];
        regs->swreg262.sw_cur_altref_roffset           = cur_roffset[6];

        /* Index start from 0 */
        regs->swreg9.sw_mf1_type  = mf_types[0] - LAST_FRAME;
        regs->swreg9.sw_mf2_type  = mf_types[1] - LAST_FRAME;
        regs->swreg9.sw_mf3_type  = mf_types[2] - LAST_FRAME;
        AV1D_DBG(AV1D_DBG_LOG, "mf_types[%d %d %d]\n", mf_types[0], mf_types[1], mf_types[2]);
    }
    regs->swreg5.sw_ref_scaling_enable = ref_scale_e;
}
1097 #undef MAX_FRAME_DISTANCE
1098 
/*
 * Derive the super-resolution scaling parameters for the current frame and
 * write them, together with the superres column buffer address, into the
 * register set.
 *
 * p_hal : HAL context; p_hal->reg_ctx provides regs, filter_mem, filt_info.
 * dxva  : picture parameters (coding.superres, upscaled_width).
 *
 * The scale denominator is read back from swreg9.sw_scale_denom_minus9, so
 * that field must already be programmed when this function runs.
 */
void vdpu_av1d_superres_params(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    // Compute and store scaling parameters needed for superres
#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
    VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *regs = ctx->regs;

    /* Start from the "not scaled" register values; they are only replaced
     * when the frame really is down-scaled. */
    RK_U32 luma_step = RS_SCALE_SUBPEL_BITS;
    RK_U32 chroma_step = RS_SCALE_SUBPEL_BITS;
    RK_U32 luma_step_inv = RS_SCALE_SUBPEL_BITS;
    RK_U32 chroma_step_inv = RS_SCALE_SUBPEL_BITS;
    RK_U32 luma_subpel_x0 = 0;
    RK_U32 chroma_subpel_x0 = 0;
    RK_U32 is_scaled = 0;
    RK_U8 denom = SCALE_NUMERATOR;
    RK_U32 width = 0;

    if (dxva->coding.superres)
        denom = regs->swreg9.sw_scale_denom_minus9 + 9;

    if (denom > SCALE_NUMERATOR) {
        RK_U32 min_w = MPP_MIN(16, dxva->upscaled_width);

        /* Down-scaled width, rounded to nearest and clamped from below. */
        width = (dxva->upscaled_width * SCALE_NUMERATOR +
                 (denom / 2)) /
                denom;
        if (width < min_w)
            width = min_w;

        /* Equal widths mean no scaling happens after all. */
        if (width != dxva->upscaled_width) {
            const RK_S32 up_luma_w = dxva->upscaled_width;
            const RK_S32 down_luma_w = width;
            const RK_S32 down_chroma_w = (down_luma_w + 1) >> 1;
            const RK_S32 up_chroma_w = (up_luma_w + 1) >> 1;

            /* Source step per output pixel, in 1/2^RS_SCALE_SUBPEL_BITS. */
            const RK_S32 step_luma =
                ((down_luma_w << RS_SCALE_SUBPEL_BITS) + (up_luma_w / 2)) /
                up_luma_w;
            const RK_S32 step_chroma =
                ((down_chroma_w << RS_SCALE_SUBPEL_BITS) + (up_chroma_w / 2)) /
                up_chroma_w;

            /* Rounding error accumulated over a full row. */
            const RK_S32 err_luma =
                (up_luma_w * step_luma) - (down_luma_w << RS_SCALE_SUBPEL_BITS);
            const RK_S32 err_chroma =
                (up_chroma_w * step_chroma) - (down_chroma_w << RS_SCALE_SUBPEL_BITS);

            const RK_S32 luma_center =
                (-((up_luma_w - down_luma_w) << (RS_SCALE_SUBPEL_BITS - 1)) +
                 up_luma_w / 2) /
                up_luma_w;
            const RK_S32 chroma_center =
                (-((up_chroma_w - down_chroma_w) << (RS_SCALE_SUBPEL_BITS - 1)) +
                 up_chroma_w / 2) /
                up_chroma_w;

            luma_step = step_luma;
            chroma_step = step_chroma;
            luma_step_inv =
                ((up_luma_w << RS_SCALE_SUBPEL_BITS) + (down_luma_w / 2)) /
                down_luma_w;
            chroma_step_inv =
                ((up_chroma_w << RS_SCALE_SUBPEL_BITS) + (down_chroma_w / 2)) /
                down_chroma_w;
            luma_subpel_x0 =
                (luma_center + RS_SCALE_EXTRA_OFF - err_luma / 2) &
                RS_SCALE_SUBPEL_MASK;
            chroma_subpel_x0 =
                (chroma_center + RS_SCALE_EXTRA_OFF - err_chroma / 2) &
                RS_SCALE_SUBPEL_MASK;
            is_scaled = 1;
        }
    }

    regs->swreg51.sw_superres_luma_step = luma_step;
    regs->swreg51.sw_superres_chroma_step = chroma_step;
    regs->swreg298.sw_superres_luma_step_invra = luma_step_inv;
    regs->swreg298.sw_superres_chroma_step_invra = chroma_step_inv;
    regs->swreg52.sw_superres_init_luma_subpel_x = luma_subpel_x0;
    regs->swreg52.sw_superres_init_chroma_subpel_x = chroma_subpel_x0;
    regs->swreg5.sw_superres_is_scaled = is_scaled;

    /* Superres column buffer: SR_COL slice of the shared filter memory. */
    regs->addr_cfg.swreg89.sw_superres_colbuf_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
    mpp_dev_set_reg_offset(p_hal->dev, 89, ctx->filt_info[SR_COL].offset);
}
1209 
1210 
/*
 * Program the size of the current picture (needed when scaling is used) and
 * then derive the super-resolution parameters.
 *
 * p_hal : HAL context; p_hal->reg_ctx provides the register set.
 * dxva  : picture parameters (width, height, upscaled_width, superres_denom).
 */
void vdpu_av1d_set_picture_dimensions(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *regs = ctx->regs;
    const RK_U32 pic_w = dxva->width;
    const RK_U32 pic_h = dxva->height;
    const RK_U32 aligned_w = MPP_ALIGN(pic_w, 8);
    const RK_U32 aligned_h = MPP_ALIGN(pic_h, 8);

    /* Size in 8x8 units, plus the padding added by rounding up to 8. */
    regs->swreg4.sw_pic_width_in_cbs  = aligned_w >> 3;
    regs->swreg4.sw_pic_height_in_cbs = aligned_h >> 3;
    regs->swreg12.sw_pic_width_pad    = aligned_w - pic_w;
    regs->swreg12.sw_pic_height_pad   = aligned_h - pic_h;

    regs->swreg8.sw_superres_pic_width = dxva->upscaled_width;
    /* Must be written before the call below, which reads it back. */
    regs->swreg9.sw_scale_denom_minus9 = dxva->superres_denom;

    vdpu_av1d_superres_params(p_hal, dxva);
}
1228 
/*
 * Program the segmentation registers: enable flags, base loop-filter levels
 * and, for each of the MAX_MB_SEGMENTS segments, the quantizer delta, the
 * four loop-filter deltas, the reference frame, skip and global-MV features.
 *
 * ctx  : register context; ctx->regs is the register set being filled.
 * dxva : picture parameters (segmentation, loop_filter, coding, format).
 */
void vdpu_av1d_set_segmentation(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
{
    /* Per-segment feature values; everything starts disabled / zero. */
    RK_U32 segval[MAX_MB_SEGMENTS][SEG_AV1_LVL_MAX] = {{0}};
    VdpuAv1dRegSet *regs = ctx->regs;
    RK_U8 s, i, j;
    RK_U8 segsign = 0;
    RK_U8 preskip_segid = 0;
    RK_U8 last_active_seg = 0;

    /* Segmentation */
    regs->swreg13.sw_segment_temp_upd_e   = dxva->segmentation.temporal_update;
    regs->swreg13.sw_segment_upd_e        = dxva->segmentation.update_map;
    regs->swreg13.sw_segment_e            = dxva->segmentation.enabled;

    //dec->error_resilient || dec->resolution_change;
    regs->swreg5.sw_error_resilient     =  dxva->coding.error_resilient_mode;

    /* Frame type 0 or intra-only frames, and error-resilient frames, must
     * not use the segment information of a previous frame. */
    if ((!dxva->format.frame_type ||  dxva->format.frame_type == AV1_FRAME_INTRA_ONLY)
        || regs->swreg5.sw_error_resilient) {
        regs->swreg11.sw_use_temporal3_mvs = 0;
    }

    regs->swreg14.sw_filt_level0   = dxva->loop_filter.filter_level[0];
    regs->swreg15.sw_filt_level1   = dxva->loop_filter.filter_level[1];
    regs->swreg16.sw_filt_level2   = dxva->loop_filter.filter_level_u;
    regs->swreg17.sw_filt_level3   = dxva->loop_filter.filter_level_v;

    /* If a feature is enabled for a segment, overwrite the default. */
    if (dxva->segmentation.enabled) {
        RK_S32 (*segdata)[SEG_AV1_LVL_MAX] = dxva->segmentation.feature_data;

        for (s = 0; s < MAX_MB_SEGMENTS; s++) {
            const RK_U32 mask = dxva->segmentation.feature_mask[s];
            const RK_S32 *feat = segdata[s];

            /* The quantizer delta is stored as magnitude plus a sign bit. */
            if (mask & (1 << SEG_AV1_LVL_ALT_Q)) {
                segval[s][SEG_AV1_LVL_ALT_Q] =
                    MPP_CLIP3(0, 255, MPP_ABS(feat[SEG_AV1_LVL_ALT_Q]));
                segsign |= (feat[SEG_AV1_LVL_ALT_Q] < 0) << s;
            }

            if (mask & (1 << SEG_AV1_LVL_ALT_LF_Y_V)) {
                segval[s][SEG_AV1_LVL_ALT_LF_Y_V] =
                    MPP_CLIP3(-63, 63, feat[SEG_AV1_LVL_ALT_LF_Y_V]);
            }
            if (mask & (1 << SEG_AV1_LVL_ALT_LF_Y_H)) {
                segval[s][SEG_AV1_LVL_ALT_LF_Y_H] =
                    MPP_CLIP3(-63, 63, feat[SEG_AV1_LVL_ALT_LF_Y_H]);
            }
            if (mask & (1 << SEG_AV1_LVL_ALT_LF_U)) {
                segval[s][SEG_AV1_LVL_ALT_LF_U] =
                    MPP_CLIP3(-63, 63, feat[SEG_AV1_LVL_ALT_LF_U]);
            }
            if (mask & (1 << SEG_AV1_LVL_ALT_LF_V)) {
                segval[s][SEG_AV1_LVL_ALT_LF_V] =
                    MPP_CLIP3(-63, 63, feat[SEG_AV1_LVL_ALT_LF_V]);
            }

            /* Segment reference frame is only honoured when frame_type is
             * non-zero; the stored value is the coded one plus 1. */
            if (dxva->format.frame_type && (mask & (1 << SEG_AV1_LVL_REF_FRAME))) {
                segval[s][SEG_AV1_LVL_REF_FRAME] =
                    feat[SEG_AV1_LVL_REF_FRAME] + 1;
            }

            if (mask & (1 << SEG_AV1_LVL_SKIP)) {
                segval[s][SEG_AV1_LVL_SKIP] = 1;
            }
            if (mask & (1 << SEG_AV1_LVL_GLOBALMV)) {
                segval[s][SEG_AV1_LVL_GLOBALMV] = 1;
            }
        }
    }

    /* Highest segment with any feature set, and whether any feature at or
     * above SEG_AV1_LVL_REF_FRAME is in use. */
    for (i = 0; i < MAX_MB_SEGMENTS; i++) {
        for (j = 0; j < SEG_AV1_LVL_MAX; j++) {
            if (!(dxva->segmentation.feature_mask[i] & (1 << j)))
                continue;
            preskip_segid |= j >= SEG_AV1_LVL_REF_FRAME;
            last_active_seg = MPP_MAX(i, last_active_seg);
        }
    }

    regs->swreg9.sw_last_active_seg = last_active_seg;
    regs->swreg5.sw_preskip_segid   = preskip_segid;

    regs->swreg12.sw_seg_quant_sign = segsign;

    /* Write QP, filter level, ref frame and skip for every segment.  Each
     * segment's fields are split over two registers: reg_a holds quant,
     * delta0, refpic and skip; reg_b holds delta1..3 and global_mv. */
#define AV1D_WRITE_SEG_REGS(seg, reg_a, reg_b)                                              \
    do {                                                                                    \
        regs->reg_a.sw_quant_seg##seg = segval[seg][SEG_AV1_LVL_ALT_Q];                     \
        regs->reg_a.sw_filt_level_delta0_seg##seg = segval[seg][SEG_AV1_LVL_ALT_LF_Y_V];    \
        regs->reg_b.sw_filt_level_delta1_seg##seg = segval[seg][SEG_AV1_LVL_ALT_LF_Y_H];    \
        regs->reg_b.sw_filt_level_delta2_seg##seg = segval[seg][SEG_AV1_LVL_ALT_LF_U];      \
        regs->reg_b.sw_filt_level_delta3_seg##seg = segval[seg][SEG_AV1_LVL_ALT_LF_V];      \
        regs->reg_a.sw_refpic_seg##seg = segval[seg][SEG_AV1_LVL_REF_FRAME];                \
        regs->reg_a.sw_skip_seg##seg = segval[seg][SEG_AV1_LVL_SKIP];                       \
        regs->reg_b.sw_global_mv_seg##seg = segval[seg][SEG_AV1_LVL_GLOBALMV];              \
    } while (0)

    AV1D_WRITE_SEG_REGS(0, swreg14, swreg20);
    AV1D_WRITE_SEG_REGS(1, swreg15, swreg21);
    AV1D_WRITE_SEG_REGS(2, swreg16, swreg22);
    AV1D_WRITE_SEG_REGS(3, swreg17, swreg23);
    AV1D_WRITE_SEG_REGS(4, swreg18, swreg24);
    AV1D_WRITE_SEG_REGS(5, swreg19, swreg25);
    /* Segments 6 and 7 do not follow the register numbering of 0..5. */
    AV1D_WRITE_SEG_REGS(6, swreg31, swreg26);
    AV1D_WRITE_SEG_REGS(7, swreg32, swreg27);

#undef AV1D_WRITE_SEG_REGS
}
1392 
/*
 * Fill the deblocking loop-filter registers and the filter buffer addresses.
 *
 * p_hal - HAL context; reg_ctx supplies the register set and the shared
 *         filter buffer, dev is used to attach the buffer offset.
 * dxva  - picture parameters; only the loop_filter group is read here.
 *
 * The reference / mode delta registers receive the stream values when
 * mode_ref_delta_enabled is set and are written with zero otherwise.
 */
void vdpu_av1d_set_loopfilter(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *reg_ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *hw = reg_ctx->regs;
    const RK_S32 use_deltas = !!dxva->loop_filter.mode_ref_delta_enabled;

    /* filtering is reported as disabled only when both levels are zero */
    hw->swreg3.sw_filtering_dis =
        !(dxva->loop_filter.filter_level[0] || dxva->loop_filter.filter_level[1]);
    hw->swreg5.sw_filt_level_base_gt32 = dxva->loop_filter.filter_level[0] > 32;
    hw->swreg30.sw_filt_sharpness = dxva->loop_filter.sharpness_level;

    hw->swreg59.sw_filt_ref_adj_0 = use_deltas ? dxva->loop_filter.ref_deltas[0] : 0;
    hw->swreg59.sw_filt_ref_adj_1 = use_deltas ? dxva->loop_filter.ref_deltas[1] : 0;
    hw->swreg59.sw_filt_ref_adj_2 = use_deltas ? dxva->loop_filter.ref_deltas[2] : 0;
    hw->swreg59.sw_filt_ref_adj_3 = use_deltas ? dxva->loop_filter.ref_deltas[3] : 0;
    hw->swreg30.sw_filt_ref_adj_4 = use_deltas ? dxva->loop_filter.ref_deltas[4] : 0;
    hw->swreg30.sw_filt_ref_adj_5 = use_deltas ? dxva->loop_filter.ref_deltas[5] : 0;
    /*
     * NOTE(review): ref_deltas[6] feeds adj_7 and ref_deltas[7] feeds adj_6.
     * Kept exactly as found; confirm against the register layout whether the
     * crossed mapping is intentional.
     */
    hw->swreg49.sw_filt_ref_adj_7 = use_deltas ? dxva->loop_filter.ref_deltas[6] : 0;
    hw->swreg49.sw_filt_ref_adj_6 = use_deltas ? dxva->loop_filter.ref_deltas[7] : 0;
    hw->swreg30.sw_filt_mb_adj_0 = use_deltas ? dxva->loop_filter.mode_deltas[0] : 0;
    hw->swreg30.sw_filt_mb_adj_1 = use_deltas ? dxva->loop_filter.mode_deltas[1] : 0;

    /* both base registers take the fd of filter_mem; reg 183 gets an offset */
    hw->addr_cfg.swreg179.sw_dec_vert_filt_base_lsb = mpp_buffer_get_fd(reg_ctx->filter_mem);
    hw->addr_cfg.swreg183.sw_dec_bsd_ctrl_base_lsb = mpp_buffer_get_fd(reg_ctx->filter_mem);
    mpp_dev_set_reg_offset(p_hal->dev, 183, reg_ctx->filt_info[DB_CTRL_COL].offset);
}
1428 
/*
 * Serialise the per-reference global motion models into the global_model
 * buffer and point the hardware at that buffer.
 *
 * p_hal - HAL context; reg_ctx supplies the register set and the buffer.
 * dxva  - picture parameters; frame_refs[0..GM_GLOBAL_MODELS_PER_FRAME-1]
 *         are read.
 *
 * Each model occupies 32 bytes: six 32-bit wmmat entries followed by the
 * 16-bit alpha, beta, gamma and delta values.
 */
void vdpu_av1d_set_global_model(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    /* In DDR wmmat order is 0, 1, 3, 2, 4, 5 */
    static const RK_U8 wmmat_ddr_order[6] = { 0, 1, 3, 2, 4, 5 };
    VdpuAv1dRegCtx *reg_ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *hw = reg_ctx->regs;
    RK_U8 *wr = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->global_model);
    RK_S32 ref_frame, k;

    for (ref_frame = 0; ref_frame < GM_GLOBAL_MODELS_PER_FRAME; ++ref_frame) {
        RK_S32 *mat;
        RK_S16 *shear;

        mpp_assert(dxva->frame_refs[ref_frame].wmtype <= 3);

        /* six 32-bit matrix entries, with entries 2 and 3 exchanged */
        mat = (RK_S32 *)wr;
        for (k = 0; k < 6; ++k)
            mat[k] = dxva->frame_refs[ref_frame].wmmat[wmmat_ddr_order[k]];
        wr += 6 * 4;

        /* four 16-bit parameters directly after the matrix */
        shear = (RK_S16 *)wr;
        shear[0] = dxva->frame_refs[ref_frame].alpha;
        shear[1] = dxva->frame_refs[ref_frame].beta;
        shear[2] = dxva->frame_refs[ref_frame].gamma;
        shear[3] = dxva->frame_refs[ref_frame].delta;
        wr += 4 * 2;

        AV1D_DBG(AV1D_DBG_LOG, "ref_frame[%d] alpa %d beta %d gamma %d delta %d\n",
                 ref_frame,
                 dxva->frame_refs[ref_frame].alpha,
                 dxva->frame_refs[ref_frame].beta,
                 dxva->frame_refs[ref_frame].gamma,
                 dxva->frame_refs[ref_frame].delta);
    }

    hw->addr_cfg.swreg82.sw_global_model_base_msb = 0;
    hw->addr_cfg.swreg83.sw_global_model_base_lsb = mpp_buffer_get_fd(reg_ctx->global_model);
}
1469 
/*
 * Program the tile layout registers and the tile info buffer address.
 *
 * ctx  - register context; tile_transpose selects the tile ordering and
 *        tile_info is the buffer whose fd is written to the base register.
 * dxva - picture parameters; only the tiles group is read.
 *
 * NOTE(review): the context-update position is derived by dividing by
 * tiles.cols, so a zero column count would divide by zero - presumably the
 * parser guarantees cols >= 1; confirm.
 */
void vdpu_av1d_set_tile_info_regs(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegSet *hw = ctx->regs;
    const int transposed = ctx->tile_transpose;
    size_t update_id = dxva->tiles.context_update_id;
    const size_t update_row = update_id / dxva->tiles.cols;
    const size_t update_col = update_id % dxva->tiles.cols;

    /* in transposed order tiles are numbered column by column */
    if (transposed)
        update_id = update_col * dxva->tiles.rows + update_row;

    hw->swreg9.sw_context_update_tile_id = update_id;

    hw->swreg10.sw_tile_enable = (dxva->tiles.cols > 1) || (dxva->tiles.rows > 1);
    hw->swreg10.sw_num_tile_cols_8k = dxva->tiles.cols;
    hw->swreg10.sw_num_tile_rows_8k_av1 = dxva->tiles.rows;
    hw->swreg10.sw_tile_transpose = transposed;

    /* set when the context-update tile sits in the first tile column */
    hw->swreg11.sw_multicore_expect_context_update = (0 == update_col);
    hw->swreg11.sw_dec_tile_size_mag = dxva->tiles.tile_sz_mag;

    if (hw->swreg10.sw_tile_enable)
        AV1D_DBG(AV1D_DBG_LOG, "NOTICE: tile enabled.\n");

    hw->addr_cfg.swreg166.sw_tile_base_msb = 0;
    hw->addr_cfg.swreg167.sw_tile_base_lsb = mpp_buffer_get_fd(ctx->tile_info);
}
1494 
/*
 * Check one tile width against the limits enforced here.
 *
 * dxva     - picture parameters (superblock size, superres flag/denominator).
 * width    - tile width in superblock units.
 * leftmost - non-zero exempts the tile from the one-superblock superres check.
 *            NOTE(review): the visible caller passes "tile_x == cols - 1",
 *            i.e. the last tile column, despite the parameter name - confirm.
 *
 * Returns 1 when the width is accepted, 0 when either check fails:
 *   - superres with 64x64 superblocks and a one-superblock-wide tile
 *     (skipped when leftmost is set);
 *   - tile width in pixels, after superres scaling, above 4096.
 */
static int check_tile_width(DXVA_PicParams_AV1 *dxva, RK_S32 width, RK_S32 leftmost)
{
    const RK_S32 sb_shift = dxva->coding.use_128x128_superblock ? 7 : 6;
    RK_S32 width_px = width << sb_shift;
    RK_S32 ok = 1;

    if (dxva->coding.superres) {
        if (width == 1 && !leftmost && !dxva->coding.use_128x128_superblock) {
            AV1D_DBG(AV1D_DBG_LOG, "WARNING: Superres used and tile width == 64\n");
            ok = 0;
        }
        /* scale by (9 + superres_denom) / 8, rounded to nearest */
        width_px = (width_px * (9 + dxva->superres_denom) + 4) / 8;
    }

    if (width_px > 4096) {
        if (dxva->coding.superres)
            AV1D_LOG("WARNING: Tile width after superres > 4096\n");
        else
            AV1D_LOG("WARNING: Tile width > 4096\n");
        ok = 0;
    }

    return ok;
}
1518 
/*
 * Write one 16-byte record per tile into the tile_info buffer.
 *
 * p_hal - HAL context; reg_ctx holds the tile_info buffer and the transpose
 *         flag, strm_len is the stream length used by the range checks.
 * dxva  - picture parameters; tiles.widths / tiles.heights are read as
 *         consecutive boundary positions in superblock units.
 *
 * Record layout (little endian):
 *   byte  0      tile width in superblocks (low 8 bits), bytes 1..3 zero
 *   byte  4      tile height in superblocks (low 8 bits), bytes 5..7 zero
 *   bytes 8..11  tiles.tile_offset_start[tile_id]
 *   bytes 12..15 tiles.tile_offset_end[tile_id]
 *
 * With tile_transpose set, records are emitted column by column, otherwise
 * row by row; tile_id is always row * cols + column.
 *
 * The function also tracks whether any tile fails the width / position /
 * stream-range checks, but that flag is not consumed anywhere in this
 * function.
 *
 * NOTE(review): nothing here bounds rows * cols against the size of the
 * tile_info buffer or of the tile_offset arrays - confirm the parser limits
 * the tile count accordingly.
 */
void vdpu_av1d_set_tile_info_mem(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
    const RK_S32 transposed = reg_ctx->tile_transpose;
    const RK_S32 sb_shift = dxva->coding.use_128x128_superblock ? 7 : 6;
    RK_S32 hdr_bytes = dxva->frame_tag_size + dxva->offset_to_dct_parts;
    RK_U32 stream_len = p_hal->strm_len - hdr_bytes;
    RK_U8 *entry = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->tile_info);
    RK_S32 outer_cnt = transposed ? dxva->tiles.cols : dxva->tiles.rows;
    RK_S32 inner_cnt = transposed ? dxva->tiles.rows : dxva->tiles.cols;
    RK_U32 invalid = 0;
    RK_S32 outer, inner;

    for (outer = 0; outer < outer_cnt; outer++) {
        for (inner = 0; inner < inner_cnt; inner++) {
            const RK_S32 tile_y = transposed ? inner : outer;
            const RK_S32 tile_x = transposed ? outer : inner;
            const RK_S32 tile_id = tile_y * dxva->tiles.cols + tile_x;
            const RK_U32 y0 = dxva->tiles.heights[tile_y];
            const RK_U32 y1 = dxva->tiles.heights[tile_y + 1];
            const RK_U32 x0 = dxva->tiles.widths[tile_x];
            const RK_U32 x1 = dxva->tiles.widths[tile_x + 1];
            const RK_U32 start = dxva->tiles.tile_offset_start[tile_id];
            const RK_U32 end = dxva->tiles.tile_offset_end[tile_id];
            const RK_U8 leftmost = (tile_x == dxva->tiles.cols - 1);

            /* width is only checked until the first failure is recorded */
            if (!invalid)
                invalid = !check_tile_width(dxva, x1 - x0, leftmost);
            /* tile origin has to lie inside the picture */
            if ((x0 << sb_shift) >= dxva->width ||
                (y0 << sb_shift) >= dxva->height)
                invalid = 1;

            // tile size in SB units (width,height)
            entry[0] = x1 - x0;
            entry[1] = 0;
            entry[2] = 0;
            entry[3] = 0;
            entry[4] = y1 - y0;
            entry[5] = 0;
            entry[6] = 0;
            entry[7] = 0;

            // tile start position (offset from sw_stream0_base)
            entry[8] = start & 255;
            entry[9] = (start >> 8) & 255;
            entry[10] = (start >> 16) & 255;
            entry[11] = (start >> 24) & 255;
            if (!invalid && (start + 1) > stream_len)
                invalid = 1;

            // value taken from tiles.tile_offset_end
            entry[12] = end & 255;
            entry[13] = (end >> 8) & 255;
            entry[14] = (end >> 16) & 255;
            entry[15] = (end >> 24) & 255;
            if (!invalid && end > stream_len)
                invalid = 1;

            entry += 16;

            AV1D_DBG(AV1D_DBG_LOG, "tile_info[%d][%d]: start=%08x end=%08x x0:x1=%d:%d y0:y1=%d:%d\n",
                     outer, inner, start, end, x0, x1, y0, y1);
        }
    }
}
1588 
/*
 * Program the CDEF registers and the CDEF column buffer address.
 *
 * p_hal - HAL context; reg_ctx supplies the register set and the shared
 *         filter buffer, dev is used to attach the buffer offset.
 * dxva  - picture parameters; only the cdef group is read.
 *
 * Up to 8 strength presets are packed into the registers: primary strengths
 * at 4 bits per preset, secondary strengths at 2 bits per preset. Only the
 * first (1 << cdef.bits) presets are packed; the rest stay zero.
 */
void vdpu_av1d_set_cdef(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    RK_U32 luma_pri_strength = 0;
    RK_U16 luma_sec_strength = 0;
    RK_U32 chroma_pri_strength = 0;
    RK_U16 chroma_sec_strength = 0;
    VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *regs = ctx->regs;
    RK_S32 i;

    /* CDEF */
    regs->swreg7.sw_cdef_bits           = dxva->cdef.bits;
    regs->swreg7.sw_cdef_damping        = dxva->cdef.damping;

    for (i = 0; i < 8; i++) {
        if (i == (1 << (dxva->cdef.bits))) break;
        /*
         * Shift as unsigned 32-bit: the strength fields promote to int, and
         * the last primary preset is shifted by 28 bits, which would move a
         * value >= 8 into the sign bit of a signed int (undefined behaviour).
         */
        luma_pri_strength |= (RK_U32)dxva->cdef.y_strengths[i].primary << (i * 4);
        luma_sec_strength |= (RK_U32)dxva->cdef.y_strengths[i].secondary << (i * 2);
        chroma_pri_strength |= (RK_U32)dxva->cdef.uv_strengths[i].primary << (i * 4);
        chroma_sec_strength |= (RK_U32)dxva->cdef.uv_strengths[i].secondary << (i * 2);
    }

    regs->swreg263.sw_cdef_luma_primary_strength = luma_pri_strength;
    regs->swreg53.sw_cdef_luma_secondary_strength = luma_sec_strength;
    regs->swreg264.sw_cdef_chroma_primary_strength = chroma_pri_strength;
    regs->swreg53.sw_cdef_chroma_secondary_strength = chroma_sec_strength;

    // tile column buffer; repurpose some encoder specific base
    regs->addr_cfg.swreg85.sw_cdef_colbuf_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
    mpp_dev_set_reg_offset(p_hal->dev, 85, ctx->filt_info[CDEF_COL].offset);
}
1620 
/*
 * Program the loop-restoration registers and its column buffer address.
 *
 * p_hal - HAL context; reg_ctx supplies the register set and the shared
 *         filter buffer, dev is used to attach the buffer offset.
 * dxva  - picture parameters; frame_restoration_type[0..2] and
 *         log2_restoration_unit_size[0..2] are read.
 *
 * The three per-plane values of each array are OR-ed together at a spacing
 * of two bits per plane.
 */
void vdpu_av1d_set_lr(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegCtx *reg_ctx = p_hal->reg_ctx;
    VdpuAv1dRegSet *hw = reg_ctx->regs;
    RK_U16 type_bits = 0;
    RK_U16 unit_bits = 0;
    RK_S32 plane;

    for (plane = 0; plane < 3; plane++) {
        const RK_S32 shift = plane * 2;

        type_bits |= dxva->loop_filter.frame_restoration_type[plane] << shift;
        unit_bits |= dxva->loop_filter.log2_restoration_unit_size[plane] << shift;
    }

    hw->swreg18.sw_lr_type = type_bits;
    hw->swreg19.sw_lr_unit_size = unit_bits;

    /* column buffer lives inside filter_mem at the LR_COL offset */
    hw->addr_cfg.swreg91.sw_lr_colbuf_base_lsb = mpp_buffer_get_fd(reg_ctx->filter_mem);
    mpp_dev_set_reg_offset(p_hal->dev, 91, reg_ctx->filt_info[LR_COL].offset);
}
1638 
/*
 * Expand a list of (x, y) scaling points into a 256-entry lookup table.
 *
 * scaling_points - array of {x, y} byte pairs.
 * num_points     - number of valid pairs; 0 produces an all-zero table.
 * scaling_lut    - output table, 256 bytes are written.
 *
 * Entries before the first point take the first y value, entries from the
 * last point onwards take the last y value, and entries between two
 * consecutive points are interpolated linearly in 16.16 fixed point.
 */
void init_scaling_function(RK_U8 scaling_points[][2], RK_U8 num_points,
                           RK_U8 scaling_lut[])
{
    RK_S32 seg, pos, last;

    if (!num_points) {
        memset(scaling_lut, 0, 256);
        return;
    }

    /* flat head up to the first point */
    pos = 0;
    while (pos < scaling_points[0][0]) {
        scaling_lut[pos] = scaling_points[0][1];
        pos++;
    }

    /* one linear segment per pair of neighbouring points */
    for (seg = 0; seg + 1 < num_points; seg++) {
        const RK_U8 *from = scaling_points[seg];
        const RK_U8 *to = scaling_points[seg + 1];
        const RK_S32 rise = to[1] - from[1];
        const RK_S32 run = to[0] - from[0];
        RK_S64 slope = 0;
        RK_S32 step;

        /* slope per x step, scaled by 65536; zero-length runs contribute nothing */
        if (run)
            slope = rise * ((65536 + (run >> 1)) / run);

        for (step = 0; step < run; step++) {
            scaling_lut[from[0] + step] =
                from[1] + (RK_S32)((step * slope + 32768) >> 16);
        }
    }

    /* flat tail from the last point to the end of the table */
    last = num_points - 1;
    for (pos = scaling_points[last][0]; pos < 256; pos++)
        scaling_lut[pos] = scaling_points[last][1];
}
1667 
/*
 * Prepare the film grain registers and the film grain memory block.
 *
 * ctx  - register context; fgsmem is filled and then copied into the
 *        film_grain_mem buffer, whose fd goes to the base register.
 * dxva - picture parameters; film_grain and bitdepth are read.
 *
 * When film_grain.apply_grain is zero the function only clears
 * sw_apply_grain and returns. Otherwise it:
 *   - writes the grain control registers,
 *   - builds the luma / cb / cr scaling LUTs (chroma LUTs are copies of the
 *     luma LUT when chroma_scaling_from_luma is set),
 *   - generates the luma and chroma grain blocks and stores their cropped
 *     64x64 luma and interleaved 32x32 cb/cr parts in fgsmem,
 *   - copies fgsmem into the film grain buffer.
 *
 * Note: within this function sw_apply_grain is only ever written with 0, so
 * the closing debug message depends on the bit having been set elsewhere.
 */
void vdpu_av1d_set_fgs(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
{
    VdpuAv1dRegSet *hw = ctx->regs;
    RK_U8 *grain_mem = mpp_buffer_get_ptr(ctx->film_grain_mem);
    RK_S32 luma_grain[73][82];
    RK_S32 cb_grain[38][44];
    RK_S32 cr_grain[38][44];
    RK_S32 coef_y[24];
    RK_S32 coef_cb[25];
    RK_S32 coef_cr[25];
    RK_S32 lag, coef_shift, scale_shift;
    RK_S32 depth, center, lowest, highest;
    RK_S32 row, col, k;

    if (!dxva->film_grain.apply_grain) {
        hw->swreg7.sw_apply_grain = 0;
        return;
    }

    /* control bits that do not depend on the chroma scaling mode */
    hw->swreg7.sw_num_y_points_b = dxva->film_grain.num_y_points > 0;
    hw->swreg7.sw_num_cb_points_b = dxva->film_grain.num_cb_points > 0;
    hw->swreg7.sw_num_cr_points_b = dxva->film_grain.num_cr_points > 0;
    hw->swreg7.sw_overlap_flag = dxva->film_grain.overlap_flag;
    hw->swreg7.sw_clip_to_restricted_range = dxva->film_grain.clip_to_restricted_range;
    hw->swreg7.sw_chroma_scaling_from_luma = dxva->film_grain.chroma_scaling_from_luma;
    hw->swreg7.sw_random_seed = dxva->film_grain.grain_seed;
    hw->swreg8.sw_scaling_shift = dxva->film_grain.scaling_shift_minus8 + 8;

    init_scaling_function(dxva->film_grain.scaling_points_y, dxva->film_grain.num_y_points,
                          ctx->fgsmem.scaling_lut_y);

    if (dxva->film_grain.chroma_scaling_from_luma) {
        /* fixed chroma multipliers, chroma LUTs mirror the luma LUT */
        hw->swreg28.sw_cb_mult = 0;
        hw->swreg28.sw_cb_luma_mult = 64;
        hw->swreg28.sw_cb_offset = 0;
        hw->swreg29.sw_cr_mult = 0;
        hw->swreg29.sw_cr_luma_mult = 64;
        hw->swreg29.sw_cr_offset = 0;

        memcpy(ctx->fgsmem.scaling_lut_cb, ctx->fgsmem.scaling_lut_y,
               sizeof(*ctx->fgsmem.scaling_lut_y) * 256);
        memcpy(ctx->fgsmem.scaling_lut_cr, ctx->fgsmem.scaling_lut_y,
               sizeof(*ctx->fgsmem.scaling_lut_y) * 256);
    } else {
        /* stream values are stored with a bias of 128 (mult) / 256 (offset) */
        hw->swreg28.sw_cb_mult = dxva->film_grain.cb_mult - 128;
        hw->swreg28.sw_cb_luma_mult = dxva->film_grain.cb_luma_mult - 128;
        hw->swreg28.sw_cb_offset = dxva->film_grain.cb_offset - 256;
        hw->swreg29.sw_cr_mult = dxva->film_grain.cr_mult - 128;
        hw->swreg29.sw_cr_luma_mult = dxva->film_grain.cr_luma_mult - 128;
        hw->swreg29.sw_cr_offset = dxva->film_grain.cr_offset - 256;

        init_scaling_function(dxva->film_grain.scaling_points_cb,
                              dxva->film_grain.num_cb_points, ctx->fgsmem.scaling_lut_cb);
        init_scaling_function(dxva->film_grain.scaling_points_cr,
                              dxva->film_grain.num_cr_points, ctx->fgsmem.scaling_lut_cr);
    }

    /* auto-regression coefficients: remove the bias of 128 */
    for (k = 0; k < 24; k++)
        coef_y[k] = dxva->film_grain.ar_coeffs_y[k] - 128;
    for (k = 0; k < 25; k++) {
        coef_cb[k] = dxva->film_grain.ar_coeffs_cb[k] - 128;
        coef_cr[k] = dxva->film_grain.ar_coeffs_cr[k] - 128;
    }

    lag = dxva->film_grain.ar_coeff_lag;
    coef_shift = dxva->film_grain.ar_coeff_shift_minus6 + 6;
    scale_shift = dxva->film_grain.grain_scale_shift;
    depth = dxva->bitdepth;
    center = 128 << (depth - 8);
    lowest = 0 - center;
    highest = (256 << (depth - 8)) - 1 - center;

    GenerateLumaGrainBlock(luma_grain, depth, dxva->film_grain.num_y_points,
                           scale_shift, lag, coef_y,
                           coef_shift, lowest, highest,
                           dxva->film_grain.grain_seed);

    GenerateChromaGrainBlock(
        luma_grain, cb_grain, cr_grain, depth,
        dxva->film_grain.num_y_points, dxva->film_grain.num_cb_points,
        dxva->film_grain.num_cr_points, scale_shift, lag, coef_cb,
        coef_cr, coef_shift, lowest, highest,
        dxva->film_grain.chroma_scaling_from_luma, dxva->film_grain.grain_seed);

    /* 64x64 luma window starting at row 9, column 9 of the generated block */
    for (row = 0; row < 64; row++) {
        const RK_S32 *src = &luma_grain[row + 9][9];

        for (col = 0; col < 64; col++)
            ctx->fgsmem.cropped_luma_grain_block[row * 64 + col] = src[col];
    }

    /* 32x32 chroma windows starting at (6, 6), cb and cr interleaved */
    for (row = 0; row < 32; row++) {
        for (col = 0; col < 32; col++) {
            const RK_S32 dst = row * 64 + 2 * col;

            ctx->fgsmem.cropped_chroma_grain_block[dst] = cb_grain[row + 6][col + 6];
            ctx->fgsmem.cropped_chroma_grain_block[dst + 1] = cr_grain[row + 6][col + 6];
        }
    }

    memcpy(grain_mem, &ctx->fgsmem, sizeof(FilmGrainMemory));

    hw->addr_cfg.swreg94.sw_filmgrain_base_msb = 0;
    hw->addr_cfg.swreg95.sw_filmgrain_base_lsb = mpp_buffer_get_fd(ctx->film_grain_mem);

    if (hw->swreg7.sw_apply_grain)
        AV1D_DBG(AV1D_DBG_LOG, "NOTICE: filmgrain enabled.\n");
}
1795 
vdpu_av1d_gen_regs(void * hal,HalTaskInfo * task)1796 MPP_RET vdpu_av1d_gen_regs(void *hal, HalTaskInfo *task)
1797 {
1798     MPP_RET ret = MPP_ERR_UNKNOW;
1799     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
1800     VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
1801     VdpuAv1dRegSet *regs;
1802     DXVA_PicParams_AV1 *dxva = (DXVA_PicParams_AV1*)task->dec.syntax.data;
1803     MppFrame mframe;
1804     MppBuffer buffer = NULL;
1805     MppBuffer streambuf = NULL;
1806     RK_U32 height = dxva->height;
1807     RK_U32 width = dxva->width;
1808     RK_U32 hor_stride;
1809     RK_U32 ver_stride;
1810     HalBuf *tile_out_buf;
1811     RK_U32 num_tile_cols = 1 << dxva->tile_cols_log2;
1812 
1813     INP_CHECK(ret, NULL == p_hal);
1814 
1815     ctx->refresh_frame_flags = dxva->refresh_frame_flags;
1816 
1817     if (task->dec.flags.parse_err ||
1818         task->dec.flags.ref_err) {
1819         mpp_err_f("parse err %d ref err %d\n",
1820                   task->dec.flags.parse_err, task->dec.flags.ref_err);
1821         goto __RETURN;
1822     }
1823 
1824     if (p_hal->fast_mode) {
1825         RK_U32 i = 0;
1826 
1827         for (i = 0; i <  MPP_ARRAY_ELEMS(ctx->reg_buf); i++) {
1828             if (!ctx->reg_buf[i].valid) {
1829                 task->dec.reg_index = i;
1830                 ctx->regs = ctx->reg_buf[i].regs;
1831                 ctx->reg_buf[i].valid = 1;
1832                 break;
1833             }
1834         }
1835     }
1836 
1837     regs = ctx->regs;
1838     memset(regs, 0, sizeof(*regs));
1839 
1840     if (!ctx->tile_out_bufs) {
1841         RK_U32 out_w = MPP_ALIGN(dxva->max_width * dxva->bitdepth, 16 * 8) / 8;
1842         RK_U32 num_sbs = ((dxva->max_width + 63) / 64 + 1) * ((dxva->max_height + 63) / 64  + 1);
1843         RK_U32 dir_mvs_size = MPP_ALIGN(num_sbs * 24 * 128 / 8, 16) * 2;
1844         RK_U32 out_h = MPP_ALIGN(dxva->max_height, 16);
1845         RK_U32 luma_size = out_w * out_h;
1846         RK_U32 chroma_size = luma_size / 2;
1847 
1848         ctx->hor_stride = out_w;
1849         ctx->luma_size = luma_size;
1850         ctx->chroma_size = chroma_size;
1851         ctx->tile_out_size = luma_size + chroma_size + dir_mvs_size + 512;
1852 
1853         if (ctx->tile_out_bufs) {
1854             hal_bufs_deinit(ctx->tile_out_bufs);
1855             ctx->tile_out_bufs = NULL;
1856         }
1857         hal_bufs_init(&ctx->tile_out_bufs);
1858         if (!ctx->tile_out_bufs) {
1859             mpp_err_f("tile out bufs init fail\n");
1860             goto __RETURN;
1861         }
1862         ctx->tile_out_count = mpp_buf_slot_get_count(p_hal->slots);
1863         hal_bufs_setup(ctx->tile_out_bufs, ctx->tile_out_count, 1, &ctx->tile_out_size);
1864     }
1865 
1866     if (!ctx->filter_mem || height > ctx->height || num_tile_cols > ctx->num_tile_cols) {
1867         if (ctx->filter_mem)
1868             vdpu_av1d_filtermem_release(ctx);
1869         ret = vdpu_av1d_filtermem_alloc(p_hal, ctx, dxva);
1870         if (!ret) {
1871             mpp_err("filt buffer get fail\n");
1872             vdpu_av1d_filtermem_release(ctx);
1873         }
1874     }
1875 
1876     ctx->width = width;
1877     ctx->height = height;
1878     ctx->num_tile_cols = num_tile_cols;
1879     mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
1880     mpp_buf_slot_get_prop(p_hal ->slots, task->dec.output, SLOT_BUFFER, &buffer);
1881     mpp_buf_slot_get_prop(p_hal ->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf);
1882     tile_out_buf = hal_bufs_get_buf(ctx->tile_out_bufs, task->dec.output);
1883     hor_stride = mpp_frame_get_hor_stride(mframe);
1884     ver_stride = mpp_frame_get_ver_stride(mframe);
1885     if (MPP_FRAME_FMT_IS_HDR(mpp_frame_get_fmt(mframe)) && p_hal->cfg->base.enable_hdr_meta)
1886         fill_hdr_meta_to_frame(mframe, HDR_AV1);
1887 
1888     ctx->ver_stride = ver_stride;
1889 
1890     p_hal->strm_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
1891 
1892     ctx->fbc_en = !!MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
1893 
1894     AV1D_DBG(AV1D_DBG_LOG, "bitdepth %d fmt %d [%d : %d] wxh [%d : %d] uxv [%d : %d]\n",
1895              dxva->bitdepth, mpp_frame_get_fmt(mframe),
1896              dxva->format.subsampling_x, dxva->format.subsampling_y,
1897              ctx->width, ctx->height,
1898              ctx->hor_stride, ctx->ver_stride);
1899 
1900     regs->swreg1.sw_dec_abort_e     = 0;
1901     regs->swreg1.sw_dec_e           = 1;
1902     regs->swreg1.sw_dec_tile_int_e  = 0;
1903     regs->swreg2.sw_dec_clk_gate_e = 1;
1904 
1905     regs->swreg3.sw_dec_mode           = 17; // av1 mode
1906     regs->swreg3.sw_skip_mode          = dxva->coding.skip_mode;
1907     regs->swreg3.sw_dec_out_ec_byte_word = 0; // word align
1908     regs->swreg3.sw_write_mvs_e        = 1;
1909     regs->swreg3.sw_dec_out_ec_bypass  = 1;
1910 
1911     regs->swreg5.sw_tempor_mvp_e        = dxva->coding.use_ref_frame_mvs;
1912     regs->swreg5.sw_delta_lf_res_log    = dxva->loop_filter.delta_lf_res;
1913     regs->swreg5.sw_delta_lf_multi      = dxva->loop_filter.delta_lf_multi;
1914     regs->swreg5.sw_delta_lf_present    = dxva->loop_filter.delta_lf_present;
1915     regs->swreg5.sw_disable_cdf_update  = dxva->coding.disable_cdf_update;
1916     regs->swreg5.sw_allow_warp          = dxva->coding.warped_motion;
1917     regs->swreg5.sw_show_frame          = dxva->format.show_frame;
1918     regs->swreg5.sw_switchable_motion_mode  = dxva->coding.switchable_motion_mode;
1919     regs->swreg5.sw_enable_cdef         = dxva->coding.cdef_en;
1920     regs->swreg5.sw_allow_masked_compound   = dxva->coding.masked_compound;
1921     regs->swreg5.sw_allow_interintra    = dxva->coding.interintra_compound;
1922     regs->swreg5.sw_enable_intra_edge_filter = dxva->coding.intra_edge_filter;
1923     regs->swreg5.sw_allow_filter_intra  = dxva->coding.filter_intra;
1924     regs->swreg5.sw_enable_jnt_comp     = dxva->coding.jnt_comp;
1925     regs->swreg5.sw_enable_dual_filter  = dxva->coding.dual_filter;
1926     regs->swreg5.sw_reduced_tx_set_used = dxva->coding.reduced_tx_set;
1927     regs->swreg5.sw_allow_screen_content_tools = dxva->coding.screen_content_tools;
1928     regs->swreg5.sw_allow_intrabc       = dxva->coding.intrabc;
1929 
1930     regs->swreg5.sw_force_interger_mv   = dxva->coding.integer_mv;
1931 
1932     vdpu_av1d_set_global_model(p_hal, dxva);
1933     vdpu_av1d_set_tile_info_mem(p_hal, dxva);
1934 
1935     if ((dxva->format.frame_type && (dxva->format.frame_type != AV1_FRAME_INTRA_ONLY))
1936         || dxva->coding.intrabc) {
1937         vdpu_av1d_set_reference_frames(p_hal, ctx, dxva);
1938     }
1939     vdpu_av1d_set_segmentation(ctx, dxva);
1940     vdpu_av1d_set_loopfilter(p_hal, dxva);
1941     vdpu_av1d_set_picture_dimensions(p_hal, dxva);
1942     vdpu_av1d_set_cdef(p_hal, dxva);
1943     vdpu_av1d_set_lr(p_hal, dxva);
1944     vdpu_av1d_set_fgs(ctx, dxva);
1945     vdpu_av1d_set_prob(p_hal, dxva);
1946     vdpu_av1d_set_tile_info_regs(ctx, dxva);
1947 
1948 #if DUMP_AV1_DATAS/* dump buffer */
1949     {
1950         char name[128];
1951         char *path = "/data/video";
1952         static int g_frame_num = 0;
1953         FILE *fp;
1954         RK_U32 i;
1955         RK_U32 *data;
1956         RK_U32 size;
1957 
1958         data = mpp_buffer_get_ptr(ctx->global_model);
1959         size = MPP_ALIGN(GLOBAL_MODEL_SIZE, 2048);
1960         memset(name, 0, sizeof(name));
1961         sprintf(name, "%s/global_mode_%d.txt", path, g_frame_num);
1962         fp = fopen(name, "wb");
1963         for ( i = 0; i < size / 4; i++)
1964             fprintf(fp, "%08x\n", data[i]);
1965         fflush(fp);
1966         fclose(fp);
1967 
1968         data = mpp_buffer_get_ptr(ctx->tile_info);
1969         size = AV1_TILE_INFO_SIZE;
1970         memset(name, 0, sizeof(name));
1971         sprintf(name, "%s/tile_info_%d.txt", path, g_frame_num);
1972         fp = fopen(name, "wb");
1973         for ( i = 0; i < size / 4; i++)
1974             fprintf(fp, "%08x\n", data[i]);
1975         fflush(fp);
1976         fclose(fp);
1977 
1978         data = mpp_buffer_get_ptr(streambuf);
1979         size = MPP_ALIGN(p_hal->strm_len, 1);
1980         memset(name, 0, sizeof(name));
1981         sprintf(name, "%s/stream_%d.txt", path, g_frame_num);
1982         fp = fopen(name, "wb");
1983         fwrite((RK_U8*)data, 1, size, fp);
1984         fflush(fp);
1985         fclose(fp);
1986 
1987         data = mpp_buffer_get_ptr(ctx->film_grain_mem);
1988         size = MPP_ALIGN(sizeof(AV1FilmGrainMemory), 2048);
1989         memset(name, 0, sizeof(name));
1990         sprintf(name, "%s/film_grain_mem_%d.txt", path, g_frame_num);
1991         fp = fopen(name, "wb");
1992         for ( i = 0; i < size / 4; i++)
1993             fprintf(fp, "%08x\n", data[i]);
1994         fflush(fp);
1995         fclose(fp);
1996 
1997         data = mpp_buffer_get_ptr(ctx->prob_tbl_base);
1998         size = MPP_ALIGN(sizeof(AV1CDFs), 2048);
1999         memset(name, 0, sizeof(name));
2000         sprintf(name, "%s/prob_tbl_%d.txt", path, g_frame_num);
2001         fp = fopen(name, "wb");
2002         for ( i = 0; i < size / 4; i++)
2003             fprintf(fp, "%08x\n", data[i]);
2004         fflush(fp);
2005         fclose(fp);
2006 
2007         data = mpp_buffer_get_ptr(ctx->prob_tbl_out_base);
2008         size = MPP_ALIGN(sizeof(AV1CDFs), 2048);
2009         memset(name, 0, sizeof(name));
2010         sprintf(name, "%s/prob_tbl_out_%d.txt", path, g_frame_num);
2011         fp = fopen(name, "wb");
2012         for ( i = 0; i < size / 4; i++)
2013             fprintf(fp, "%08x\n", data[i]);
2014         fflush(fp);
2015         fclose(fp);
2016 
2017         g_frame_num ++;
2018     }
2019 #endif
2020 
2021     regs->swreg7.sw_blackwhite_e    = dxva->format.mono_chrome;
2022     regs->swreg7.sw_clip_to_restricted_range  = dxva->film_grain.clip_to_restricted_range;
2023     regs->swreg7.sw_delta_q_res_log     = dxva->quantization.delta_q_res;
2024     regs->swreg7.sw_delta_q_present     = dxva->quantization.delta_q_present;
2025 
2026     regs->swreg8.sw_idr_pic_e           = !dxva->format.frame_type;
2027     regs->swreg8.sw_quant_base_qindex   = dxva->quantization.base_qindex;
2028     regs->swreg8.sw_bit_depth_y_minus8  = dxva->bitdepth - 8;
2029     regs->swreg8.sw_bit_depth_c_minus8  = dxva->bitdepth - 8;
2030 
2031     regs->swreg11.sw_mcomp_filt_type    = dxva->interp_filter;
2032     regs->swreg11.sw_high_prec_mv_e     = dxva->coding.high_precision_mv;
2033     regs->swreg11.sw_comp_pred_mode     =  dxva->coding.reference_mode ? 2 : 0;
2034     regs->swreg11.sw_transform_mode     = dxva->coding.tx_mode;
2035     regs->swreg12.sw_max_cb_size        = dxva->coding.use_128x128_superblock ? 7 : 6;;
2036     regs->swreg12.sw_min_cb_size        = 3;
2037 
2038     /* unused in comdel */
2039     regs->swreg12.sw_av1_comp_pred_fixed_ref    = 0;
2040     regs->swreg13.sw_comp_pred_var_ref0_av1     = 0;
2041     regs->swreg13.sw_comp_pred_var_ref1_av1     = 0;
2042     regs->swreg14.sw_filt_level_seg0            = 0;
2043     regs->swreg15.sw_filt_level_seg1            = 0;
2044     regs->swreg16.sw_filt_level_seg2            = 0;
2045     regs->swreg17.sw_filt_level_seg3            = 0;
2046     regs->swreg18.sw_filt_level_seg4            = 0;
2047     regs->swreg19.sw_filt_level_seg5            = 0;
2048     regs->swreg31.sw_filt_level_seg6            = 0;
2049     regs->swreg32.sw_filt_level_seg7            = 0;
2050 
2051 
2052     regs->swreg13.sw_qp_delta_y_dc_av1          = dxva->quantization.y_dc_delta_q;
2053     regs->swreg13.sw_qp_delta_ch_dc_av1         = dxva->quantization.u_dc_delta_q;
2054     regs->swreg13.sw_qp_delta_ch_ac_av1         = dxva->quantization.u_ac_delta_q;
2055     regs->swreg47.sw_qmlevel_y                  = dxva->quantization.qm_y;
2056     regs->swreg48.sw_qmlevel_u                  = dxva->quantization.qm_u;
2057     regs->swreg49.sw_qmlevel_v                  = dxva->quantization.qm_v;
2058 
2059     regs->swreg13.sw_lossless_e                 = dxva->coded_lossless;
2060     regs->swreg28.sw_quant_delta_v_dc           = dxva->quantization.v_dc_delta_q;
2061     regs->swreg29.sw_quant_delta_v_ac           = dxva->quantization.v_ac_delta_q;
2062 
2063     regs->swreg31.sw_skip_ref0              = dxva->skip_ref0 ? dxva->skip_ref0 : 1;
2064     regs->swreg32.sw_skip_ref1              = dxva->skip_ref1 ? dxva->skip_ref1 : 1;
2065 
2066     /*input out put buf cfg*/
2067     {
2068         // RK_U32 out_w = MPP_ALIGN(4 * width * bit_depth, 128) / 8;
2069         // RK_U32 out_h = height / 4;
2070         // RK_U32 y_stride = out_w * out_h;
2071         // RK_U32 uv_stride = y_stride / 2;
2072 
2073         RK_U32 y_stride = ctx->luma_size;
2074         RK_U32 uv_stride = y_stride / 2;
2075         RK_U32 mv_offset = y_stride + uv_stride + 64;
2076         RK_U32 offset = (dxva->frame_tag_size & (~0xf));
2077 
2078         regs->addr_cfg.swreg65.sw_dec_out_ybase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);//mpp_buffer_get_fd(buffer);
2079         regs->addr_cfg.swreg99.sw_dec_out_cbase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
2080         mpp_dev_set_reg_offset(p_hal->dev, 99, y_stride);
2081         regs->addr_cfg.swreg133.sw_dec_out_dbase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
2082         mpp_dev_set_reg_offset(p_hal->dev, 133, mv_offset);
2083 
2084         /*  if (ctx->fbc_en) {
2085               regs->swreg190.sw_dec_out_tybase_lsb = 0;// TODO:
2086               regs->swreg224.sw_dec_out_tcbase_lsb = 0;// TODO:
2087           }*/
2088 
2089         regs->swreg258.sw_strm_buffer_len = MPP_ALIGN(p_hal->strm_len, 128);//
2090         regs->swreg5.sw_strm_start_bit    =  (dxva->frame_tag_size & 0xf) * 8; // bit start to decode
2091         regs->swreg6.sw_stream_len  = MPP_ALIGN(p_hal->strm_len, 128);//p_hal->strm_len - offset;
2092         regs->swreg259.sw_strm_start_offset = 0;
2093         regs->addr_cfg.swreg168.sw_stream_base_msb = 0;
2094         regs->addr_cfg.swreg169.sw_stream_base_lsb = mpp_buffer_get_fd(streambuf);
2095         mpp_dev_set_reg_offset(p_hal->dev, 169, offset);
2096 
2097         AV1D_DBG(AV1D_DBG_LOG, "stream len %d\n", p_hal->strm_len);
2098         AV1D_DBG(AV1D_DBG_LOG, "stream offset %d\n", offset);
2099         AV1D_DBG(AV1D_DBG_LOG, "stream tag_size %d\n", dxva->frame_tag_size);
2100         AV1D_DBG(AV1D_DBG_LOG, "stream start_bit %d\n", regs->swreg5.sw_strm_start_bit);
2101     }
2102     regs->swreg314.sw_dec_alignment = 64;
2103 
2104     regs->addr_cfg.swreg175.sw_mc_sync_curr_base_lsb = mpp_buffer_get_fd(ctx->tile_buf);
2105     regs->addr_cfg.swreg177.sw_mc_sync_left_base_lsb = mpp_buffer_get_fd(ctx->tile_buf);
2106 
2107     regs->swreg55.sw_apf_disable = 0;
2108     regs->swreg55.sw_apf_threshold = 8;
2109     regs->swreg58.sw_dec_buswidth = 2;
2110     regs->swreg58.sw_dec_max_burst = 16;
2111     regs->swreg266.sw_error_conceal_e                     = 0;
2112     regs->swreg265.sw_axi_rd_ostd_threshold               = 64;
2113     regs->swreg265.sw_axi_wr_ostd_threshold               = 64;
2114 
2115     regs->swreg318.sw_ext_timeout_cycles                  = 0xfffffff;
2116     regs->swreg318.sw_ext_timeout_override_e              = 1;
2117     regs->swreg319.sw_timeout_cycles                      = 0xfffffff;
2118     regs->swreg319.sw_timeout_override_e                  = 1;
2119 
2120     /* pp cfg */
2121     regs->vdpu_av1d_pp_cfg.swreg320.sw_pp_out_e = 1;
2122     regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_in_format = 0;
2123     regs->vdpu_av1d_pp_cfg.swreg394.sw_pp0_dup_hor = 1;
2124     regs->vdpu_av1d_pp_cfg.swreg394.sw_pp0_dup_ver = 1;
2125     regs->vdpu_av1d_pp_cfg.swreg331.sw_pp_in_height = height / 2;
2126     regs->vdpu_av1d_pp_cfg.swreg331.sw_pp_in_width = width / 2;
2127     regs->vdpu_av1d_pp_cfg.swreg332.sw_pp_out_height = height;
2128     regs->vdpu_av1d_pp_cfg.swreg332.sw_pp_out_width = width;
2129     regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_y_stride = hor_stride;
2130     regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_c_stride = hor_stride;
2131 
2132     // regs->vdpu_av1d_pp_cfg.swreg337.sw_pp_in_y_stride = hor_stride;
2133     // regs->vdpu_av1d_pp_cfg.swreg337.sw_pp_in_c_stride = hor_stride;
2134     if (ctx->fbc_en) {
2135         RK_U32 vir_left = 0, vir_right = 0, vir_top = 0, vir_bottom = 0;
2136         RK_U32 bypass_filter = !regs->swreg5.sw_superres_is_scaled &&
2137                                !regs->swreg5.sw_enable_cdef &&
2138                                !regs->swreg14.sw_filt_level0 &&
2139                                !regs->swreg15.sw_filt_level1 &&
2140                                !regs->swreg18.sw_lr_type;
2141 
2142         regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_y_stride = dxva->bitdepth > 8 ?
2143                                                              width * 2 : width;
2144         regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_c_stride = dxva->bitdepth > 8 ?
2145                                                              width * 2 : width;
2146         regs->swreg58.sw_dec_axi_wd_id_e = 1;
2147         regs->swreg58.sw_dec_axi_rd_id_e = 1;
2148         regs->vdpu_av1d_pp_cfg.swreg320.sw_pp_out_tile_e = 1;
2149         regs->vdpu_av1d_pp_cfg.swreg321.sw_pp_tile_size = 2;
2150 
2151         vir_left = 0;
2152         if (((vir_left + width) % 16))
2153             vir_right = 16 - ((vir_left + width) % 16);
2154         else
2155             vir_right = 0;
2156 
2157         if (!bypass_filter)
2158             vir_top = 8;
2159         else
2160             vir_top = 0;
2161 
2162         if (((vir_top + height) % 16))
2163             vir_bottom = 16 - ((vir_top + height) % 16);
2164         else
2165             vir_bottom = 0;
2166 
2167         regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_top = vir_top;
2168         regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_left = vir_left;
2169         regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_bottom = vir_bottom;
2170         regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_right = vir_right;
2171         mpp_frame_set_offset_y(mframe, vir_top);
2172         mpp_frame_set_ver_stride(mframe, vir_top + height + vir_bottom);
2173         regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_out_format = 0;
2174         regs->vdpu_av1d_pp_cfg.swreg326.sw_pp_out_lu_base_lsb = mpp_buffer_get_fd(buffer);
2175         regs->vdpu_av1d_pp_cfg.swreg328.sw_pp_out_ch_base_lsb = mpp_buffer_get_fd(buffer);
2176         regs->vdpu_av1d_pp_cfg.swreg505.sw_pp0_afbc_tile_base_lsb = mpp_buffer_get_fd(buffer);
2177     } else {
2178         RK_U32 out_w = hor_stride;
2179         RK_U32 out_h = ver_stride;
2180         RK_U32 y_stride = out_w * out_h;
2181         RK_U32 out_fmt = 0;
2182 
2183         if (mpp_frame_get_fmt(mframe) == MPP_FMT_YUV420SP)
2184             out_fmt = 3;
2185 
2186         /*
2187          * out_fmt:
2188          * 0 is 8bit or 10bit output by syntax
2189          * 3 is force 8bit output
2190          */
2191         regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_out_format = out_fmt;
2192         regs->vdpu_av1d_pp_cfg.swreg326.sw_pp_out_lu_base_lsb = mpp_buffer_get_fd(buffer);
2193         regs->vdpu_av1d_pp_cfg.swreg328.sw_pp_out_ch_base_lsb = mpp_buffer_get_fd(buffer);
2194         mpp_dev_set_reg_offset(p_hal->dev, 328, y_stride);
2195     }
2196 
2197 __RETURN:
2198     return ret = MPP_OK;
2199 }
2200 
vdpu_av1d_start(void * hal,HalTaskInfo * task)2201 MPP_RET vdpu_av1d_start(void *hal, HalTaskInfo *task)
2202 {
2203     MPP_RET ret = MPP_ERR_UNKNOW;
2204     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2205     INP_CHECK(ret, NULL == p_hal);
2206     if (task->dec.flags.parse_err ||
2207         task->dec.flags.ref_err) {
2208         goto __RETURN;
2209     }
2210 
2211     VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
2212     VdpuAv1dRegSet *regs = p_hal->fast_mode ?
2213                            reg_ctx->reg_buf[task->dec.reg_index].regs :
2214                            reg_ctx->regs;
2215     MppDev dev = p_hal->dev;
2216 #if DUMP_AV1_DATAS
2217     {
2218         RK_U32 i = 0;
2219         RK_U32 *p = (RK_U32*)regs;
2220         char fname[128];
2221         FILE *fp_in = NULL;
2222         static RK_U32 g_frame_no = 0;
2223 
2224         sprintf(fname, "/data/video/reg_%d_in.txt", g_frame_no++);
2225         fp_in = fopen(fname, "wb");
2226         for (i = 0; i < sizeof(*regs) / 4; i++, p++)
2227             fprintf(fp_in, "reg[%3d] = %08x\n", i, *p);
2228 
2229         fflush(fp_in);
2230         fclose(fp_in);
2231     }
2232 #endif
2233     do {
2234         MppDevRegWrCfg wr_cfg;
2235         MppDevRegRdCfg rd_cfg;
2236 
2237         wr_cfg.reg = regs;
2238         wr_cfg.size = sizeof(*regs);
2239         wr_cfg.offset = 0;
2240         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
2241         if (ret) {
2242             mpp_err_f("set register write failed %d\n", ret);
2243             break;
2244         }
2245 
2246         rd_cfg.reg      = regs;
2247         rd_cfg.size     = sizeof(*regs);
2248         rd_cfg.offset   = 0;
2249 
2250         ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
2251         if (ret) {
2252             mpp_err_f("set register read failed %d\n", ret);
2253             break;
2254         }
2255         /* send request to hardware */
2256         ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
2257         if (ret) {
2258             mpp_err_f("send cmd failed %d\n", ret);
2259             break;
2260         }
2261     } while (0);
2262 
2263 __RETURN:
2264     return ret = MPP_OK;
2265 }
2266 
vdpu_av1d_wait(void * hal,HalTaskInfo * task)2267 MPP_RET vdpu_av1d_wait(void *hal, HalTaskInfo *task)
2268 {
2269     MPP_RET ret = MPP_ERR_UNKNOW;
2270     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2271 
2272     INP_CHECK(ret, NULL == p_hal);
2273     VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
2274     VdpuAv1dRegSet *p_regs = p_hal->fast_mode ?
2275                              reg_ctx->reg_buf[task->dec.reg_index].regs :
2276                              reg_ctx->regs;
2277 
2278     if (task->dec.flags.parse_err ||
2279         task->dec.flags.ref_err) {
2280         goto __SKIP_HARD;
2281     }
2282 
2283     ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
2284     if (ret)
2285         mpp_err_f("poll cmd failed %d\n", ret);
2286 #if DUMP_AV1_DATAS
2287     {
2288         char fname[128];
2289         FILE *fp_in = NULL;
2290         static RK_U32 g_frame_no = 0;
2291         RK_U32 *p = (RK_U32*)p_regs;
2292         RK_U32 i;
2293 
2294         sprintf(fname, "/data/video/reg_%d_out.txt", g_frame_no++);
2295         fp_in = fopen(fname, "wb");
2296         for (i = 0; i < sizeof(*p_regs) / 4; i++, p++)
2297             fprintf(fp_in, "reg[%3d] = %08x\n", i, *p);
2298 
2299         fflush(fp_in);
2300         fclose(fp_in);
2301     }
2302 #endif
2303 
2304 __SKIP_HARD:
2305     if (p_hal->dec_cb) {
2306         DecCbHalDone m_ctx;
2307         RK_U32 *prob_out = (RK_U32*)mpp_buffer_get_ptr(reg_ctx->prob_tbl_out_base);
2308 
2309         m_ctx.task = mpp_buffer_get_ptr(reg_ctx->prob_tbl_out_base);//(void *)&task->dec;
2310         m_ctx.regs = (RK_U32 *)prob_out;
2311         if (!p_regs->swreg1.sw_dec_rdy_int/* decode err */)
2312             m_ctx.hard_err = 1;
2313         else
2314             m_ctx.hard_err = 0;
2315 
2316         mpp_callback(p_hal->dec_cb, &m_ctx);
2317     }
2318     if (p_hal->fast_mode)
2319         reg_ctx->reg_buf[task->dec.reg_index].valid = 0;
2320 
2321     (void)task;
2322 __RETURN:
2323     return ret = MPP_OK;
2324 }
2325 
vdpu_av1d_reset(void * hal)2326 MPP_RET vdpu_av1d_reset(void *hal)
2327 {
2328     MPP_RET ret = MPP_ERR_UNKNOW;
2329     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2330 
2331     INP_CHECK(ret, NULL == p_hal);
2332 
2333 
2334 __RETURN:
2335     return ret = MPP_OK;
2336 }
2337 
vdpu_av1d_flush(void * hal)2338 MPP_RET vdpu_av1d_flush(void *hal)
2339 {
2340     MPP_RET ret = MPP_ERR_UNKNOW;
2341     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2342 
2343     INP_CHECK(ret, NULL == p_hal);
2344 
2345 __RETURN:
2346     return ret = MPP_OK;
2347 }
2348 
vdpu_av1d_control(void * hal,MpiCmd cmd_type,void * param)2349 MPP_RET vdpu_av1d_control(void *hal, MpiCmd cmd_type, void *param)
2350 {
2351     MPP_RET ret = MPP_ERR_UNKNOW;
2352     Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2353 
2354     INP_CHECK(ret, NULL == p_hal);
2355 
2356     switch ((MpiCmd)cmd_type) {
2357     case MPP_DEC_SET_FRAME_INFO: {
2358         MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param);
2359         RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param);
2360         RK_U32 imgheight = mpp_frame_get_height((MppFrame)param);
2361 
2362         AV1D_DBG(AV1D_DBG_LOG, "control info: fmt %d, w %d, h %d\n", fmt, imgwidth, imgheight);
2363         if ((fmt & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV422SP) {
2364             mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
2365         }
2366         break;
2367     }
2368     case MPP_DEC_SET_OUTPUT_FORMAT: {
2369 
2370     } break;
2371     default:
2372         break;
2373     }
2374 
2375 __RETURN:
2376     return ret = MPP_OK;
2377 }
2378 
2379 const MppHalApi hal_av1d_vdpu = {
2380     .name       = "av1d_vdpu",
2381     .type       = MPP_CTX_DEC,
2382     .coding     = MPP_VIDEO_CodingAV1,
2383     .ctx_size   = sizeof(VdpuAv1dRegCtx),
2384     .flag       = 0,
2385     .init       = vdpu_av1d_init,
2386     .deinit     = vdpu_av1d_deinit,
2387     .reg_gen    = vdpu_av1d_gen_regs,
2388     .start      = vdpu_av1d_start,
2389     .wait       = vdpu_av1d_wait,
2390     .reset      = vdpu_av1d_reset,
2391     .flush      = vdpu_av1d_flush,
2392     .control    = vdpu_av1d_control,
2393 };
2394