1 /*
2 * Copyright 2020 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_av1d_vdpu"
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "rk_type.h"
24 #include "mpp_err.h"
25 #include "mpp_mem.h"
26 #include "mpp_common.h"
27 #include "mpp_bitput.h"
28 #include "mpp_hal.h"
29 #include "mpp_dec_cb_param.h"
30 #include "mpp_device.h"
31 #include "hal_bufs.h"
32
33 #include "hal_av1d_vdpu_reg.h"
34 #include "hal_av1d_common.h"
35 #include "av1d_syntax.h"
36 #include "film_grain_noise_table.h"
37 #include "av1d_common.h"
38
39 #define VDPU_FAST_REG_SET_CNT 3
40 #define AV1_MAX_TILES 128
41 #define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
42 #define GM_GLOBAL_MODELS_PER_FRAME 7
43 #define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
44 #define GLOBAL_MODEL_SIZE (GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE)
45 #define MaxTiles 128
46
47 #define DUMP_AV1_DATAS 0
48
49 typedef enum AV1D_FILT_TYPE_E {
50 DB_DATA_COL,
51 DB_CTRL_COL,
52 CDEF_COL,
53 SR_COL,
54 LR_COL,
55 RFC_COL,
56 FILT_TYPE_BUT,
57 } Av1dFiltType_e;
58
59 typedef struct filt_info_t {
60 RK_U32 size;
61 RK_U32 offset;
62 } filtInfo;
63
64 typedef struct av1d_rkv_buf_t {
65 RK_U32 valid;
66 VdpuAv1dRegSet *regs;
67 } av1dVdpuBuf;
68
69 typedef struct VdpuAv1dRegCtx_t {
70 av1dVdpuBuf reg_buf[VDPU_FAST_REG_SET_CNT];
71 MppBuffer prob_tbl_base;
72 MppBuffer prob_tbl_out_base;
73 MppBuffer tile_info;
74 MppBuffer film_grain_mem;
75 MppBuffer global_model;
76 MppBuffer filter_mem;
77 MppBuffer tile_buf;
78 filtInfo filt_info[FILT_TYPE_BUT];
79
80 AV1CDFs *cdfs;
81 MvCDFs *cdfs_ndvc;
82 AV1CDFs default_cdfs;
83 MvCDFs default_cdfs_ndvc;
84 AV1CDFs cdfs_last[NUM_REF_FRAMES];
85 MvCDFs cdfs_last_ndvc[NUM_REF_FRAMES];
86 RK_U32 refresh_frame_flags;
87
88 RK_U32 width;
89 RK_U32 height;
90 RK_S32 hor_stride;
91 RK_S32 ver_stride;
92 RK_U32 luma_size;
93 RK_U32 chroma_size;
94
95 FilmGrainMemory fgsmem;
96
97 RK_S8 prev_out_buffer_i;
98 RK_U8 fbc_en;
99 RK_U8 resolution_change;
100 RK_U8 tile_transpose;
101 RK_U32 ref_frame_sign_bias[AV1_REF_LIST_SIZE];
102
103 VdpuAv1dRegSet *regs;
104 HalBufs tile_out_bufs;
105 RK_U32 tile_out_count;
106 size_t tile_out_size;
107
108 RK_U32 num_tile_cols;
109 } VdpuAv1dRegCtx;
110
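/*
 * Buffer slot alignment callbacks registered with mpp_buf_slot in
 * vdpu_av1d_init(): horizontal stride is padded to 16 pixels, vertical
 * stride to 8 lines, and the buffer length is doubled after a 128-byte
 * alignment (presumably to leave headroom for chroma and high-bit-depth
 * data within the same slot). rkv_len_align_422 is the 4:2:2 variant.
 */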
111 static RK_U32 rkv_ver_align(RK_U32 val)
112 {
113 return MPP_ALIGN(val, 8);
114 }
115
116 static RK_U32 rkv_hor_align(RK_U32 val)
117 {
118 return MPP_ALIGN(val, 16);
119 }
120
121 static RK_U32 rkv_len_align(RK_U32 val)
122 {
123 return (2 * MPP_ALIGN(val, 128));
124 }
125
126 static RK_U32 rkv_len_align_422(RK_U32 val)
127 {
128 return ((5 * MPP_ALIGN(val, 64)) / 2);
129 }
130
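/*
 * Allocate the register context and the fixed-size auxiliary buffers:
 * one VdpuAv1dRegSet per in-flight frame (VDPU_FAST_REG_SET_CNT copies in
 * fast mode, otherwise a single set), the CDF probability tables (input
 * and output), the tile info table, film grain memory, the global motion
 * models and the per-tile size buffer.
 */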
131 static MPP_RET hal_av1d_alloc_res(void *hal)
132 {
133 MPP_RET ret = MPP_OK;
134 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
135 RK_U32 max_cnt = p_hal->fast_mode ? VDPU_FAST_REG_SET_CNT : 1;
136 RK_U32 i = 0;
137 INP_CHECK(ret, NULL == p_hal);
138
139 MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(VdpuAv1dRegCtx)));
140 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
141
142 //!< malloc buffers
143 for (i = 0; i < max_cnt; i++) {
144 reg_ctx->reg_buf[i].regs = mpp_calloc(VdpuAv1dRegSet, 1);
145 memset(reg_ctx->reg_buf[i].regs, 0, sizeof(VdpuAv1dRegSet));
146 }
147
148 if (!p_hal->fast_mode) {
149 reg_ctx->regs = reg_ctx->reg_buf[0].regs;
150 }
151
152 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->prob_tbl_base, MPP_ALIGN(sizeof(AV1CDFs), 2048)));
153 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->prob_tbl_out_base, MPP_ALIGN(sizeof(AV1CDFs), 2048)));
154 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->tile_info, AV1_TILE_INFO_SIZE));
155 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->film_grain_mem, MPP_ALIGN(sizeof(AV1FilmGrainMemory), 2048)));
156 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->global_model, MPP_ALIGN(GLOBAL_MODEL_SIZE, 2048)));
157 BUF_CHECK(ret, mpp_buffer_get(p_hal->buf_group, &reg_ctx->tile_buf, MPP_ALIGN(32 * MaxTiles, 4096)));
158
159 __RETURN:
160 return ret;
161 __FAILED:
162 return ret;
163 }
164
165 static void vdpu_av1d_filtermem_release(VdpuAv1dRegCtx *ctx)
166 {
167 BUF_PUT(ctx->filter_mem);
168 }
169
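/*
 * Size the shared filter column buffer. Each filter stage (deblock data,
 * deblock control, CDEF, superres, loop restoration) gets a 128-byte
 * aligned region per tile column; the offsets into the single filter_mem
 * allocation are recorded in filt_info[] and later programmed through
 * mpp_dev_set_reg_offset(). The per-column sizing constants follow the
 * commented-out vendor asic_buff reference code and assume a 10-bit
 * worst case (max_bit_depth = 10).
 */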
170 static MPP_RET vdpu_av1d_filtermem_alloc(Av1dHalCtx *p_hal, VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
171 {
172 RK_U32 size = 0;
173 RK_U32 pic_height = MPP_ALIGN(dxva->height, 64);
174 RK_U32 height_in_sb = pic_height / 64;
175 RK_U32 stripe_num = ((pic_height + 8) + 63) / 64;
176 RK_U32 max_bit_depth = 10;
177 RK_U32 num_tile_cols = 1 << dxva->tile_cols_log2;//dxva->tiles.cols;
178 filtInfo *filt_info = ctx->filt_info;
179
180 /* db tile col data buffer */
181 // asic_buff->db_data_col_offset = 0;
182 // asic_buff->db_data_col_tsize = NEXT_MULTIPLE(pic_height * 12 * max_bit_depth / 8, 128);
183 // size = asic_buff->db_data_col_tsize * num_tile_cols;
184 // asic_buff->db_ctrl_col_offset = size;
185
186 filt_info[DB_DATA_COL].offset = 0;
187 filt_info[DB_DATA_COL].size = MPP_ALIGN(pic_height * 12 * max_bit_depth / 8, 128);
188 size += filt_info[DB_DATA_COL].size * num_tile_cols;
189
190
191 /* db tile col ctrl buffer */
192 filt_info[DB_CTRL_COL].offset = size;
193 filt_info[DB_CTRL_COL].size = MPP_ALIGN(pic_height * 2 * 16 / 4, 128);
194 size += filt_info[DB_CTRL_COL].size * num_tile_cols;
195
196 // size += asic_buff->db_ctrl_col_tsize * num_tile_cols;
197 // asic_buff->cdef_col_offset = size;
198
199 /* cdef tile col buffer */
200 filt_info[CDEF_COL].offset = size;
201 filt_info[CDEF_COL].size = MPP_ALIGN(height_in_sb * 44 * max_bit_depth * 16 / 8, 128);
202 size += filt_info[CDEF_COL].size * num_tile_cols;
203 // asic_buff->cdef_col_tsize = NEXT_MULTIPLE(height_in_sb * 44 * max_bit_depth * 16 / 8, 128);
204 // size += asic_buff->cdef_col_tsize * num_tile_cols;
205 // asic_buff->sr_col_offset = size;
206
207 /* sr tile col buffer */
208 filt_info[SR_COL].offset = size;
209 filt_info[SR_COL].size = MPP_ALIGN(height_in_sb * (3040 + 1280), 128);
210 size += filt_info[SR_COL].size * num_tile_cols;
211 // asic_buff->sr_col_tsize = NEXT_MULTIPLE(height_in_sb * (3040 + 1280), 128);
212 // size += asic_buff->sr_col_tsize * num_tile_cols;
213 // asic_buff->lr_col_offset = size;
214
215 /* lr tile col buffer */
216 filt_info[LR_COL].offset = size;
217 filt_info[LR_COL].size = MPP_ALIGN(stripe_num * 1536 * max_bit_depth / 8, 128);
218 size += filt_info[LR_COL].size * num_tile_cols;
219 // asic_buff->lr_col_tsize = NEXT_MULTIPLE(stripe_num * 1536 * max_bit_depth / 8, 128);
220 // size += asic_buff->lr_col_tsize * num_tile_cols;
221 // if (dec_cont->use_multicore) {
222 // asic_buff->rfc_col_offset = size;
223 // asic_buff->rfc_col_size = NEXT_MULTIPLE(asic_buff->height, 8) / 8 * 16 * 2;
224 // size += asic_buff->rfc_col_size * num_tile_cols;
225 // }
226 if (mpp_buffer_get(p_hal->buf_group, &ctx->filter_mem, MPP_ALIGN(size, SZ_4K)))
227 return MPP_NOK;
228
229 return MPP_OK;
230 }
231
232 static void hal_av1d_release_res(void *hal)
233 {
234 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
235 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
236 RK_U32 i = 0;
237 RK_U32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
238
239 for (i = 0; i < loop; i++)
240 MPP_FREE(reg_ctx->reg_buf[i].regs);
241
242 BUF_PUT(reg_ctx->prob_tbl_base);
243 BUF_PUT(reg_ctx->prob_tbl_out_base);
244 BUF_PUT(reg_ctx->tile_info);
245 BUF_PUT(reg_ctx->film_grain_mem);
246 BUF_PUT(reg_ctx->global_model);
247 BUF_PUT(reg_ctx->tile_buf);
248 vdpu_av1d_filtermem_release(reg_ctx);
249 hal_bufs_deinit(reg_ctx->tile_out_bufs);
250
251 MPP_FREE(p_hal->reg_ctx);
252 }
253
254 MPP_RET vdpu_av1d_deinit(void *hal)
255 {
256 hal_av1d_release_res(hal);
257
258 return MPP_OK;
259 }
260
261 MPP_RET vdpu_av1d_init(void *hal, MppHalCfg *cfg)
262 {
263 MPP_RET ret = MPP_OK;
264 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
265 INP_CHECK(ret, NULL == p_hal);
266
267 FUN_CHECK(hal_av1d_alloc_res(hal));
268
269 {
270 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
271
272 reg_ctx->cdfs = &reg_ctx->default_cdfs;
273 reg_ctx->cdfs_ndvc = &reg_ctx->default_cdfs_ndvc;
274 reg_ctx->tile_transpose = 1;
275 }
276
277 mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, rkv_hor_align);
278 mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, rkv_ver_align);
279 mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align);
280
281 (void)cfg;
282 __RETURN:
283 return MPP_OK;
284 __FAILED:
285 vdpu_av1d_deinit(hal);
286
287 return ret;
288 }
289
290 static void set_ref_width(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
291 {
292 if (i == 0) {
293 regs->swreg33.sw_ref0_width = val;
294 } else if (i == 1) {
295 regs->swreg34.sw_ref1_width = val;
296 } else if (i == 2) {
297 regs->swreg35.sw_ref2_width = val;
298 } else if (i == 3) {
299 regs->swreg43.sw_ref3_width = val;
300 } else if (i == 4) {
301 regs->swreg44.sw_ref4_width = val;
302 } else if (i == 5) {
303 regs->swreg45.sw_ref5_width = val;
304 } else if (i == 6) {
305 regs->swreg46.sw_ref6_width = val;
306 } else {
307 mpp_err("Error: trying to set invalid reference index.");
308 }
309 }
310
311 static void set_ref_height(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
312 {
313 if (i == 0) {
314 regs->swreg33.sw_ref0_height = val;
315 } else if (i == 1) {
316 regs->swreg34.sw_ref1_height = val;
317 } else if (i == 2) {
318 regs->swreg35.sw_ref2_height = val;
319 } else if (i == 3) {
320 regs->swreg43.sw_ref3_height = val;
321 } else if (i == 4) {
322 regs->swreg44.sw_ref4_height = val;
323 } else if (i == 5) {
324 regs->swreg45.sw_ref5_height = val;
325 } else if (i == 6) {
326 regs->swreg46.sw_ref6_height = val;
327 } else {
328 mpp_err("Error: trying to set invalid reference index.");
329 }
330 }
331
332 static void set_ref_hor_scale(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
333 {
334 if (i == 0) {
335 regs->swreg36.sw_ref0_hor_scale = val;
336 } else if (i == 1) {
337 regs->swreg37.sw_ref1_hor_scale = val;
338 } else if (i == 2) {
339 regs->swreg38.sw_ref2_hor_scale = val;
340 } else if (i == 3) {
341 regs->swreg39.sw_ref3_hor_scale = val;
342 } else if (i == 4) {
343 regs->swreg40.sw_ref4_hor_scale = val;
344 } else if (i == 5) {
345 regs->swreg41.sw_ref5_hor_scale = val;
346 } else if (i == 6) {
347 regs->swreg42.sw_ref6_hor_scale = val;
348 } else {
349 mpp_err("Error: trying to set invalid reference index.");
350 }
351 }
352
353 static void set_ref_ver_scale(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
354 {
355 if (i == 0) {
356 regs->swreg36.sw_ref0_ver_scale = val;
357 } else if (i == 1) {
358 regs->swreg37.sw_ref1_ver_scale = val;
359 } else if (i == 2) {
360 regs->swreg38.sw_ref2_ver_scale = val;
361 } else if (i == 3) {
362 regs->swreg39.sw_ref3_ver_scale = val;
363 } else if (i == 4) {
364 regs->swreg40.sw_ref4_ver_scale = val;
365 } else if (i == 5) {
366 regs->swreg41.sw_ref5_ver_scale = val;
367 } else if (i == 6) {
368 regs->swreg42.sw_ref6_ver_scale = val;
369 } else {
370 mpp_err("Error: trying to set invalid reference index.");
371 }
372 }
373
374 static void set_ref_lum_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
375 {
376 HalBuf *tile_out_buf;
377 tile_out_buf = hal_bufs_get_buf(bufs, val);
378 // MppBuffer framebuf = NULL;
379 // mpp_buf_slot_get_prop(slots, val, SLOT_BUFFER, &framebuf);
380 if (tile_out_buf == NULL) {
381 mpp_err_f("get slots frame buff fail");
382 return;
383 }
384 val = mpp_buffer_get_fd(tile_out_buf->buf[0]);
385 if (i == 0) {
386 regs->addr_cfg.swreg67.sw_refer0_ybase_lsb = val;
387 } else if (i == 1) {
388 regs->addr_cfg.swreg69.sw_refer1_ybase_lsb = val;
389 } else if (i == 2) {
390 regs->addr_cfg.swreg71.sw_refer2_ybase_lsb = val;
391 } else if (i == 3) {
392 regs->addr_cfg.swreg73.sw_refer3_ybase_lsb = val;
393 } else if (i == 4) {
394 regs->addr_cfg.swreg75.sw_refer4_ybase_lsb = val;
395 } else if (i == 5) {
396 regs->addr_cfg.swreg77.sw_refer5_ybase_lsb = val;
397 } else if (i == 6) {
398 regs->addr_cfg.swreg79.sw_refer6_ybase_lsb = val;
399 } else {
400 mpp_err( "Error: trying to set invalid reference index.");
401 }
402 }
403
404 static void set_ref_lum_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
405 {
406 if (i == 0) {
407 regs->addr_cfg.swreg66.sw_refer0_ybase_msb = val;
408 } else if (i == 1) {
409 regs->addr_cfg.swreg68.sw_refer1_ybase_msb = val;
410 } else if (i == 2) {
411 regs->addr_cfg.swreg70.sw_refer2_ybase_msb = val;
412 } else if (i == 3) {
413 regs->addr_cfg.swreg72.sw_refer3_ybase_msb = val;
414 } else if (i == 4) {
415 regs->addr_cfg.swreg74.sw_refer4_ybase_msb = val;
416 } else if (i == 5) {
417 regs->addr_cfg.swreg76.sw_refer5_ybase_msb = val;
418 } else if (i == 6) {
419 regs->addr_cfg.swreg78.sw_refer6_ybase_msb = val;
420 } else {
421 mpp_err("Error: trying to set invalid reference index.");
422 }
423 }
424
425 static void set_ref_cb_base(Av1dHalCtx *p_hal, RK_S32 i, RK_S32 val, HalBufs bufs, RK_U32 offset)
426 {
427 VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
428 VdpuAv1dRegSet *regs = ctx->regs;
429 HalBuf *tile_out_buf;
430 tile_out_buf = hal_bufs_get_buf(bufs, val);
431 // MppBuffer framebuf = NULL;
432
433 // mpp_buf_slot_get_prop(slots, val, SLOT_BUFFER, &framebuf);
434 if (tile_out_buf == NULL) {
435 mpp_err_f("get slots frame buff fail");
436 return;
437 }
438 val = mpp_buffer_get_fd(tile_out_buf->buf[0]);
439
440 if (i == 0) {
441 mpp_dev_set_reg_offset(p_hal->dev, 101, offset);
442 regs->addr_cfg.swreg101.sw_refer0_cbase_lsb = val;
443 } else if (i == 1) {
444 mpp_dev_set_reg_offset(p_hal->dev, 103, offset);
445 regs->addr_cfg.swreg103.sw_refer1_cbase_lsb = val;
446 } else if (i == 2) {
447 mpp_dev_set_reg_offset(p_hal->dev, 105, offset);
448 regs->addr_cfg.swreg105.sw_refer2_cbase_lsb = val;
449 } else if (i == 3) {
450 mpp_dev_set_reg_offset(p_hal->dev, 107, offset);
451 regs->addr_cfg.swreg107.sw_refer3_cbase_lsb = val;
452 } else if (i == 4) {
453 mpp_dev_set_reg_offset(p_hal->dev, 109, offset);
454 regs->addr_cfg.swreg109.sw_refer4_cbase_lsb = val;
455 } else if (i == 5) {
456 mpp_dev_set_reg_offset(p_hal->dev, 111, offset);
457 regs->addr_cfg.swreg111.sw_refer5_cbase_lsb = val;
458 } else if (i == 6) {
459 mpp_dev_set_reg_offset(p_hal->dev, 113, offset);
460 regs->addr_cfg.swreg113.sw_refer6_cbase_lsb = val;
461 } else {
462 mpp_err("Error: trying to set invalid reference index.");
463 }
464 }
465
466 static void set_ref_cb_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
467 {
468 if (i == 0) {
469 regs->addr_cfg.swreg100.sw_refer0_cbase_msb = val;
470 } else if (i == 1) {
471 regs->addr_cfg.swreg102.sw_refer1_cbase_msb = val;
472 } else if (i == 2) {
473 regs->addr_cfg.swreg104.sw_refer2_cbase_msb = val;
474 } else if (i == 3) {
475 regs->addr_cfg.swreg106.sw_refer3_cbase_msb = val;
476 } else if (i == 4) {
477 regs->addr_cfg.swreg108.sw_refer4_cbase_msb = val;
478 } else if (i == 5) {
479 regs->addr_cfg.swreg110.sw_refer5_cbase_msb = val;
480 } else if (i == 6) {
481 regs->addr_cfg.swreg112.sw_refer6_cbase_msb = val;
482 } else {
483 mpp_err("Error: trying to set invalid reference index.");
484 }
485 }
486
487
488 static void set_ref_dbase(Av1dHalCtx *p_hal, RK_S32 i, RK_S32 val, HalBufs bufs, RK_U32 offset)
489 {
490 VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
491 VdpuAv1dRegSet *regs = ctx->regs;
492 HalBuf *tile_out_buf;
493 tile_out_buf = hal_bufs_get_buf(bufs, val);
494 // MppBuffer framebuf = NULL;
495
496 // mpp_buf_slot_get_prop(slots, val, SLOT_BUFFER, &framebuf);
497 if (tile_out_buf == NULL) {
498 mpp_err_f("get slots frame buff fail");
499 return;
500 }
501 val = mpp_buffer_get_fd(tile_out_buf->buf[0]);
502 if (i == 0) {
503 mpp_dev_set_reg_offset(p_hal->dev, 135, offset);
504 regs->addr_cfg.swreg135.sw_refer0_dbase_lsb = val;
505 } else if (i == 1) {
506 mpp_dev_set_reg_offset(p_hal->dev, 137, offset);
507 regs->addr_cfg.swreg137.sw_refer1_dbase_lsb = val;
508 } else if (i == 2) {
509 mpp_dev_set_reg_offset(p_hal->dev, 139, offset);
510 regs->addr_cfg.swreg139.sw_refer2_dbase_lsb = val;
511 } else if (i == 3) {
512 mpp_dev_set_reg_offset(p_hal->dev, 141, offset);
513 regs->addr_cfg.swreg141.sw_refer3_dbase_lsb = val;
514 } else if (i == 4) {
515 mpp_dev_set_reg_offset(p_hal->dev, 143, offset);
516 regs->addr_cfg.swreg143.sw_refer4_dbase_lsb = val;
517 } else if (i == 5) {
518 mpp_dev_set_reg_offset(p_hal->dev, 145, offset);
519 regs->addr_cfg.swreg145.sw_refer5_dbase_lsb = val;
520 } else if (i == 6) {
521 mpp_dev_set_reg_offset(p_hal->dev, 147, offset);
522 regs->addr_cfg.swreg147.sw_refer6_dbase_lsb = val;
523 } else {
524 mpp_err("Error: trying to set invalid reference index.");
525 }
526 }
527
528 static void set_ref_dbase_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
529 {
530 if (i == 0) {
531 regs->addr_cfg.swreg134.sw_refer0_dbase_msb = val;
532 } else if (i == 1) {
533 regs->addr_cfg.swreg136.sw_refer1_dbase_msb = val;
534 } else if (i == 2) {
535 regs->addr_cfg.swreg138.sw_refer2_dbase_msb = val;
536 } else if (i == 3) {
537 regs->addr_cfg.swreg140.sw_refer3_dbase_msb = val;
538 } else if (i == 4) {
539 regs->addr_cfg.swreg142.sw_refer4_dbase_msb = val;
540 } else if (i == 5) {
541 regs->addr_cfg.swreg144.sw_refer5_dbase_msb = val;
542 } else if (i == 6) {
543 regs->addr_cfg.swreg146.sw_refer6_dbase_msb = val;
544 } else {
545 mpp_err("Error: trying to set invalid reference index.");
546 }
547 }
548
549 static void set_ref_ty_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
550 {
551 // MppBuffer framebuf = NULL;
552 // mpp_buf_slot_get_prop(slots, val, SLOT_BUFFER, &framebuf);
553 HalBuf *tile_out_buf;
554 tile_out_buf = hal_bufs_get_buf(bufs, val);
555
556 if (tile_out_buf == NULL) {
557 mpp_err_f("get slots frame buff fail");
return;
558 }
559 val = mpp_buffer_get_fd(tile_out_buf->buf[0]);
560
561 if (i == 0) {
562 regs->swreg192.sw_refer0_tybase_lsb = val;
563 } else if (i == 1) {
564 regs->swreg194.sw_refer1_tybase_lsb = val;
565 } else if (i == 2) {
566 regs->swreg196.sw_refer2_tybase_lsb = val;
567 } else if (i == 3) {
568 regs->swreg198.sw_refer3_tybase_lsb = val;
569 } else if (i == 4) {
570 regs->swreg200.sw_refer4_tybase_lsb = val;
571 } else if (i == 5) {
572 regs->swreg202.sw_refer5_tybase_lsb = val;
573 } else if (i == 6) {
574 regs->swreg204.sw_refer6_tybase_lsb = val;
575 } else {
576 mpp_err("Error: trying to set invalid reference index.");
577 }
578 }
579
580 static void set_ref_ty_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
581 {
582 if (i == 0) {
583 regs->swreg191.sw_refer0_tybase_msb = val;
584 } else if (i == 1) {
585 regs->swreg193.sw_refer1_tybase_msb = val;
586 } else if (i == 2) {
587 regs->swreg195.sw_refer2_tybase_msb = val;
588 } else if (i == 3) {
589 regs->swreg197.sw_refer3_tybase_msb = val;
590 } else if (i == 4) {
591 regs->swreg199.sw_refer4_tybase_msb = val;
592 } else if (i == 5) {
593 regs->swreg201.sw_refer5_tybase_msb = val;
594 } else if (i == 6) {
595 regs->swreg203.sw_refer6_tybase_msb = val;
596 } else {
597 mpp_err(" trying to set invalid reference index.");
598 }
599 }
600
601 static void set_ref_tc_base(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val, HalBufs bufs)
602 {
603 // MppBuffer framebuf = NULL;
604 // mpp_buf_slot_get_prop(slots, val, SLOT_BUFFER, &framebuf);
605 HalBuf *tile_out_buf;
606 tile_out_buf = hal_bufs_get_buf(bufs, val);
607
608 if (tile_out_buf == NULL) {
609 mpp_err_f("get slots frame buff fail");
return;
610 }
611 val = mpp_buffer_get_fd(tile_out_buf->buf[0]);
612
613 if (i == 0) {
614 regs->swreg226.sw_refer0_tcbase_lsb = val;
615 } else if (i == 1) {
616 regs->swreg228.sw_refer1_tcbase_lsb = val;
617 } else if (i == 2) {
618 regs->swreg230.sw_refer2_tcbase_lsb = val;
619 } else if (i == 3) {
620 regs->swreg232.sw_refer3_tcbase_lsb = val;
621 } else if (i == 4) {
622 regs->swreg234.sw_refer4_tcbase_lsb = val;
623 } else if (i == 5) {
624 regs->swreg236.sw_refer5_tcbase_lsb = val;
625 } else if (i == 6) {
626 regs->swreg238.sw_refer6_tcbase_lsb = val;
627 } else {
628 mpp_err("Error: trying to set invalid reference index.");
629 }
630 }
631
632
633 static void set_ref_tc_base_msb(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
634 {
635 if (i == 0) {
636 regs->swreg225.sw_refer0_tcbase_msb = val;
637 } else if (i == 1) {
638 regs->swreg227.sw_refer1_tcbase_msb = val;
639 } else if (i == 2) {
640 regs->swreg229.sw_refer2_tcbase_msb = val;
641 } else if (i == 3) {
642 regs->swreg231.sw_refer3_tcbase_msb = val;
643 } else if (i == 4) {
644 regs->swreg233.sw_refer4_tcbase_msb = val;
645 } else if (i == 5) {
646 regs->swreg235.sw_refer5_tcbase_msb = val;
647 } else if (i == 6) {
648 regs->swreg237.sw_refer6_tcbase_msb = val;
649 } else {
650 mpp_err("Error: trying to set invalid reference index.");
651 }
652 }
653
654 static void set_ref_sign_bias(VdpuAv1dRegSet *regs, RK_S32 i, RK_S32 val)
655 {
656 if (i == 0) {
657 regs->swreg59.sw_ref0_sign_bias = val;
658 } else if (i == 1) {
659 regs->swreg59.sw_ref1_sign_bias = val;
660 } else if (i == 2) {
661 regs->swreg59.sw_ref2_sign_bias = val;
662 } else if (i == 3) {
663 regs->swreg59.sw_ref3_sign_bias = val;
664 } else if (i == 4) {
665 regs->swreg9.sw_ref4_sign_bias = val;
666 } else if (i == 5) {
667 regs->swreg9.sw_ref5_sign_bias = val;
668 } else if (i == 6) {
669 regs->swreg9.sw_ref6_sign_bias = val;
670 } else {
671 mpp_err("Error: trying to set invalid reference index.");
672 }
673 }
674
675 #define MAX_FRAME_DISTANCE 31
676 #define MAX_ACTIVE_REFS AV1_ACTIVE_REFS_EX
677
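/*
 * Signed distance between two order hints, wrapped into the valid range the
 * same way as get_relative_dist() in the AV1 spec. For example, with
 * order_hint_bits = 3 the hints live in [0, 7], so a - b = 5 wraps to -3
 * while a - b = -1 stays -1. Returns 0 when order hints are not coded.
 */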
678 static RK_S32 GetRelativeDist(DXVA_PicParams_AV1 *dxva, RK_S32 a, RK_S32 b)
679 {
680 if (!dxva->order_hint_bits) return 0;
681 const RK_S32 bits = dxva->order_hint_bits - 1;
682
683 RK_S32 diff = a - b;
684 RK_S32 m = 1 << bits;
685 diff = (diff & (m - 1)) - (diff & m);
686 return diff;
687 }
688
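/*
 * Fill the motion-field offset registers for one of the (up to three)
 * selected temporal MV reference frames: the offsets are the selected
 * reference's own distances to its seven references, which the core needs
 * to project that reference's stored motion vectors onto the current frame.
 * index 1 maps to the mf1 register group, 2 to mf2, anything else to mf3.
 */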
689 #define POPULATE_REF_OFFSET(index) \
690 { \
691 RK_S32 ref_offset[MAX_REF_FRAMES_EX - 1]; \
692 RK_S32 idx = refs_selected[(index) - 1]; \
693 ref_offset[0] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
694 dxva->frame_refs[idx].lst_frame_offset); \
695 ref_offset[1] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
696 dxva->frame_refs[idx].lst2_frame_offset); \
697 ref_offset[2] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
698 dxva->frame_refs[idx].lst3_frame_offset); \
699 ref_offset[3] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
700 dxva->frame_refs[idx].gld_frame_offset); \
701 ref_offset[4] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
702 dxva->frame_refs[idx].bwd_frame_offset); \
703 ref_offset[5] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
704 dxva->frame_refs[idx].alt2_frame_offset); \
705 ref_offset[6] = GetRelativeDist(dxva, dxva->frame_refs[idx].order_hint, \
706 dxva->frame_refs[idx].alt_frame_offset); \
707 if(index == 1) { \
708 regs->swreg20.sw_mf1_last_offset = ref_offset[0]; \
709 regs->swreg21.sw_mf1_last2_offset = ref_offset[1]; \
710 regs->swreg22.sw_mf1_last3_offset = ref_offset[2]; \
711 regs->swreg23.sw_mf1_golden_offset = ref_offset[3]; \
712 regs->swreg24.sw_mf1_bwdref_offset = ref_offset[4]; \
713 regs->swreg25.sw_mf1_altref2_offset = ref_offset[5]; \
714 regs->swreg26.sw_mf1_altref_offset = ref_offset[6]; \
715 }else if(index == 2) { \
716 regs->swreg27.sw_mf2_last_offset = ref_offset[0]; \
717 regs->swreg47.sw_mf2_last2_offset = ref_offset[1]; \
718 regs->swreg47.sw_mf2_last3_offset = ref_offset[2]; \
719 regs->swreg47.sw_mf2_golden_offset = ref_offset[3]; \
720 regs->swreg48.sw_mf2_bwdref_offset = ref_offset[4]; \
721 regs->swreg48.sw_mf2_altref2_offset = ref_offset[5]; \
722 regs->swreg48.sw_mf2_altref_offset = ref_offset[6]; \
723 }else { \
724 regs->swreg184.sw_mf3_last_offset = ref_offset[0]; \
725 regs->swreg185.sw_mf3_last2_offset = ref_offset[1]; \
726 regs->swreg186.sw_mf3_last3_offset = ref_offset[2]; \
727 regs->swreg187.sw_mf3_golden_offset = ref_offset[3]; \
728 regs->swreg188.sw_mf3_bwdref_offset = ref_offset[4]; \
729 regs->swreg257.sw_mf3_altref2_offset = ref_offset[5]; \
730 regs->swreg262.sw_mf3_altref_offset = ref_offset[6]; \
731 } \
732 }
733
734
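/*
 * Derive ref_frame_sign_bias[] for the active references: 1 when the
 * reference lies after the current frame in order-hint terms, 0 otherwise.
 * Entry 0 (INTRA_FRAME) stays 0, and the whole table is cleared for
 * intra-only/key frames or when order hints are disabled.
 */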
735 static void set_frame_sign_bias(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
736 {
737 RK_U32 i = 0;
738 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
739 if (!dxva->order_hint_bits || dxva->format.frame_type == AV1_FRAME_INTRA_ONLY ||
740 dxva->format.frame_type == AV1_FRAME_KEY) {
741 for (i = 0; i < AV1_REF_LIST_SIZE; i++) {
742 reg_ctx->ref_frame_sign_bias[i] = 0;
743 }
744 return;
745 }
746
747 // Identify the nearest forward and backward references.
748 for (i = 0; i < AV1_ACTIVE_REFS_EX; i++) {
749 if (dxva->frame_refs[i].Index >= 0) {
750 RK_S32 ref_frame_offset = dxva->frame_refs[i].order_hint;
751 RK_S32 rel_off = GetRelativeDist(dxva, ref_frame_offset, dxva->order_hint);
752 reg_ctx->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
753 AV1D_DBG(AV1D_DBG_LOG, "frame_refs[%d] order_hint %d ref_frame_offset %d\n",
754 i, dxva->order_hint, ref_frame_offset);
755 }
756 }
757 }
758
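/*
 * Upload the entropy coding state: copy the frame's CDF tables into
 * prob_tbl_base and, for intra-only/key frames, overwrite the MV CDF region
 * with the intra block copy MV CDFs. prob_tbl_out_base is handed to the
 * core as well, presumably to receive the adapted CDFs written back after
 * the frame is decoded.
 */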
759 static void vdpu_av1d_set_prob(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
760 {
761 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
762 const int mv_cdf_offset = offsetof(AV1CDFs, mv_cdf);
763 void* prob_base = mpp_buffer_get_ptr(reg_ctx->prob_tbl_base);
764 VdpuAv1dRegSet *regs = reg_ctx->regs;
765
766 memcpy(prob_base, dxva->cdfs, sizeof(AV1CDFs));
767 if (dxva->format.frame_type == AV1_FRAME_INTRA_ONLY ||
768 dxva->format.frame_type == AV1_FRAME_KEY) {
769 // Overwrite MV context area with intrabc MV context
770 memcpy(prob_base + mv_cdf_offset, dxva->cdfs_ndvc, sizeof(MvCDFs));
771 }
772 mpp_buffer_sync_end(reg_ctx->prob_tbl_base);
773
774 regs->addr_cfg.swreg171.sw_prob_tab_out_base_lsb = mpp_buffer_get_fd(reg_ctx->prob_tbl_out_base);
775 regs->addr_cfg.swreg173.sw_prob_tab_base_lsb = mpp_buffer_get_fd(reg_ctx->prob_tbl_base);
776 }
777
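/*
 * Program the reference frame state: the number of distinct reference
 * buffers, per-reference dimensions and fixed-point scaling factors
 * (AV1_REF_SCALE_SHIFT), luma/chroma/MV base addresses taken from
 * tile_out_bufs, sign bias, and the up-to-three motion-field references
 * (checked in LAST, BWDREF, ALTREF2, ALTREF, LAST2 order) used for temporal
 * MV projection together with their relative order-hint offsets.
 */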
778 static void vdpu_av1d_set_reference_frames(Av1dHalCtx *p_hal, VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
779 {
780 RK_U32 tmp1, tmp2, i;
781 RK_U32 cur_height, cur_width;
782 RK_U8 max_ref_frames = MAX_REF_FRAMES_EX;
783 RK_U8 prev_valid = 0;
784
785 VdpuAv1dRegSet *regs = ctx->regs;
786 RK_S32 ref_count[AV1DEC_MAX_PIC_BUFFERS] = {0};
787
788 RK_U32 ref_scale_e = 0;
789 RK_U32 y_stride = ctx->luma_size;
790 RK_U32 uv_stride = y_stride / 2;
791 RK_U32 mv_offset = ctx->luma_size + ctx->chroma_size + 64;
792
793 if (!dxva->coding.intrabc) {
794 for (i = 0; i < AV1_REF_LIST_SIZE - 1; i++) {
795 if (dxva->frame_refs[i].Index >= 0)
796 ref_count[dxva->frame_refs[i].Index]++;
797 }
798
799 for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
800 if (ref_count[i])
801 regs->swreg4.sw_ref_frames++;
802 }
803 } else
804 regs->swreg4.sw_ref_frames = 1;
805
806 cur_width = dxva->width;
807 cur_height = dxva->height;
808
809 set_frame_sign_bias(p_hal, dxva);
810
811 for (i = LAST_FRAME; i < max_ref_frames; i++) {
812 RK_U32 ref = i - 1;
813 RK_S32 idx = 0;
814 if (dxva->coding.intrabc) {
815 idx = dxva->CurrPicTextureIndex;
816 tmp1 = cur_width;
817 tmp2 = cur_height;
818 } else {
819 tmp1 = dxva->frame_refs[ref].width;
820 tmp2 = dxva->frame_refs[ref].height;
821 if (dxva->frame_refs[ref].Index > 0) {
822 idx = dxva->frame_refs[ref].Index;
823 }
824 }
825
826 set_ref_width(regs, ref, tmp1);
827 set_ref_height(regs, ref, tmp2);
828 tmp1 = ((tmp1 << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
829 tmp2 = ((tmp2 << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;
830
831 set_ref_hor_scale(regs, ref, tmp1);
832 set_ref_ver_scale(regs, ref, tmp2);
833 if (tmp1 != (1 << AV1_REF_SCALE_SHIFT) ||
834 tmp2 != (1 << AV1_REF_SCALE_SHIFT)) {
835 ref_scale_e = 1;
836 }
837
838 if (idx == ctx->prev_out_buffer_i) {
839 prev_valid = 1;
840 }
841
842 set_ref_lum_base(regs, ref, idx, ctx->tile_out_bufs);
843 set_ref_cb_base(p_hal, ref, idx, ctx->tile_out_bufs, y_stride);
844 set_ref_dbase (p_hal, ref, idx, ctx->tile_out_bufs, mv_offset);
845
846 set_ref_lum_base_msb(regs, ref, 0);
847 set_ref_cb_base_msb(regs, ref, 0);
848 set_ref_dbase_msb (regs, ref, 0);
849
850 if (0) {
851 set_ref_ty_base(regs, ref, idx, ctx->tile_out_bufs);
852 set_ref_tc_base(regs, ref, idx, ctx->tile_out_bufs);
853 set_ref_ty_base_msb(regs, ref, 0);
854 set_ref_tc_base_msb(regs, ref, 0);
855 }
856 set_ref_sign_bias(regs, ref, ctx->ref_frame_sign_bias[i]);
857 }
858
859 regs->swreg184.sw_ref0_gm_mode = dxva->frame_refs[0].wmtype;
860 regs->swreg185.sw_ref1_gm_mode = dxva->frame_refs[1].wmtype;
861 regs->swreg186.sw_ref2_gm_mode = dxva->frame_refs[2].wmtype;
862 regs->swreg187.sw_ref3_gm_mode = dxva->frame_refs[3].wmtype;
863 regs->swreg188.sw_ref4_gm_mode = dxva->frame_refs[4].wmtype;
864 regs->swreg257.sw_ref5_gm_mode = dxva->frame_refs[5].wmtype;
865 regs->swreg262.sw_ref6_gm_mode = dxva->frame_refs[6].wmtype;
866
867
868 if (dxva->coding.intrabc) {
869 ctx->prev_out_buffer_i = dxva->CurrPicTextureIndex;
870 } else if (!prev_valid) {
871 ctx->prev_out_buffer_i = dxva->frame_refs[0].Index; // LAST
872 }
873
874 {
875 RK_S32 gld_buf_idx = GOLDEN_FRAME_EX - LAST_FRAME;
876 RK_S32 alt_buf_idx = ALTREF_FRAME_EX - LAST_FRAME;
877 RK_S32 lst_buf_idx = LAST_FRAME - LAST_FRAME;
878 RK_S32 bwd_buf_idx = BWDREF_FRAME_EX - LAST_FRAME;
879 RK_S32 alt2_buf_idx = ALTREF2_FRAME_EX - LAST_FRAME;
880 RK_S32 lst2_buf_idx = LAST2_FRAME_EX - LAST_FRAME;
881
882 RK_S32 cur_frame_offset = dxva->order_hint;
883 RK_S32 alt_frame_offset = 0;
884 RK_S32 gld_frame_offset = 0;
885 RK_S32 bwd_frame_offset = 0;
886 RK_S32 alt2_frame_offset = 0;
887 RK_S32 refs_selected[3] = {0, 0, 0};
888 RK_S32 cur_mi_cols = (dxva->width + 7) >> 3;
889 RK_S32 cur_mi_rows = (dxva->height + 7) >> 3;
890 RK_U8 mf_types[3] = {0, 0, 0};
891 RK_S32 ref_stamp = 2;
892 RK_S32 ref_ind = 0;
893 RK_S32 rf;
894
895 if (dxva->frame_refs[alt_buf_idx].Index >= 0)
896 alt_frame_offset = dxva->frame_refs[alt_buf_idx].order_hint;
897 if (dxva->frame_refs[gld_buf_idx].Index >= 0)
898 gld_frame_offset = dxva->frame_refs[gld_buf_idx].order_hint;
899 if (dxva->frame_refs[bwd_buf_idx].Index >= 0)
900 bwd_frame_offset = dxva->frame_refs[bwd_buf_idx].order_hint;
901 if (dxva->frame_refs[alt2_buf_idx].Index >= 0)
902 alt2_frame_offset = dxva->frame_refs[alt2_buf_idx].order_hint;
903
904 AV1D_DBG(AV1D_DBG_LOG, "frame_offset[%d %d %d %d] lst_idx %d alt_off %d\n",
905 alt_frame_offset,
906 gld_frame_offset,
907 bwd_frame_offset,
908 alt2_frame_offset,
909 dxva->frame_refs[lst_buf_idx].Index,
910 dxva->frame_refs[lst_buf_idx].alt_frame_offset);
911
912 if (dxva->frame_refs[lst_buf_idx].Index >= 0) {
913 const RK_S32 alt_frame_offset_in_lst =
914 dxva->frame_refs[lst_buf_idx].alt_frame_offset;
915
916 const RK_S32 is_lst_overlay = (alt_frame_offset_in_lst == gld_frame_offset);
917 if (!is_lst_overlay) {
918 RK_S32 lst_mi_cols =
919 (dxva->frame_refs[lst_buf_idx].width + 7) >> 3;
920 RK_S32 lst_mi_rows =
921 (dxva->frame_refs[lst_buf_idx].height + 7) >> 3;
922 // TODO(stan): what's the difference btw key_frame and intra_only?
923 RK_S32 lst_intra_only =
924 dxva->frame_refs[lst_buf_idx].intra_only ||
925 dxva->frame_refs[lst_buf_idx].is_intra_frame;
926 if (lst_mi_cols == cur_mi_cols && lst_mi_rows == cur_mi_rows &&
927 !lst_intra_only) {
928 mf_types[ref_ind] = LAST_FRAME;
929 refs_selected[ref_ind++] = lst_buf_idx;
930 }
931 }
932 ref_stamp--;
933 }
934
935 if (GetRelativeDist(dxva, bwd_frame_offset, cur_frame_offset) > 0) {
936 RK_S32 bwd_mi_cols =
937 (dxva->frame_refs[bwd_buf_idx].width + 7) >> 3;
938 RK_S32 bwd_mi_rows =
939 (dxva->frame_refs[bwd_buf_idx].height + 7) >> 3;
940 RK_S32 bwd_intra_only = dxva->frame_refs[bwd_buf_idx].intra_only ||
941 dxva->frame_refs[bwd_buf_idx].is_intra_frame;
942 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
943 !bwd_intra_only) {
944 mf_types[ref_ind] = BWDREF_FRAME_EX;
945 refs_selected[ref_ind++] = bwd_buf_idx;
946 ref_stamp--;
947 }
948 }
949
950 if (GetRelativeDist(dxva, alt2_frame_offset, cur_frame_offset) > 0) {
951 RK_S32 alt2_mi_cols =
952 (dxva->frame_refs[alt2_buf_idx].width + 7) >> 3;
953 RK_S32 alt2_mi_rows =
954 (dxva->frame_refs[alt2_buf_idx].height + 7) >> 3;
955 RK_S32 alt2_intra_only =
956 dxva->frame_refs[alt2_buf_idx].intra_only ||
957 dxva->frame_refs[alt2_buf_idx].is_intra_frame;
958 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
959 !alt2_intra_only) {
960 mf_types[ref_ind] = ALTREF2_FRAME_EX;
961 refs_selected[ref_ind++] = alt2_buf_idx;
962 ref_stamp--;
963 }
964 }
965
966 if (GetRelativeDist(dxva, alt_frame_offset, cur_frame_offset) > 0 &&
967 ref_stamp >= 0) {
968 RK_S32 alt_mi_cols =
969 (dxva->frame_refs[alt_buf_idx].width + 7) >> 3;
970 RK_S32 alt_mi_rows =
971 (dxva->frame_refs[alt_buf_idx].height + 7) >> 3;
972 RK_S32 alt_intra_only = dxva->frame_refs[alt_buf_idx].intra_only ||
973 dxva->frame_refs[alt_buf_idx].is_intra_frame;
974 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
975 !alt_intra_only) {
976 mf_types[ref_ind] = ALTREF_FRAME_EX;
977 refs_selected[ref_ind++] = alt_buf_idx;
978 ref_stamp--;
979 }
980 }
981
982 if (ref_stamp >= 0 && dxva->frame_refs[lst2_buf_idx].Index >= 0) {
983 RK_S32 lst2_mi_cols =
984 (dxva->frame_refs[lst2_buf_idx].width + 7) >> 3;
985 RK_S32 lst2_mi_rows =
986 (dxva->frame_refs[lst2_buf_idx].height + 7) >> 3;
987 RK_S32 lst2_intra_only =
988 dxva->frame_refs[lst2_buf_idx].intra_only ||
989 dxva->frame_refs[lst2_buf_idx].is_intra_frame;
990 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
991 !lst2_intra_only) {
992 mf_types[ref_ind] = LAST2_FRAME_EX;
993 refs_selected[ref_ind++] = lst2_buf_idx;
994 ref_stamp--;
995 }
996 }
997
998 RK_S32 cur_offset[MAX_REF_FRAMES_EX - 1];
999 RK_S32 cur_roffset[MAX_REF_FRAMES_EX - 1];
1000 for ( rf = 0; rf < MAX_REF_FRAMES_EX - 1; ++rf) {
1001 RK_S32 buf_idx = dxva->frame_refs[rf].Index;
1002 if (buf_idx >= 0) {
1003 cur_offset[rf] =
1004 GetRelativeDist(dxva, cur_frame_offset,
1005 dxva->frame_refs[rf].order_hint);
1006 cur_roffset[rf] =
1007 GetRelativeDist(dxva, dxva->frame_refs[rf].order_hint,
1008 cur_frame_offset);
1009 AV1D_DBG(AV1D_DBG_LOG, "buf_idx[%d]=%d offset[%d : %d] hin %d\n", rf, buf_idx, cur_offset[rf], cur_roffset[rf], dxva->frame_refs[rf].order_hint);
1010 } else {
1011 cur_offset[rf] = 0;
1012 cur_roffset[rf] = 0;
1013 }
1014 }
1015
1016 regs->swreg11.sw_use_temporal0_mvs = 0;
1017 regs->swreg11.sw_use_temporal1_mvs = 0;
1018 regs->swreg11.sw_use_temporal2_mvs = 0;
1019 regs->swreg11.sw_use_temporal3_mvs = 0;
1020
1021 if (dxva->coding.use_ref_frame_mvs && ref_ind > 0 &&
1022 cur_offset[mf_types[0] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1023 cur_offset[mf_types[0] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1024 regs->swreg11.sw_use_temporal0_mvs = 1;
1025 POPULATE_REF_OFFSET(1)
1026 }
1027
1028 if (dxva->coding.use_ref_frame_mvs && ref_ind > 1 &&
1029 cur_offset[mf_types[1] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1030 cur_offset[mf_types[1] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1031 regs->swreg11.sw_use_temporal1_mvs = 1;
1032 POPULATE_REF_OFFSET(2)
1033 }
1034
1035 if (dxva->coding.use_ref_frame_mvs && ref_ind > 2 &&
1036 cur_offset[mf_types[2] - LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1037 cur_offset[mf_types[2] - LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1038 regs->swreg11.sw_use_temporal2_mvs = 1;
1039 POPULATE_REF_OFFSET(3)
1040 }
1041
1042 // Pass one additional frame that will contain the segment information
1043 if (dxva->segmentation.enabled &&
1044 dxva->primary_ref_frame < ALLOWED_REFS_PER_FRAME_EX) {
1045 // Primary ref frame is zero based
1046 RK_S32 prim_buf_idx = dxva->frame_refs[dxva->primary_ref_frame].Index;
1047
1048 if (prim_buf_idx >= 0) {
1049 HalBuf *tile_out_buf;
1050
1051 y_stride = ctx->luma_size ;
1052 uv_stride = y_stride / 2;
1053 mv_offset = y_stride + uv_stride + 64;
1054
1055 tile_out_buf = hal_bufs_get_buf(ctx->tile_out_bufs, prim_buf_idx);
1056 regs->addr_cfg.swreg80.sw_segment_read_base_msb = 0;
1057 regs->addr_cfg.swreg81.sw_segment_read_base_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
1058 mpp_dev_set_reg_offset(p_hal->dev, 81, mv_offset);
1059 regs->swreg11.sw_use_temporal3_mvs = 1;
1060 }
1061 }
1062 if (dxva->primary_ref_frame < ALLOWED_REFS_PER_FRAME_EX) {
1063 RK_S32 prim_buf_idx = dxva->primary_ref_frame;
1064 ctx->resolution_change =
1065 cur_mi_cols !=
1066 (RK_S32)((dxva->frame_refs[prim_buf_idx].width + 7) >>
1067 3) ||
1068 cur_mi_rows !=
1069 (RK_S32)((dxva->frame_refs[prim_buf_idx].height + 7) >>
1070 3);
1071 }
1072
1073 regs->swreg184.sw_cur_last_offset = cur_offset[0];
1074 regs->swreg185.sw_cur_last2_offset = cur_offset[1];
1075 regs->swreg186.sw_cur_last3_offset = cur_offset[2];
1076 regs->swreg187.sw_cur_golden_offset = cur_offset[3];
1077 regs->swreg188.sw_cur_bwdref_offset = cur_offset[4];
1078 regs->swreg257.sw_cur_altref2_offset = cur_offset[5];
1079 regs->swreg262.sw_cur_altref_offset = cur_offset[6];
1080
1081 regs->swreg184.sw_cur_last_roffset = cur_roffset[0];
1082 regs->swreg185.sw_cur_last2_roffset = cur_roffset[1];
1083 regs->swreg186.sw_cur_last3_roffset = cur_roffset[2];
1084 regs->swreg187.sw_cur_golden_roffset = cur_roffset[3];
1085 regs->swreg188.sw_cur_bwdref_roffset = cur_roffset[4];
1086 regs->swreg257.sw_cur_altref2_roffset = cur_roffset[5];
1087 regs->swreg262.sw_cur_altref_roffset = cur_roffset[6];
1088
1089 /* Index start from 0 */
1090 regs->swreg9.sw_mf1_type = mf_types[0] - LAST_FRAME;
1091 regs->swreg9.sw_mf2_type = mf_types[1] - LAST_FRAME;
1092 regs->swreg9.sw_mf3_type = mf_types[2] - LAST_FRAME;
1093 AV1D_DBG(AV1D_DBG_LOG, "mf_types[%d %d %d]\n", mf_types[0], mf_types[1], mf_types[2]);
1094 }
1095 regs->swreg5.sw_ref_scaling_enable = ref_scale_e;
1096 }
1097 #undef MAX_FRAME_DISTANCE
1098
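/*
 * Compute the horizontal superres scaling parameters: the downscaled width
 * implied by the scale denominator, the 14-bit fixed-point luma/chroma
 * steps in both directions and the initial subpel offsets, following the
 * upscaling math in the AV1 spec. When superres is off or the denominator
 * results in no scaling, sw_superres_is_scaled is cleared. The superres
 * column buffer base inside filter_mem is programmed as well.
 */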
1099 static void vdpu_av1d_superres_params(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1100 {
1101 // Compute and store scaling parameters needed for superres
1102 #define SUPERRES_SCALE_BITS 3
1103 #define SCALE_NUMERATOR 8
1104 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
1105
1106 #define RS_SUBPEL_BITS 6
1107 #define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
1108 #define RS_SCALE_SUBPEL_BITS 14
1109 #define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
1110 #define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
1111 #define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))
1112 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1113 VdpuAv1dRegSet *regs = ctx->regs;
1114 RK_U8 superres_scale_denominator = SCALE_NUMERATOR;
1115 RK_U32 superres_luma_step = 0;
1116 RK_U32 superres_chroma_step = 0;
1117 RK_U32 superres_luma_step_invra = 0;
1118 RK_U32 superres_chroma_step_invra = 0;
1119 RK_U32 superres_init_luma_subpel_x = 0;
1120 RK_U32 superres_init_chroma_subpel_x = 0;
1121 RK_U32 superres_is_scaled = 1;
1122 RK_U32 width = 0;
1123 if (dxva->coding.superres) {
1124 superres_scale_denominator = regs->swreg9.sw_scale_denom_minus9 + 9;
1125 }
1126
1127 if (superres_scale_denominator > SCALE_NUMERATOR) {
1128 width = (dxva->upscaled_width * SCALE_NUMERATOR +
1129 (superres_scale_denominator / 2)) /
1130 superres_scale_denominator;
1131 RK_U32 min_w = MPP_MIN(16, dxva->upscaled_width);
1132 if (width < min_w) width = min_w;
1133 if (width == dxva->upscaled_width) {
1134 superres_is_scaled = 0;
1135 superres_luma_step = RS_SCALE_SUBPEL_BITS;
1136 superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1137 superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1138 superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1139 superres_init_luma_subpel_x = 0;
1140 superres_init_chroma_subpel_x = 0;
1141 goto end;
1142 }
1143 {
1144 RK_S32 upscaledLumaPlaneW = dxva->upscaled_width;
1145 RK_S32 downscaledLumaPlaneW = width;
1146
1147 RK_S32 downscaledChromaPlaneW = (downscaledLumaPlaneW + 1) >> 1;
1148 RK_S32 upscaledChromaPlaneW = (upscaledLumaPlaneW + 1) >> 1;
1149
1150 RK_S32 stepLumaX = ((downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
1151 (upscaledLumaPlaneW / 2)) /
1152 upscaledLumaPlaneW;
1153 RK_S32 stepChromaX = ((downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
1154 (upscaledChromaPlaneW / 2)) /
1155 upscaledChromaPlaneW;
1156 RK_S32 errLuma = (upscaledLumaPlaneW * stepLumaX) -
1157 (downscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS);
1158 RK_S32 errChroma = (upscaledChromaPlaneW * stepChromaX) -
1159 (downscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS);
1160 RK_S32 initialLumaSubpelX =
1161 ((-((upscaledLumaPlaneW - downscaledLumaPlaneW)
1162 << (RS_SCALE_SUBPEL_BITS - 1)) +
1163 upscaledLumaPlaneW / 2) /
1164 upscaledLumaPlaneW +
1165 (1 << (RS_SCALE_EXTRA_BITS - 1)) - errLuma / 2) &
1166 RS_SCALE_SUBPEL_MASK;
1167 RK_S32 initialChromaSubpelX =
1168 ((-((upscaledChromaPlaneW - downscaledChromaPlaneW)
1169 << (RS_SCALE_SUBPEL_BITS - 1)) +
1170 upscaledChromaPlaneW / 2) /
1171 upscaledChromaPlaneW +
1172 (1 << (RS_SCALE_EXTRA_BITS - 1)) - errChroma / 2) &
1173 RS_SCALE_SUBPEL_MASK;
1174
1175 superres_luma_step = stepLumaX;
1176 superres_chroma_step = stepChromaX;
1177 superres_luma_step_invra =
1178 ((upscaledLumaPlaneW << RS_SCALE_SUBPEL_BITS) +
1179 (downscaledLumaPlaneW / 2)) /
1180 downscaledLumaPlaneW;
1181 superres_chroma_step_invra =
1182 ((upscaledChromaPlaneW << RS_SCALE_SUBPEL_BITS) +
1183 (downscaledChromaPlaneW / 2)) /
1184 downscaledChromaPlaneW;
1185 superres_init_luma_subpel_x = initialLumaSubpelX;
1186 superres_init_chroma_subpel_x = initialChromaSubpelX;
1187 }
1188 } else {
1189 superres_luma_step = RS_SCALE_SUBPEL_BITS;
1190 superres_chroma_step = RS_SCALE_SUBPEL_BITS;
1191 superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
1192 superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
1193 superres_init_luma_subpel_x = 0;
1194 superres_init_chroma_subpel_x = 0;
1195 superres_is_scaled = 0;
1196 }
1197 end:
1198 regs->swreg51.sw_superres_luma_step = superres_luma_step;
1199 regs->swreg51.sw_superres_chroma_step = superres_chroma_step;
1200 regs->swreg298.sw_superres_luma_step_invra = superres_luma_step_invra;
1201 regs->swreg298.sw_superres_chroma_step_invra = superres_chroma_step_invra;
1202 regs->swreg52.sw_superres_init_luma_subpel_x = superres_init_luma_subpel_x;
1203 regs->swreg52.sw_superres_init_chroma_subpel_x = superres_init_chroma_subpel_x;
1204 regs->swreg5.sw_superres_is_scaled = superres_is_scaled;
1205
1206 regs->addr_cfg.swreg89.sw_superres_colbuf_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
1207 mpp_dev_set_reg_offset(p_hal->dev, 89, ctx->filt_info[SR_COL].offset);
1208 }
1209
1210
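/*
 * Current picture geometry: width/height in 8-pixel CB units plus the
 * padding to the next CB boundary, the superres output width and scale
 * denominator, then the derived superres scaling parameters.
 */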
1211 static void vdpu_av1d_set_picture_dimensions(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1212 {
1213 /* Write dimensions for the current picture
1214 (This is needed when scaling is used) */
1215 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1216 VdpuAv1dRegSet *regs = ctx->regs;
1217
1218 regs->swreg4.sw_pic_width_in_cbs = MPP_ALIGN(dxva->width, 8) >> 3;
1219 regs->swreg4.sw_pic_height_in_cbs = MPP_ALIGN(dxva->height, 8) >> 3;
1220 regs->swreg12.sw_pic_width_pad = MPP_ALIGN(dxva->width, 8) - dxva->width;
1221 regs->swreg12.sw_pic_height_pad = MPP_ALIGN(dxva->height, 8) - dxva->height;
1222
1223 regs->swreg8.sw_superres_pic_width = dxva->upscaled_width;
1224 regs->swreg9.sw_scale_denom_minus9 = dxva->superres_denom;
1225
1226 vdpu_av1d_superres_params(p_hal, dxva);
1227 }
1228
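/*
 * Segmentation setup: enable/update flags, base loop filter levels, then a
 * per-segment table (segval) expanded from feature_mask/feature_data with
 * the spec's clamping for quant and filter deltas. preskip_segid and
 * last_active_seg are derived from the active features, and each segment's
 * quant, filter deltas, reference, skip and global MV fields are written out.
 */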
1229 static void vdpu_av1d_set_segmentation(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
1230 {
1231 RK_U32 segval[MAX_MB_SEGMENTS][SEG_AV1_LVL_MAX];
1232 VdpuAv1dRegSet *regs = ctx->regs;
1233 RK_U8 s, i, j;
1234 RK_U8 segsign = 0;
1235 RK_U8 preskip_segid = 0;
1236 RK_U8 last_active_seg = 0;
1237 /* Segmentation */
1238 regs->swreg13.sw_segment_temp_upd_e = dxva->segmentation.temporal_update;
1239 regs->swreg13.sw_segment_upd_e = dxva->segmentation.update_map;
1240 regs->swreg13.sw_segment_e = dxva->segmentation.enabled;
1241
1242 //dec->error_resilient || dec->resolution_change;
1243 regs->swreg5.sw_error_resilient = dxva->coding.error_resilient_mode;
1244
1245 if ((!dxva->format.frame_type || dxva->format.frame_type == AV1_FRAME_INTRA_ONLY)
1246 || regs->swreg5.sw_error_resilient) {
1247 regs->swreg11.sw_use_temporal3_mvs = 0;
1248 }
1249
1250 regs->swreg14.sw_filt_level0 = dxva->loop_filter.filter_level[0];
1251 regs->swreg15.sw_filt_level1 = dxva->loop_filter.filter_level[1];
1252 regs->swreg16.sw_filt_level2 = dxva->loop_filter.filter_level_u;
1253 regs->swreg17.sw_filt_level3 = dxva->loop_filter.filter_level_v;
1254
1255 /* Set filter level and QP for every segment ID. Initialize all
1256 * segments with default QP and filter level. */
1257 for (s = 0; s < MAX_MB_SEGMENTS; s++) {
1258 segval[s][SEG_AV1_LVL_ALT_Q] = 0;
1259 segval[s][SEG_AV1_LVL_ALT_LF_Y_V] = 0;
1260 segval[s][SEG_AV1_LVL_ALT_LF_Y_H] = 0;
1261 segval[s][SEG_AV1_LVL_ALT_LF_U] = 0;
1262 segval[s][SEG_AV1_LVL_ALT_LF_V] = 0;
1263 segval[s][SEG_AV1_LVL_REF_FRAME] = 0; /* segment ref_frame disabled */
1264 segval[s][SEG_AV1_LVL_SKIP] = 0; /* segment skip disabled */
1265 segval[s][SEG_AV1_LVL_GLOBALMV] = 0; /* global motion */
1266 }
1267 /* If a feature is enabled for a segment, overwrite the default. */
1268 if (dxva->segmentation.enabled) {
1269 RK_S32 (*segdata)[SEG_AV1_LVL_MAX] = dxva->segmentation.feature_data;
1270
1271 for (s = 0; s < MAX_MB_SEGMENTS; s++) {
1272 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_ALT_Q)) {
1273 segval[s][SEG_AV1_LVL_ALT_Q] =
1274 MPP_CLIP3(0, 255, MPP_ABS(segdata[s][SEG_AV1_LVL_ALT_Q]));
1275 segsign |= (segdata[s][SEG_AV1_LVL_ALT_Q] < 0) << s;
1276 }
1277
1278 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_ALT_LF_Y_V))
1279 segval[s][SEG_AV1_LVL_ALT_LF_Y_V] =
1280 MPP_CLIP3(-63, 63, segdata[s][SEG_AV1_LVL_ALT_LF_Y_V]);
1281
1282 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_ALT_LF_Y_H))
1283 segval[s][SEG_AV1_LVL_ALT_LF_Y_H] =
1284 MPP_CLIP3(-63, 63, segdata[s][SEG_AV1_LVL_ALT_LF_Y_H]);
1285
1286 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_ALT_LF_U))
1287 segval[s][SEG_AV1_LVL_ALT_LF_U] =
1288 MPP_CLIP3(-63, 63, segdata[s][SEG_AV1_LVL_ALT_LF_U]);
1289
1290 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_ALT_LF_V))
1291 segval[s][SEG_AV1_LVL_ALT_LF_V] =
1292 MPP_CLIP3(-63, 63, segdata[s][SEG_AV1_LVL_ALT_LF_V]);
1293
1294 if (dxva->format.frame_type &&
1295 dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_REF_FRAME))
1296 segval[s][SEG_AV1_LVL_REF_FRAME] =
1297 segdata[s][SEG_AV1_LVL_REF_FRAME] + 1;
1298
1299 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_SKIP))
1300 segval[s][SEG_AV1_LVL_SKIP] = 1;
1301 if (dxva->segmentation.feature_mask[s] & (1 << SEG_AV1_LVL_GLOBALMV))
1302 segval[s][SEG_AV1_LVL_GLOBALMV] = 1;
1303 }
1304 }
1305
1306 for (i = 0; i < MAX_MB_SEGMENTS; i++) {
1307 for (j = 0; j < SEG_AV1_LVL_MAX; j++) {
1308 if (dxva->segmentation.feature_mask[i] & (1 << j)) {
1309 preskip_segid |= j >= SEG_AV1_LVL_REF_FRAME;
1310 last_active_seg = MPP_MAX(i, last_active_seg);
1311 }
1312 }
1313 }
1314
1315 regs->swreg9.sw_last_active_seg = last_active_seg;
1316 regs->swreg5.sw_preskip_segid = preskip_segid;
1317
1318 regs->swreg12.sw_seg_quant_sign = segsign;
1319 /* Write QP, filter level, ref frame and skip for every segment */
1320 regs->swreg14.sw_quant_seg0 = segval[0][SEG_AV1_LVL_ALT_Q];
1321 regs->swreg14.sw_filt_level_delta0_seg0 = segval[0][SEG_AV1_LVL_ALT_LF_Y_V];
1322 regs->swreg20.sw_filt_level_delta1_seg0 = segval[0][SEG_AV1_LVL_ALT_LF_Y_H];
1323 regs->swreg20.sw_filt_level_delta2_seg0 = segval[0][SEG_AV1_LVL_ALT_LF_U];
1324 regs->swreg20.sw_filt_level_delta3_seg0 = segval[0][SEG_AV1_LVL_ALT_LF_V];
1325 regs->swreg14.sw_refpic_seg0 = segval[0][SEG_AV1_LVL_REF_FRAME];
1326 regs->swreg14.sw_skip_seg0 = segval[0][SEG_AV1_LVL_SKIP];
1327 regs->swreg20.sw_global_mv_seg0 = segval[0][SEG_AV1_LVL_GLOBALMV];
1328
1329 regs->swreg15.sw_quant_seg1 = segval[1][SEG_AV1_LVL_ALT_Q];
1330 regs->swreg15.sw_filt_level_delta0_seg1 = segval[1][SEG_AV1_LVL_ALT_LF_Y_V];
1331 regs->swreg21.sw_filt_level_delta1_seg1 = segval[1][SEG_AV1_LVL_ALT_LF_Y_H];
1332 regs->swreg21.sw_filt_level_delta2_seg1 = segval[1][SEG_AV1_LVL_ALT_LF_U];
1333 regs->swreg21.sw_filt_level_delta3_seg1 = segval[1][SEG_AV1_LVL_ALT_LF_V];
1334 regs->swreg15.sw_refpic_seg1 = segval[1][SEG_AV1_LVL_REF_FRAME];
1335 regs->swreg15.sw_skip_seg1 = segval[1][SEG_AV1_LVL_SKIP];
1336 regs->swreg21.sw_global_mv_seg1 = segval[1][SEG_AV1_LVL_GLOBALMV];
1337
1338 regs->swreg16.sw_quant_seg2 = segval[2][SEG_AV1_LVL_ALT_Q];
1339 regs->swreg16.sw_filt_level_delta0_seg2 = segval[2][SEG_AV1_LVL_ALT_LF_Y_V];
1340 regs->swreg22.sw_filt_level_delta1_seg2 = segval[2][SEG_AV1_LVL_ALT_LF_Y_H];
1341 regs->swreg22.sw_filt_level_delta2_seg2 = segval[2][SEG_AV1_LVL_ALT_LF_U];
1342 regs->swreg22.sw_filt_level_delta3_seg2 = segval[2][SEG_AV1_LVL_ALT_LF_V];
1343 regs->swreg16.sw_refpic_seg2 = segval[2][SEG_AV1_LVL_REF_FRAME];
1344 regs->swreg16.sw_skip_seg2 = segval[2][SEG_AV1_LVL_SKIP];
1345 regs->swreg22.sw_global_mv_seg2 = segval[2][SEG_AV1_LVL_GLOBALMV];
1346
1347 regs->swreg17.sw_quant_seg3 = segval[3][SEG_AV1_LVL_ALT_Q];
1348 regs->swreg17.sw_filt_level_delta0_seg3 = segval[3][SEG_AV1_LVL_ALT_LF_Y_V];
1349 regs->swreg23.sw_filt_level_delta1_seg3 = segval[3][SEG_AV1_LVL_ALT_LF_Y_H];
1350 regs->swreg23.sw_filt_level_delta2_seg3 = segval[3][SEG_AV1_LVL_ALT_LF_U];
1351 regs->swreg23.sw_filt_level_delta3_seg3 = segval[3][SEG_AV1_LVL_ALT_LF_V];
1352 regs->swreg17.sw_refpic_seg3 = segval[3][SEG_AV1_LVL_REF_FRAME];
1353 regs->swreg17.sw_skip_seg3 = segval[3][SEG_AV1_LVL_SKIP];
1354 regs->swreg23.sw_global_mv_seg3 = segval[3][SEG_AV1_LVL_GLOBALMV];
1355
1356 regs->swreg18.sw_quant_seg4 = segval[4][SEG_AV1_LVL_ALT_Q];
1357 regs->swreg18.sw_filt_level_delta0_seg4 = segval[4][SEG_AV1_LVL_ALT_LF_Y_V];
1358 regs->swreg24.sw_filt_level_delta1_seg4 = segval[4][SEG_AV1_LVL_ALT_LF_Y_H];
1359 regs->swreg24.sw_filt_level_delta2_seg4 = segval[4][SEG_AV1_LVL_ALT_LF_U];
1360 regs->swreg24.sw_filt_level_delta3_seg4 = segval[4][SEG_AV1_LVL_ALT_LF_V];
1361 regs->swreg18.sw_refpic_seg4 = segval[4][SEG_AV1_LVL_REF_FRAME];
1362 regs->swreg18.sw_skip_seg4 = segval[4][SEG_AV1_LVL_SKIP];
1363 regs->swreg24.sw_global_mv_seg4 = segval[4][SEG_AV1_LVL_GLOBALMV];
1364
1365 regs->swreg19.sw_quant_seg5 = segval[5][SEG_AV1_LVL_ALT_Q];
1366 regs->swreg19.sw_filt_level_delta0_seg5 = segval[5][SEG_AV1_LVL_ALT_LF_Y_V];
1367 regs->swreg25.sw_filt_level_delta1_seg5 = segval[5][SEG_AV1_LVL_ALT_LF_Y_H];
1368 regs->swreg25.sw_filt_level_delta2_seg5 = segval[5][SEG_AV1_LVL_ALT_LF_U];
1369 regs->swreg25.sw_filt_level_delta3_seg5 = segval[5][SEG_AV1_LVL_ALT_LF_V];
1370 regs->swreg19.sw_refpic_seg5 = segval[5][SEG_AV1_LVL_REF_FRAME];
1371 regs->swreg19.sw_skip_seg5 = segval[5][SEG_AV1_LVL_SKIP];
1372 regs->swreg25.sw_global_mv_seg5 = segval[5][SEG_AV1_LVL_GLOBALMV];
1373
1374 regs->swreg31.sw_quant_seg6 = segval[6][SEG_AV1_LVL_ALT_Q];
1375 regs->swreg31.sw_filt_level_delta0_seg6 = segval[6][SEG_AV1_LVL_ALT_LF_Y_V];
1376 regs->swreg26.sw_filt_level_delta1_seg6 = segval[6][SEG_AV1_LVL_ALT_LF_Y_H];
1377 regs->swreg26.sw_filt_level_delta2_seg6 = segval[6][SEG_AV1_LVL_ALT_LF_U];
1378 regs->swreg26.sw_filt_level_delta3_seg6 = segval[6][SEG_AV1_LVL_ALT_LF_V];
1379 regs->swreg31.sw_refpic_seg6 = segval[6][SEG_AV1_LVL_REF_FRAME];
1380 regs->swreg31.sw_skip_seg6 = segval[6][SEG_AV1_LVL_SKIP];
1381 regs->swreg26.sw_global_mv_seg6 = segval[6][SEG_AV1_LVL_GLOBALMV];
1382
1383 regs->swreg32.sw_quant_seg7 = segval[7][SEG_AV1_LVL_ALT_Q];
1384 regs->swreg32.sw_filt_level_delta0_seg7 = segval[7][SEG_AV1_LVL_ALT_LF_Y_V];
1385 regs->swreg27.sw_filt_level_delta1_seg7 = segval[7][SEG_AV1_LVL_ALT_LF_Y_H];
1386 regs->swreg27.sw_filt_level_delta2_seg7 = segval[7][SEG_AV1_LVL_ALT_LF_U];
1387 regs->swreg27.sw_filt_level_delta3_seg7 = segval[7][SEG_AV1_LVL_ALT_LF_V];
1388 regs->swreg32.sw_refpic_seg7 = segval[7][SEG_AV1_LVL_REF_FRAME];
1389 regs->swreg32.sw_skip_seg7 = segval[7][SEG_AV1_LVL_SKIP];
1390 regs->swreg27.sw_global_mv_seg7 = segval[7][SEG_AV1_LVL_GLOBALMV];
1391 }
1392
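/*
 * Loop filter configuration: the disable flag when both base levels are
 * zero, the filter-level-above-32 hint, sharpness, and the reference/mode
 * deltas (forced to zero when mode_ref_delta_enabled is off), plus the
 * deblocking column buffer bases inside filter_mem.
 */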
1393 static void vdpu_av1d_set_loopfilter(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1394 {
1395 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1396 VdpuAv1dRegSet *regs = ctx->regs;
1397 regs->swreg3.sw_filtering_dis = (dxva->loop_filter.filter_level[0] == 0) && (dxva->loop_filter.filter_level[1] == 0);
1398 regs->swreg5.sw_filt_level_base_gt32 = dxva->loop_filter.filter_level[0] > 32;
1399 regs->swreg30.sw_filt_sharpness = dxva->loop_filter.sharpness_level;
1400 if (dxva->loop_filter.mode_ref_delta_enabled) {
1401 regs->swreg59.sw_filt_ref_adj_0 = dxva->loop_filter.ref_deltas[0];
1402 regs->swreg59.sw_filt_ref_adj_1 = dxva->loop_filter.ref_deltas[1];
1403 regs->swreg59.sw_filt_ref_adj_2 = dxva->loop_filter.ref_deltas[2];
1404 regs->swreg59.sw_filt_ref_adj_3 = dxva->loop_filter.ref_deltas[3];
1405 regs->swreg30.sw_filt_ref_adj_4 = dxva->loop_filter.ref_deltas[4];
1406 regs->swreg30.sw_filt_ref_adj_5 = dxva->loop_filter.ref_deltas[5];
1407 regs->swreg49.sw_filt_ref_adj_7 = dxva->loop_filter.ref_deltas[6];
1408 regs->swreg49.sw_filt_ref_adj_6 = dxva->loop_filter.ref_deltas[7];
1409 regs->swreg30.sw_filt_mb_adj_0 = dxva->loop_filter.mode_deltas[0];
1410 regs->swreg30.sw_filt_mb_adj_1 = dxva->loop_filter.mode_deltas[1];
1411 } else {
1412 regs->swreg59.sw_filt_ref_adj_0 = 0;
1413 regs->swreg59.sw_filt_ref_adj_1 = 0;
1414 regs->swreg59.sw_filt_ref_adj_2 = 0;
1415 regs->swreg59.sw_filt_ref_adj_3 = 0;
1416 regs->swreg30.sw_filt_ref_adj_4 = 0;
1417 regs->swreg30.sw_filt_ref_adj_5 = 0;
1418 regs->swreg49.sw_filt_ref_adj_7 = 0;
1419 regs->swreg49.sw_filt_ref_adj_6 = 0;
1420 regs->swreg30.sw_filt_mb_adj_0 = 0;
1421 regs->swreg30.sw_filt_mb_adj_1 = 0;
1422 }
1423
1424 regs->addr_cfg.swreg179.sw_dec_vert_filt_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
1425 regs->addr_cfg.swreg183.sw_dec_bsd_ctrl_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
1426 mpp_dev_set_reg_offset(p_hal->dev, 183, ctx->filt_info[DB_CTRL_COL].offset);
1427 }
1428
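/*
 * Write the global motion models for the seven references into the
 * global_model buffer in the layout the core expects: six 32-bit wmmat
 * parameters per reference with entries 2 and 3 swapped, followed by the
 * 16-bit alpha/beta/gamma/delta shear parameters (32 bytes per model,
 * see GLOBAL_MODEL_TOTAL_SIZE).
 */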
1429 static void vdpu_av1d_set_global_model(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1430 {
1431 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1432 VdpuAv1dRegSet *regs = ctx->regs;
1433 RK_U8 *dst = (RK_U8 *) mpp_buffer_get_ptr(ctx->global_model);
1434 RK_S32 ref_frame, i;
1435
1436 for (ref_frame = 0; ref_frame < GM_GLOBAL_MODELS_PER_FRAME; ++ref_frame) {
1437 mpp_assert(dxva->frame_refs[ref_frame].wmtype <= 3);
1438
1439 /* In DDR wmmat order is 0, 1, 3, 2, 4, 5 */
1440 for (i = 0; i < 6; ++i) {
1441 if (i == 2)
1442 *(RK_S32 *)(dst) = dxva->frame_refs[ref_frame].wmmat[3];
1443 else if (i == 3)
1444 *(RK_S32 *)(dst) = dxva->frame_refs[ref_frame].wmmat[2];
1445 else
1446 *(RK_S32 *)(dst) = dxva->frame_refs[ref_frame].wmmat[i];
1447 dst += 4;
1448 }
1449
1450 *(RK_S16 *)(dst) = dxva->frame_refs[ref_frame].alpha;//-32768;
1451 dst += 2;
1452 *(RK_S16 *)(dst) = dxva->frame_refs[ref_frame].beta;//-32768;
1453 dst += 2;
1454 *(RK_S16 *)(dst) = dxva->frame_refs[ref_frame].gamma;//-32768;
1455 dst += 2;
1456 *(RK_S16 *)(dst) = dxva->frame_refs[ref_frame].delta;//-32768;
1457 dst += 2;
1458 AV1D_DBG(AV1D_DBG_LOG, "ref_frame[%d] alpha %d beta %d gamma %d delta %d\n",
1459 ref_frame,
1460 dxva->frame_refs[ref_frame].alpha,
1461 dxva->frame_refs[ref_frame].beta,
1462 dxva->frame_refs[ref_frame].gamma,
1463 dxva->frame_refs[ref_frame].delta);
1464 }
1465 mpp_buffer_sync_end(ctx->global_model);
1466
1467 regs->addr_cfg.swreg82.sw_global_model_base_msb = 0;
1468 regs->addr_cfg.swreg83.sw_global_model_base_lsb = mpp_buffer_get_fd(ctx->global_model);
1469 }
1470
1471 static void vdpu_av1d_set_tile_info_regs(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
1472 {
1473 int transpose = ctx->tile_transpose;
1474 VdpuAv1dRegSet *regs = ctx->regs;
1475 size_t context_update_tile_id = dxva->tiles.context_update_id;
1476 size_t context_update_y = context_update_tile_id / dxva->tiles.cols;
1477 size_t context_update_x = context_update_tile_id % dxva->tiles.cols;
1478
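    /*
     * The multicore hint is raised when the context-update tile sits in the
     * first tile column; when tile_transpose is set the raster tile id is
     * remapped to column-major order so it matches the transposed tile
     * ordering written into the tile info buffer.
     */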
1479 regs->swreg11.sw_multicore_expect_context_update = (0 == context_update_x);
1480 if (transpose) {
1481 context_update_tile_id =
1482 context_update_x * dxva->tiles.rows + context_update_y;
1483 }
1484 regs->swreg10.sw_tile_enable = (dxva->tiles.cols > 1) || (dxva->tiles.rows > 1);
1485 regs->swreg10.sw_num_tile_cols_8k = dxva->tiles.cols;
1486 regs->swreg10.sw_num_tile_rows_8k_av1 = dxva->tiles.rows;
1487 regs->swreg9.sw_context_update_tile_id = context_update_tile_id;
1488 regs->swreg10.sw_tile_transpose = transpose;
1489 regs->swreg11.sw_dec_tile_size_mag = dxva->tiles.tile_sz_mag;
1490 if (regs->swreg10.sw_tile_enable) AV1D_DBG(AV1D_DBG_LOG, "NOTICE: tile enabled.\n");
1491
1492 regs->addr_cfg.swreg167.sw_tile_base_lsb = mpp_buffer_get_fd(ctx->tile_info);//
1493 regs->addr_cfg.swreg166.sw_tile_base_msb = 0;
1494 }
1495
1496 static int check_tile_width(DXVA_PicParams_AV1 *dxva, RK_S32 width, RK_S32 leftmost)
1497 {
1498 RK_S32 valid = 1;
1499 if (!leftmost && dxva->coding.use_128x128_superblock == 0 && dxva->coding.superres && width == 1) {
1500 AV1D_DBG(AV1D_DBG_LOG, "WARNING: Superres used and tile width == 64\n");
1501 valid = 0;
1502 }
1503
1504 const RK_S32 sb_size_log2 = dxva->coding.use_128x128_superblock ? 7 : 6;
1505 RK_S32 tile_width_pixels = (width << sb_size_log2);
1506 if (dxva->coding.superres) {
1507 tile_width_pixels =
1508 (tile_width_pixels * (9 + dxva->superres_denom) + 4) / 8;
1509 }
1510 if (tile_width_pixels > 4096) {
1511 if (dxva->coding.superres)
1512 AV1D_LOG("WARNING: Tile width after superres > 4096\n");
1513 else
1514 AV1D_LOG("WARNING: Tile width > 4096\n");
1515 valid = 0;
1516 }
1517 return valid;
1518 }
1519
1520 static void vdpu_av1d_set_tile_info_mem(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1521 {
1522 VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
1523
1524 RK_S32 transpose = ctx->tile_transpose;
1525 RK_S32 tmp = dxva->frame_tag_size + dxva->offset_to_dct_parts;
1526 RK_U32 stream_len = p_hal->strm_len - tmp;
1527 RK_U8 *p1 = (RK_U8*)mpp_buffer_get_ptr(ctx->tile_info);
1528 RK_S32 size0 = transpose ? dxva->tiles.cols : dxva->tiles.rows;
1529 RK_S32 size1 = transpose ? dxva->tiles.rows : dxva->tiles.cols;
1530 RK_S32 tile0, tile1;
1531 RK_U32 not_valid_tile_dimension = 0;
1532 RK_U32 tiles[2][64];
1533
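    /*
     * Each tile gets a 16 byte entry in the tile_info buffer: tile width and
     * height in superblock units (each padded to 4 bytes), followed by the
     * 32-bit little-endian start and end offsets of the tile data relative
     * to sw_stream0_base. 128 entries * 16 bytes matches AV1_TILE_INFO_SIZE.
     */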
1534 /* convert to per tile position */
1535 {
1536 RK_U8 val = 0, i;
1537
1538 for (i = 0; i < dxva->tiles.cols; i++) {
1539 tiles[0][i] = val;
1540 val += dxva->tiles.widths[i];
1541 }
1542 tiles[0][i] = val;
1543
1544 val = 0;
1545 for (i = 0; i < dxva->tiles.rows; i++) {
1546 tiles[1][i] = val;
1547 val += dxva->tiles.heights[i];
1548 }
1549 tiles[1][i] = val;
1550 }
1551
1552 // Write tile dimensions
1553 for (tile0 = 0; tile0 < size0; tile0++) {
1554 for (tile1 = 0; tile1 < size1; tile1++) {
1555 RK_S32 tile_y = transpose ? tile1 : tile0;
1556 RK_S32 tile_x = transpose ? tile0 : tile1;
1557 RK_S32 tile_id = transpose ? tile1 * size0 + tile0 : tile0 * size1 + tile1;
1558 RK_U32 start, end;
1559
1560 RK_U32 y0 = tiles[1][tile_y];
1561 RK_U32 y1 = tiles[1][tile_y + 1];
1562 RK_U32 x0 = tiles[0][tile_x];
1563 RK_U32 x1 = tiles[0][tile_x + 1];
1564
1565 RK_U8 leftmost = (tile_x == dxva->tiles.cols - 1); /* set for the last tile column */
1566 if (!not_valid_tile_dimension)
1567 not_valid_tile_dimension = !check_tile_width(dxva, x1 - x0, leftmost);
1568 if ((x0 << (dxva->coding.use_128x128_superblock ? 7 : 6)) >= dxva->width ||
1569 (y0 << (dxva->coding.use_128x128_superblock ? 7 : 6)) >= dxva->height)
1570 not_valid_tile_dimension = 1;
1571
1572 // tile size in SB units (width,height)
1573 *p1++ = x1 - x0;
1574 *p1++ = 0;
1575 *p1++ = 0;
1576 *p1++ = 0;
1577 *p1++ = y1 - y0;
1578 *p1++ = 0;
1579 *p1++ = 0;
1580 *p1++ = 0;
1581
1582 // tile start position (offset from sw_stream0_base)
1583 start = dxva->tiles.tile_offset_start[tile_id];
1584 *p1++ = start & 255;
1585 *p1++ = (start >> 8) & 255;
1586 *p1++ = (start >> 16) & 255;
1587 *p1++ = (start >> 24) & 255;
1588 if (!not_valid_tile_dimension) {
1589 if ((start + 1) > stream_len)
1590 not_valid_tile_dimension = 1;
1591 }
1592
1593 // # of bytes in tile data
1594 end = dxva->tiles.tile_offset_end[tile_id];
1595 *p1++ = end & 255;
1596 *p1++ = (end >> 8) & 255;
1597 *p1++ = (end >> 16) & 255;
1598 *p1++ = (end >> 24) & 255;
1599 if (!not_valid_tile_dimension) {
1600 if (end > stream_len)
1601 not_valid_tile_dimension = 1;
1602 }
1603 AV1D_DBG(AV1D_DBG_LOG, "tile_info[%d][%d]: start=%08x end=%08x x0:x1=%d:%d y0:y1=%d:%d\n",
1604 tile0, tile1, start, end, x0, x1, y0, y1);
1605 }
1606 }
1607 mpp_buffer_sync_end(ctx->tile_info);
1608 }
1609
1610 static void vdpu_av1d_set_cdef(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1611 {
1612 RK_U32 luma_pri_strength = 0;
1613 RK_U16 luma_sec_strength = 0;
1614 RK_U32 chroma_pri_strength = 0;
1615 RK_U16 chroma_sec_strength = 0;
1616 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1617 VdpuAv1dRegSet *regs = ctx->regs;
1618 RK_S32 i;
1619
1620 /* CDEF */
1621 regs->swreg7.sw_cdef_bits = dxva->cdef.bits;
1622 regs->swreg7.sw_cdef_damping = dxva->cdef.damping;
1623
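    /*
     * Pack up to 8 CDEF strength presets into the strength registers:
     * 4 bits of primary and 2 bits of secondary strength per preset,
     * separately for luma and chroma.
     */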
1624 for (i = 0; i < 8; i++) {
1625 if (i == (1 << (dxva->cdef.bits))) break;
1626 luma_pri_strength |= dxva->cdef.y_strengths[i].primary << (i * 4);
1627 luma_sec_strength |= dxva->cdef.y_strengths[i].secondary << (i * 2);
1628 chroma_pri_strength |= dxva->cdef.uv_strengths[i].primary << (i * 4);
1629 chroma_sec_strength |= dxva->cdef.uv_strengths[i].secondary << (i * 2);
1630 }
1631
1632 regs->swreg263.sw_cdef_luma_primary_strength = luma_pri_strength;
1633 regs->swreg53.sw_cdef_luma_secondary_strength = luma_sec_strength;
1634 regs->swreg264.sw_cdef_chroma_primary_strength = chroma_pri_strength;
1635 regs->swreg53.sw_cdef_chroma_secondary_strength = chroma_sec_strength;
1636
1637 // tile column buffer; repurpose some encoder specific base
1638 regs->addr_cfg.swreg85.sw_cdef_colbuf_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
1639 mpp_dev_set_reg_offset(p_hal->dev, 85, ctx->filt_info[CDEF_COL].offset);
1640 }
1641
1642 static void vdpu_av1d_set_lr(Av1dHalCtx *p_hal, DXVA_PicParams_AV1 *dxva)
1643 {
1644 VdpuAv1dRegCtx *ctx = p_hal->reg_ctx;
1645 VdpuAv1dRegSet *regs = ctx->regs;
1646 RK_U16 lr_type = 0;
1647 RK_U16 lr_unit_size = 0;
1648 RK_S32 i = 0;
1649
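    /* Pack the loop restoration type and log2 unit size as 2-bit fields per
     * plane (Y, U, V). */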
1650 for (i = 0; i < 3; i++) {
1651 lr_type |= dxva->loop_filter.frame_restoration_type[i] << (i * 2);
1652 lr_unit_size |= dxva->loop_filter.log2_restoration_unit_size[i] << (i * 2);
1653 }
1654 regs->swreg18.sw_lr_type = lr_type;
1655 regs->swreg19.sw_lr_unit_size = lr_unit_size;
1656 regs->addr_cfg.swreg91.sw_lr_colbuf_base_lsb = mpp_buffer_get_fd(ctx->filter_mem);
1657 mpp_dev_set_reg_offset(p_hal->dev, 91, ctx->filt_info[LR_COL].offset);
1658 }
1659
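/*
 * Expand the piecewise-linear film grain scaling points into a 256-entry
 * LUT. The slope between points is computed in 16.16 fixed point with
 * rounding; entries below the first point and above the last point are
 * clamped to the end-point values.
 */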
1660 static void init_scaling_function(RK_U8 scaling_points[][2], RK_U8 num_points,
1661 RK_U8 scaling_lut[])
1662 {
1663 RK_S32 i, point;
1664
1665 if (num_points == 0) {
1666 memset(scaling_lut, 0, 256);
1667 return;
1668 }
1669
1670 for (i = 0; i < scaling_points[0][0]; i++)
1671 scaling_lut[i] = scaling_points[0][1];
1672
1673 for (point = 0; point < num_points - 1; point++) {
1674 RK_S32 x ;
1675 RK_S32 delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
1676 RK_S32 delta_x = scaling_points[point + 1][0] - scaling_points[point][0];
1677 RK_S64 delta =
1678 delta_x ? delta_y * ((65536 + (delta_x >> 1)) / delta_x) : 0;
1679 for (x = 0; x < delta_x; x++) {
1680 scaling_lut[scaling_points[point][0] + x] =
1681 scaling_points[point][1] + (RK_S32)((x * delta + 32768) >> 16);
1682 }
1683 }
1684
1685 for (i = scaling_points[num_points - 1][0]; i < 256; i++)
1686 scaling_lut[i] = scaling_points[num_points - 1][1];
1687 }
1688
1689 static void vdpu_av1d_set_fgs(VdpuAv1dRegCtx *ctx, DXVA_PicParams_AV1 *dxva)
1690 {
1691 VdpuAv1dRegSet *regs = ctx->regs;
1692 RK_S32 ar_coeffs_y[24];
1693 RK_S32 ar_coeffs_cb[25];
1694 RK_S32 ar_coeffs_cr[25];
1695 RK_S32 luma_grain_block[73][82];
1696 RK_S32 cb_grain_block[38][44];
1697 RK_S32 cr_grain_block[38][44];
1698 RK_S32 ar_coeff_lag;
1699 RK_S32 ar_coeff_shift;
1700 RK_S32 grain_scale_shift;
1701 RK_S32 bitdepth;
1702 RK_S32 grain_center;
1703 RK_S32 grain_min;
1704 RK_S32 grain_max;
1705 RK_S32 i, j;
1706 RK_U8 *ptr = mpp_buffer_get_ptr(ctx->film_grain_mem);
1707 if (!dxva->film_grain.apply_grain) {
1708 regs->swreg7.sw_apply_grain = 0;
1709 // store reset params
1710 // asic_buff->fg_params[asic_buff->out_buffer_i] = dec->fg_params;
1711 return;
1712 }
1713 /* struct Av1FilmGrainParams *fg_params = &dec->fg_params;
1714 if (!dec->update_parameters) {
1715 RK_S32 active_ref = dec->film_grain_params_ref_idx;
1716 RK_S32 index_ref = Av1BufferQueueGetRef(dec_cont->bq, active_ref);
1717 u16 random_seed = fg_params->random_seed;
1718 *fg_params = asic_buff->fg_params[index_ref];
1719 fg_params->random_seed = random_seed;
1720 }
1721 asic_buff->fg_params[asic_buff->out_buffer_i] = *fg_params;*/
1722
1723 // film grain applied on secondary output
1724 // sw_ctrl->sw_apply_grain = dec_cont->pp_enabled ? 1 : 0;
1725 regs->swreg7.sw_num_y_points_b = dxva->film_grain.num_y_points > 0;
1726 regs->swreg7.sw_num_cb_points_b = dxva->film_grain.num_cb_points > 0;
1727 regs->swreg7.sw_num_cr_points_b = dxva->film_grain.num_cr_points > 0;
1728 regs->swreg8.sw_scaling_shift = dxva->film_grain.scaling_shift_minus8 + 8;
1729 if (! dxva->film_grain.chroma_scaling_from_luma) {
1730 regs->swreg28.sw_cb_mult = dxva->film_grain.cb_mult - 128;
1731 regs->swreg28.sw_cb_luma_mult = dxva->film_grain.cb_luma_mult - 128;
1732 regs->swreg28.sw_cb_offset = dxva->film_grain.cb_offset - 256;
1733 regs->swreg29.sw_cr_mult = dxva->film_grain.cr_mult - 128;
1734 regs->swreg29.sw_cr_luma_mult = dxva->film_grain.cr_luma_mult - 128;
1735 regs->swreg29.sw_cr_offset = dxva->film_grain.cr_offset - 256;
1736 } else {
1737 regs->swreg28.sw_cb_mult = 0;
1738 regs->swreg28.sw_cb_luma_mult = 64;
1739 regs->swreg28.sw_cb_offset = 0;
1740 regs->swreg29.sw_cr_mult = 0;
1741 regs->swreg29.sw_cr_luma_mult = 64;
1742 regs->swreg29.sw_cr_offset = 0;
1743 }
1744 regs->swreg7.sw_overlap_flag = dxva->film_grain.overlap_flag;
1745 regs->swreg7.sw_clip_to_restricted_range = dxva->film_grain.clip_to_restricted_range;
1746 regs->swreg7.sw_chroma_scaling_from_luma = dxva->film_grain.chroma_scaling_from_luma;
1747 regs->swreg7.sw_random_seed = dxva->film_grain.grain_seed;
1748
1749 init_scaling_function(dxva->film_grain.scaling_points_y, dxva->film_grain.num_y_points,
1750 ctx->fgsmem.scaling_lut_y);
1751
1752 if (dxva->film_grain.chroma_scaling_from_luma) {
1753 memcpy(ctx->fgsmem.scaling_lut_cb, ctx->fgsmem.scaling_lut_y,
1754 sizeof(*ctx->fgsmem.scaling_lut_y) * 256);
1755 memcpy(ctx->fgsmem.scaling_lut_cr, ctx->fgsmem.scaling_lut_y,
1756 sizeof(*ctx->fgsmem.scaling_lut_y) * 256);
1757 } else {
1758 init_scaling_function(dxva->film_grain.scaling_points_cb,
1759 dxva->film_grain.num_cb_points, ctx->fgsmem.scaling_lut_cb);
1760 init_scaling_function(dxva->film_grain.scaling_points_cr,
1761 dxva->film_grain.num_cr_points, ctx->fgsmem.scaling_lut_cr);
1762 }
1763
1764
1765 for (i = 0; i < 25; i++) {
1766 if (i < 24) {
1767 ar_coeffs_y[i] = dxva->film_grain.ar_coeffs_y[i] - 128;
1768 }
1769 ar_coeffs_cb[i] = dxva->film_grain.ar_coeffs_cb[i] - 128;
1770 ar_coeffs_cr[i] = dxva->film_grain.ar_coeffs_cr[i] - 128;
1771 }
1772
1773 ar_coeff_lag = dxva->film_grain.ar_coeff_lag;
1774 ar_coeff_shift = dxva->film_grain.ar_coeff_shift_minus6 + 6;
1775 grain_scale_shift = dxva->film_grain.grain_scale_shift;
1776 bitdepth = dxva->bitdepth;
1777 grain_center = 128 << (bitdepth - 8);
1778 grain_min = 0 - grain_center;
1779 grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1780
1781 GenerateLumaGrainBlock(luma_grain_block, bitdepth, dxva->film_grain.num_y_points,
1782 grain_scale_shift, ar_coeff_lag, ar_coeffs_y,
1783 ar_coeff_shift, grain_min, grain_max,
1784 dxva->film_grain.grain_seed);
1785
1786 GenerateChromaGrainBlock(
1787 luma_grain_block, cb_grain_block, cr_grain_block, bitdepth,
1788 dxva->film_grain.num_y_points, dxva->film_grain.num_cb_points,
1789 dxva->film_grain.num_cr_points, grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1790 ar_coeffs_cr, ar_coeff_shift, grain_min, grain_max,
1791 dxva->film_grain.chroma_scaling_from_luma, dxva->film_grain.grain_seed);
1792
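    /*
     * Crop the generated grain templates for the hardware: the 73x82 luma
     * block is cropped to 64x64 starting at (9, 9), and the 38x44 Cb/Cr
     * blocks are cropped to 32x32 starting at (6, 6) and interleaved
     * (Cb, Cr) into cropped_chroma_grain_block.
     */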
1793 for (i = 0; i < 64; i++) {
1794 for (j = 0; j < 64; j++) {
1795 ctx->fgsmem.cropped_luma_grain_block[i * 64 + j] =
1796 luma_grain_block[i + 9][j + 9];
1797 }
1798 }
1799
1800 for (i = 0; i < 32; i++) {
1801 for (j = 0; j < 32; j++) {
1802 ctx->fgsmem.cropped_chroma_grain_block[i * 64 + 2 * j] =
1803 cb_grain_block[i + 6][j + 6];
1804 ctx->fgsmem.cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1805 cr_grain_block[i + 6][j + 6];
1806 }
1807 }
1808
1809 memcpy(ptr, &ctx->fgsmem, sizeof(FilmGrainMemory));
1810 mpp_buffer_sync_end(ctx->film_grain_mem);
1811
1812 regs->addr_cfg.swreg94.sw_filmgrain_base_msb = 0;
1813 regs->addr_cfg.swreg95.sw_filmgrain_base_lsb = mpp_buffer_get_fd(ctx->film_grain_mem);
1814
1815 if (regs->swreg7.sw_apply_grain) AV1D_DBG(AV1D_DBG_LOG, "NOTICE: filmgrain enabled.\n");
1816 }
1817
1818 static MPP_RET vdpu_av1d_setup_tile_bufs(void *hal, DXVA_PicParams_AV1 *dxva)
1819 {
1820 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
1821 VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
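    /*
     * Size of one tile/reference output buffer: out_w is the byte stride of
     * one packed luma row (max_width * bitdepth bits rounded up to 128 bits),
     * num_sbs counts 64x64 superblocks with one extra row and column, and
     * dir_mvs_size reserves 24 * 128 bits of motion vector data per
     * superblock (doubled). Luma + 4:2:0 chroma + MV data + 512 bytes of
     * headroom gives tile_out_size.
     */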
1822 RK_U32 out_w = MPP_ALIGN(dxva->max_width * dxva->bitdepth, 16 * 8) / 8;
1823 RK_U32 num_sbs = (MPP_ALIGN(dxva->max_width, 64) / 64 + 1) * (MPP_ALIGN(dxva->max_height, 64) / 64 + 1);
1824 RK_U32 dir_mvs_size = MPP_ALIGN(num_sbs * 24 * 128 / 8, 16) * 2;
1825 RK_U32 out_h = MPP_ALIGN(dxva->max_height, 16);
1826 RK_U32 luma_size = out_w * out_h;
1827 RK_U32 chroma_size = luma_size >> 1;
1828 RK_U32 tile_out_size = luma_size + chroma_size + dir_mvs_size + 512;
1829
1830 if (tile_out_size <= ctx->tile_out_size)
1831 return MPP_OK;
1832
1833 ctx->hor_stride = out_w;
1834 ctx->luma_size = luma_size;
1835 ctx->chroma_size = chroma_size;
1836 ctx->tile_out_size = tile_out_size;
1837
1838 if (ctx->tile_out_bufs) {
1839 hal_bufs_deinit(ctx->tile_out_bufs);
1840 ctx->tile_out_bufs = NULL;
1841 }
1842 hal_bufs_init(&ctx->tile_out_bufs);
1843 if (!ctx->tile_out_bufs) {
1844 mpp_err_f("tile out bufs init fail\n");
1845 return MPP_ERR_NOMEM;
1846 }
1847 ctx->tile_out_count = mpp_buf_slot_get_count(p_hal->slots);
1848 hal_bufs_setup(ctx->tile_out_bufs, ctx->tile_out_count, 1, &ctx->tile_out_size);
1849
1850 return MPP_OK;
1851 }
1852
1853 MPP_RET vdpu_av1d_gen_regs(void *hal, HalTaskInfo *task)
1854 {
1855 MPP_RET ret = MPP_ERR_UNKNOW;
1856 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
1857 VdpuAv1dRegCtx *ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
1858 VdpuAv1dRegSet *regs;
1859 DXVA_PicParams_AV1 *dxva = (DXVA_PicParams_AV1*)task->dec.syntax.data;
1860 MppFrame mframe;
1861 MppBuffer buffer = NULL;
1862 MppBuffer streambuf = NULL;
1863 RK_U32 height = dxva->height;
1864 RK_U32 width = dxva->width;
1865 RK_U32 hor_stride;
1866 RK_U32 ver_stride;
1867 HalBuf *tile_out_buf;
1868 RK_U32 num_tile_cols = 1 << dxva->tile_cols_log2;
1869
1870 INP_CHECK(ret, NULL == p_hal);
1871
1872 ctx->refresh_frame_flags = dxva->refresh_frame_flags;
1873
1874 if (task->dec.flags.parse_err ||
1875 task->dec.flags.ref_err) {
1876 mpp_err_f("parse err %d ref err %d\n",
1877 task->dec.flags.parse_err, task->dec.flags.ref_err);
1878 goto __RETURN;
1879 }
1880
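    /*
     * In fast mode a small pool of register sets is used so that register
     * generation for the next frame can overlap hardware execution; a free
     * entry is claimed here and released again in vdpu_av1d_wait().
     */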
1881 if (p_hal->fast_mode) {
1882 RK_U32 i = 0;
1883
1884 for (i = 0; i < MPP_ARRAY_ELEMS(ctx->reg_buf); i++) {
1885 if (!ctx->reg_buf[i].valid) {
1886 task->dec.reg_index = i;
1887 ctx->regs = ctx->reg_buf[i].regs;
1888 ctx->reg_buf[i].valid = 1;
1889 break;
1890 }
1891 }
1892 }
1893
1894 regs = ctx->regs;
1895 memset(regs, 0, sizeof(*regs));
1896
1897 vdpu_av1d_setup_tile_bufs(p_hal, dxva);
1898
1899 if (!ctx->filter_mem || height > ctx->height || num_tile_cols > ctx->num_tile_cols) {
1900 if (ctx->filter_mem)
1901 vdpu_av1d_filtermem_release(ctx);
1902 ret = vdpu_av1d_filtermem_alloc(p_hal, ctx, dxva);
1903 if (ret) {
1904 mpp_err("filt buffer get fail\n");
1905 vdpu_av1d_filtermem_release(ctx);
1906 }
1907 }
1908
1909 ctx->width = width;
1910 ctx->height = height;
1911 ctx->num_tile_cols = num_tile_cols;
1912 mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
1913 mpp_buf_slot_get_prop(p_hal ->slots, task->dec.output, SLOT_BUFFER, &buffer);
1914 mpp_buf_slot_get_prop(p_hal ->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf);
1915 tile_out_buf = hal_bufs_get_buf(ctx->tile_out_bufs, task->dec.output);
1916 hor_stride = mpp_frame_get_hor_stride(mframe);
1917 ver_stride = mpp_frame_get_ver_stride(mframe);
1918
1919 ctx->ver_stride = ver_stride;
1920
1921 p_hal->strm_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
1922
1923 ctx->fbc_en = !!MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe));
1924
1925 AV1D_DBG(AV1D_DBG_LOG, "bitdepth %d fmt %d [%d : %d] wxh [%d : %d] uxv [%d : %d]\n",
1926 dxva->bitdepth, mpp_frame_get_fmt(mframe),
1927 dxva->format.subsampling_x, dxva->format.subsampling_y,
1928 ctx->width, ctx->height,
1929 ctx->hor_stride, ctx->ver_stride);
1930
1931 regs->swreg1.sw_dec_abort_e = 0;
1932 regs->swreg1.sw_dec_e = 1;
1933 regs->swreg1.sw_dec_tile_int_e = 0;
1934 regs->swreg2.sw_dec_clk_gate_e = 1;
1935
1936 regs->swreg3.sw_dec_mode = 17; // av1 mode
1937 regs->swreg3.sw_skip_mode = dxva->coding.skip_mode;
1938 regs->swreg3.sw_dec_out_ec_byte_word = 0; // word align
1939 regs->swreg3.sw_write_mvs_e = 1;
1940 regs->swreg3.sw_dec_out_ec_bypass = 1;
1941
1942 regs->swreg5.sw_tempor_mvp_e = dxva->coding.use_ref_frame_mvs;
1943 regs->swreg5.sw_delta_lf_res_log = dxva->loop_filter.delta_lf_res;
1944 regs->swreg5.sw_delta_lf_multi = dxva->loop_filter.delta_lf_multi;
1945 regs->swreg5.sw_delta_lf_present = dxva->loop_filter.delta_lf_present;
1946 regs->swreg5.sw_disable_cdf_update = dxva->coding.disable_cdf_update;
1947 regs->swreg5.sw_allow_warp = dxva->coding.warped_motion;
1948 regs->swreg5.sw_show_frame = dxva->format.show_frame;
1949 regs->swreg5.sw_switchable_motion_mode = dxva->coding.switchable_motion_mode;
1950 regs->swreg5.sw_enable_cdef = !(dxva->cdef.bits == 0 && dxva->cdef.damping == 0 &&
1951 dxva->cdef.y_strengths[0].primary == 0 &&
1952 dxva->cdef.y_strengths[0].secondary == 0 &&
1953 dxva->cdef.uv_strengths[0].primary == 0 &&
1954 dxva->cdef.uv_strengths[0].secondary == 0);
1955 regs->swreg5.sw_allow_masked_compound = dxva->coding.masked_compound;
1956 regs->swreg5.sw_allow_interintra = dxva->coding.interintra_compound;
1957 regs->swreg5.sw_enable_intra_edge_filter = dxva->coding.intra_edge_filter;
1958 regs->swreg5.sw_allow_filter_intra = dxva->coding.filter_intra;
1959 regs->swreg5.sw_enable_jnt_comp = dxva->coding.jnt_comp;
1960 regs->swreg5.sw_enable_dual_filter = dxva->coding.dual_filter;
1961 regs->swreg5.sw_reduced_tx_set_used = dxva->coding.reduced_tx_set;
1962 regs->swreg5.sw_allow_screen_content_tools = dxva->coding.screen_content_tools;
1963 regs->swreg5.sw_allow_intrabc = dxva->coding.intrabc;
1964
1965 regs->swreg5.sw_force_interger_mv = dxva->coding.integer_mv;
1966
1967 vdpu_av1d_set_global_model(p_hal, dxva);
1968 vdpu_av1d_set_tile_info_mem(p_hal, dxva);
1969
1970 if ((dxva->format.frame_type && (dxva->format.frame_type != AV1_FRAME_INTRA_ONLY))
1971 || dxva->coding.intrabc) {
1972 vdpu_av1d_set_reference_frames(p_hal, ctx, dxva);
1973 }
1974 vdpu_av1d_set_segmentation(ctx, dxva);
1975 vdpu_av1d_set_loopfilter(p_hal, dxva);
1976 vdpu_av1d_set_picture_dimensions(p_hal, dxva);
1977 vdpu_av1d_set_cdef(p_hal, dxva);
1978 vdpu_av1d_set_lr(p_hal, dxva);
1979 vdpu_av1d_set_fgs(ctx, dxva);
1980 vdpu_av1d_set_prob(p_hal, dxva);
1981 vdpu_av1d_set_tile_info_regs(ctx, dxva);
1982
1983 #if DUMP_AV1_DATAS /* dump buffer */
1984 {
1985 char name[128];
1986 char *path = "/data/video";
1987 static int g_frame_num = 0;
1988 FILE *fp;
1989 RK_U32 i;
1990 RK_U32 *data;
1991 RK_U32 size;
1992
1993 data = mpp_buffer_get_ptr(ctx->global_model);
1994 size = MPP_ALIGN(GLOBAL_MODEL_SIZE, 2048);
1995 memset(name, 0, sizeof(name));
1996 sprintf(name, "%s/global_mode_%d.txt", path, g_frame_num);
1997 fp = fopen(name, "wb");
1998 for ( i = 0; i < size / 4; i++)
1999 fprintf(fp, "%08x\n", data[i]);
2000 fflush(fp);
2001 fclose(fp);
2002
2003 data = mpp_buffer_get_ptr(ctx->tile_info);
2004 size = AV1_TILE_INFO_SIZE;
2005 memset(name, 0, sizeof(name));
2006 sprintf(name, "%s/tile_info_%d.txt", path, g_frame_num);
2007 fp = fopen(name, "wb");
2008 for ( i = 0; i < size / 4; i++)
2009 fprintf(fp, "%08x\n", data[i]);
2010 fflush(fp);
2011 fclose(fp);
2012
2013 data = mpp_buffer_get_ptr(streambuf);
2014 size = MPP_ALIGN(p_hal->strm_len, 1);
2015 memset(name, 0, sizeof(name));
2016 sprintf(name, "%s/stream_%d.txt", path, g_frame_num);
2017 fp = fopen(name, "wb");
2018 fwrite((RK_U8*)data, 1, size, fp);
2019 fflush(fp);
2020 fclose(fp);
2021
2022 data = mpp_buffer_get_ptr(ctx->film_grain_mem);
2023 size = MPP_ALIGN(sizeof(AV1FilmGrainMemory), 2048);
2024 memset(name, 0, sizeof(name));
2025 sprintf(name, "%s/film_grain_mem_%d.txt", path, g_frame_num);
2026 fp = fopen(name, "wb");
2027 for ( i = 0; i < size / 4; i++)
2028 fprintf(fp, "%08x\n", data[i]);
2029 fflush(fp);
2030 fclose(fp);
2031
2032 data = mpp_buffer_get_ptr(ctx->prob_tbl_base);
2033 size = MPP_ALIGN(sizeof(AV1CDFs), 2048);
2034 memset(name, 0, sizeof(name));
2035 sprintf(name, "%s/prob_tbl_%d.txt", path, g_frame_num);
2036 fp = fopen(name, "wb");
2037 for ( i = 0; i < size / 4; i++)
2038 fprintf(fp, "%08x\n", data[i]);
2039 fflush(fp);
2040 fclose(fp);
2041
2042 data = mpp_buffer_get_ptr(ctx->prob_tbl_out_base);
2043 size = MPP_ALIGN(sizeof(AV1CDFs), 2048);
2044 memset(name, 0, sizeof(name));
2045 sprintf(name, "%s/prob_tbl_out_%d.txt", path, g_frame_num);
2046 fp = fopen(name, "wb");
2047 for ( i = 0; i < size / 4; i++)
2048 fprintf(fp, "%08x\n", data[i]);
2049 fflush(fp);
2050 fclose(fp);
2051
2052 g_frame_num ++;
2053 }
2054 #endif
2055
2056 regs->swreg7.sw_blackwhite_e = dxva->format.mono_chrome;
2057 regs->swreg7.sw_clip_to_restricted_range = dxva->film_grain.clip_to_restricted_range;
2058 regs->swreg7.sw_delta_q_res_log = dxva->quantization.delta_q_res;
2059 regs->swreg7.sw_delta_q_present = dxva->quantization.delta_q_present;
2060
2061 regs->swreg8.sw_idr_pic_e = dxva->format.frame_type == AV1_FRAME_KEY ||
2062 dxva->format.frame_type == AV1_FRAME_INTRA_ONLY;
2063 regs->swreg8.sw_quant_base_qindex = dxva->quantization.base_qindex;
2064 regs->swreg8.sw_bit_depth_y_minus8 = dxva->bitdepth - 8;
2065 regs->swreg8.sw_bit_depth_c_minus8 = dxva->bitdepth - 8;
2066
2067 regs->swreg11.sw_mcomp_filt_type = dxva->interp_filter;
2068 regs->swreg11.sw_high_prec_mv_e = dxva->coding.high_precision_mv;
2069 regs->swreg11.sw_comp_pred_mode = dxva->coding.reference_mode ? 2 : 0;
2070 regs->swreg11.sw_transform_mode = dxva->coding.tx_mode ? (dxva->coding.tx_mode + 2) : 0;
2071 regs->swreg12.sw_max_cb_size = dxva->coding.use_128x128_superblock ? 7 : 6;
2072 regs->swreg12.sw_min_cb_size = 3;
2073
2074 /* unused in the C model */
2075 regs->swreg12.sw_av1_comp_pred_fixed_ref = 0;
2076 regs->swreg13.sw_comp_pred_var_ref0_av1 = 0;
2077 regs->swreg13.sw_comp_pred_var_ref1_av1 = 0;
2078 regs->swreg14.sw_filt_level_seg0 = 0;
2079 regs->swreg15.sw_filt_level_seg1 = 0;
2080 regs->swreg16.sw_filt_level_seg2 = 0;
2081 regs->swreg17.sw_filt_level_seg3 = 0;
2082 regs->swreg18.sw_filt_level_seg4 = 0;
2083 regs->swreg19.sw_filt_level_seg5 = 0;
2084 regs->swreg31.sw_filt_level_seg6 = 0;
2085 regs->swreg32.sw_filt_level_seg7 = 0;
2086
2087
2088 regs->swreg13.sw_qp_delta_y_dc_av1 = dxva->quantization.y_dc_delta_q;
2089 regs->swreg13.sw_qp_delta_ch_dc_av1 = dxva->quantization.u_dc_delta_q;
2090 regs->swreg13.sw_qp_delta_ch_ac_av1 = dxva->quantization.u_ac_delta_q;
2091 regs->swreg47.sw_qmlevel_y = dxva->quantization.qm_y;
2092 regs->swreg48.sw_qmlevel_u = dxva->quantization.qm_u;
2093 regs->swreg49.sw_qmlevel_v = dxva->quantization.qm_v;
2094
2095 regs->swreg13.sw_lossless_e = dxva->coded_lossless;
2096 regs->swreg28.sw_quant_delta_v_dc = dxva->quantization.v_dc_delta_q;
2097 regs->swreg29.sw_quant_delta_v_ac = dxva->quantization.v_ac_delta_q;
2098
2099 regs->swreg31.sw_skip_ref0 = dxva->skip_ref0 ? dxva->skip_ref0 : 1;
2100 regs->swreg32.sw_skip_ref1 = dxva->skip_ref1 ? dxva->skip_ref1 : 1;
2101
2102 /* input / output buffer config */
2103 {
2104 // RK_U32 out_w = MPP_ALIGN(4 * width * bit_depth, 128) / 8;
2105 // RK_U32 out_h = height / 4;
2106 // RK_U32 y_stride = out_w * out_h;
2107 // RK_U32 uv_stride = y_stride / 2;
2108
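        /*
         * The decoder core writes Y, CbCr and MV data into one tile output
         * buffer: chroma follows luma at y_stride and motion vectors follow
         * at mv_offset (luma + chroma + 64 bytes). The bitstream base is
         * advanced by the 16-byte-aligned part of frame_tag_size; the
         * remaining bits are skipped through sw_strm_start_bit.
         */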
2109 RK_U32 y_stride = ctx->luma_size;
2110 RK_U32 uv_stride = y_stride / 2;
2111 RK_U32 mv_offset = y_stride + uv_stride + 64;
2112 RK_U32 offset = (dxva->frame_tag_size & (~0xf));
2113
2114 regs->addr_cfg.swreg65.sw_dec_out_ybase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);//mpp_buffer_get_fd(buffer);
2115 regs->addr_cfg.swreg99.sw_dec_out_cbase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
2116 mpp_dev_set_reg_offset(p_hal->dev, 99, y_stride);
2117 regs->addr_cfg.swreg133.sw_dec_out_dbase_lsb = mpp_buffer_get_fd(tile_out_buf->buf[0]);
2118 mpp_dev_set_reg_offset(p_hal->dev, 133, mv_offset);
2119
2120 /* if (ctx->fbc_en) {
2121 regs->swreg190.sw_dec_out_tybase_lsb = 0;// TODO:
2122 regs->swreg224.sw_dec_out_tcbase_lsb = 0;// TODO:
2123 }*/
2124
2125 regs->swreg258.sw_strm_buffer_len = MPP_ALIGN(p_hal->strm_len, 128);//
2126 regs->swreg5.sw_strm_start_bit = (dxva->frame_tag_size & 0xf) * 8; // bit start to decode
2127 regs->swreg6.sw_stream_len = MPP_ALIGN(p_hal->strm_len, 128);//p_hal->strm_len - offset;
2128 regs->swreg259.sw_strm_start_offset = 0;
2129 regs->addr_cfg.swreg168.sw_stream_base_msb = 0;
2130 regs->addr_cfg.swreg169.sw_stream_base_lsb = mpp_buffer_get_fd(streambuf);
2131 mpp_dev_set_reg_offset(p_hal->dev, 169, offset);
2132
2133 AV1D_DBG(AV1D_DBG_LOG, "stream len %d\n", p_hal->strm_len);
2134 AV1D_DBG(AV1D_DBG_LOG, "stream offset %d\n", offset);
2135 AV1D_DBG(AV1D_DBG_LOG, "stream tag_size %d\n", dxva->frame_tag_size);
2136 AV1D_DBG(AV1D_DBG_LOG, "stream start_bit %d\n", regs->swreg5.sw_strm_start_bit);
2137 }
2138 regs->swreg314.sw_dec_alignment = 64;
2139
2140 regs->addr_cfg.swreg175.sw_mc_sync_curr_base_lsb = mpp_buffer_get_fd(ctx->tile_buf);
2141 regs->addr_cfg.swreg177.sw_mc_sync_left_base_lsb = mpp_buffer_get_fd(ctx->tile_buf);
2142
2143 regs->swreg55.sw_apf_disable = 0;
2144 regs->swreg55.sw_apf_threshold = 8;
2145 regs->swreg58.sw_dec_buswidth = 2;
2146 regs->swreg58.sw_dec_max_burst = 16;
2147 regs->swreg266.sw_error_conceal_e = 0;
2148 regs->swreg265.sw_axi_rd_ostd_threshold = 64;
2149 regs->swreg265.sw_axi_wr_ostd_threshold = 64;
2150
2151 regs->swreg318.sw_ext_timeout_cycles = 0xfffffff;
2152 regs->swreg318.sw_ext_timeout_override_e = 1;
2153 regs->swreg319.sw_timeout_cycles = 0xfffffff;
2154 regs->swreg319.sw_timeout_override_e = 1;
2155
2156 /* pp cfg */
2157 regs->vdpu_av1d_pp_cfg.swreg320.sw_pp_out_e = 1;
2158 regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_in_format = 0;
2159 regs->vdpu_av1d_pp_cfg.swreg394.sw_pp0_dup_hor = 1;
2160 regs->vdpu_av1d_pp_cfg.swreg394.sw_pp0_dup_ver = 1;
2161 regs->vdpu_av1d_pp_cfg.swreg331.sw_pp_in_height = height / 2;
2162 regs->vdpu_av1d_pp_cfg.swreg331.sw_pp_in_width = width / 2;
2163 regs->vdpu_av1d_pp_cfg.swreg332.sw_pp_out_height = height;
2164 regs->vdpu_av1d_pp_cfg.swreg332.sw_pp_out_width = width;
2165 regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_y_stride = hor_stride;
2166 regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_c_stride = hor_stride;
2167
2168 // regs->vdpu_av1d_pp_cfg.swreg337.sw_pp_in_y_stride = hor_stride;
2169 // regs->vdpu_av1d_pp_cfg.swreg337.sw_pp_in_c_stride = hor_stride;
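    /*
     * FBC (compressed) output path: the post-processor writes tiled AFBC
     * data, so the visible area is padded to a 16-pixel grid through the
     * virtual top/left/bottom/right fields; 8 extra top lines are reserved
     * whenever any in-loop filter (superres scaling, CDEF, deblocking or
     * loop restoration) is active.
     */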
2170 if (ctx->fbc_en) {
2171 RK_U32 vir_left = 0, vir_right = 0, vir_top = 0, vir_bottom = 0;
2172 RK_U32 bypass_filter = !regs->swreg5.sw_superres_is_scaled &&
2173 !regs->swreg5.sw_enable_cdef &&
2174 !regs->swreg14.sw_filt_level0 &&
2175 !regs->swreg15.sw_filt_level1 &&
2176 !regs->swreg18.sw_lr_type;
2177
2178 regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_y_stride = dxva->bitdepth > 8 ?
2179 width * 2 : width;
2180 regs->vdpu_av1d_pp_cfg.swreg329.sw_pp_out_c_stride = dxva->bitdepth > 8 ?
2181 width * 2 : width;
2182 regs->swreg58.sw_dec_axi_wd_id_e = 1;
2183 regs->swreg58.sw_dec_axi_rd_id_e = 1;
2184 regs->vdpu_av1d_pp_cfg.swreg320.sw_pp_out_tile_e = 1;
2185 regs->vdpu_av1d_pp_cfg.swreg321.sw_pp_tile_size = 2;
2186
2187 vir_left = 0;
2188 if (((vir_left + width) % 16))
2189 vir_right = 16 - ((vir_left + width) % 16);
2190 else
2191 vir_right = 0;
2192
2193 if (!bypass_filter)
2194 vir_top = 8;
2195 else
2196 vir_top = 0;
2197
2198 if (((vir_top + height) % 16))
2199 vir_bottom = 16 - ((vir_top + height) % 16);
2200 else
2201 vir_bottom = 0;
2202
2203 regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_top = vir_top;
2204 regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_left = vir_left;
2205 regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_bottom = vir_bottom;
2206 regs->vdpu_av1d_pp_cfg.swreg503.sw_pp0_virtual_right = vir_right;
2207 mpp_frame_set_offset_y(mframe, vir_top);
2208 mpp_frame_set_ver_stride(mframe, vir_top + height + vir_bottom);
2209 regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_out_format = 0;
2210 regs->vdpu_av1d_pp_cfg.swreg326.sw_pp_out_lu_base_lsb = mpp_buffer_get_fd(buffer);
2211 regs->vdpu_av1d_pp_cfg.swreg328.sw_pp_out_ch_base_lsb = mpp_buffer_get_fd(buffer);
2212 regs->vdpu_av1d_pp_cfg.swreg505.sw_pp0_afbc_tile_base_lsb = mpp_buffer_get_fd(buffer);
2213 } else {
2214 RK_U32 out_w = hor_stride;
2215 RK_U32 out_h = ver_stride;
2216 RK_U32 y_stride = out_w * out_h;
2217 RK_U32 out_fmt = 0;
2218
2219 if ((mpp_frame_get_fmt(mframe) & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV420SP)
2220 out_fmt = 3;
2221
2222 /*
2223 * out_fmt:
2224 * 0 is 8bit or 10bit output by syntax
2225 * 3 is force 8bit output
2226 */
2227 regs->vdpu_av1d_pp_cfg.swreg322.sw_pp_out_format = out_fmt;
2228 regs->vdpu_av1d_pp_cfg.swreg326.sw_pp_out_lu_base_lsb = mpp_buffer_get_fd(buffer);
2229 regs->vdpu_av1d_pp_cfg.swreg328.sw_pp_out_ch_base_lsb = mpp_buffer_get_fd(buffer);
2230 mpp_dev_set_reg_offset(p_hal->dev, 328, y_stride);
2231 }
2232
2233 __RETURN:
2234 return ret = MPP_OK;
2235 }
2236
2237 MPP_RET vdpu_av1d_start(void *hal, HalTaskInfo *task)
2238 {
2239 MPP_RET ret = MPP_ERR_UNKNOW;
2240 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2241 INP_CHECK(ret, NULL == p_hal);
2242 if (task->dec.flags.parse_err ||
2243 task->dec.flags.ref_err) {
2244 goto __RETURN;
2245 }
2246
2247 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
2248 VdpuAv1dRegSet *regs = p_hal->fast_mode ?
2249 reg_ctx->reg_buf[task->dec.reg_index].regs :
2250 reg_ctx->regs;
2251 MppDev dev = p_hal->dev;
2252 #if DUMP_AV1_DATAS
2253 {
2254 RK_U32 i = 0;
2255 RK_U32 *p = (RK_U32*)regs;
2256 char fname[128];
2257 FILE *fp_in = NULL;
2258 static RK_U32 g_frame_no = 0;
2259
2260 sprintf(fname, "/data/video/reg_%d_in.txt", g_frame_no++);
2261 fp_in = fopen(fname, "wb");
2262 for (i = 0; i < sizeof(*regs) / 4; i++, p++)
2263 fprintf(fp_in, "reg[%3d] = %08x\n", i, *p);
2264
2265 fflush(fp_in);
2266 fclose(fp_in);
2267 }
2268 #endif
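    /*
     * Hand the register set to the kernel driver: queue a register write,
     * register the same area for read-back after decode, then send the
     * request to start the hardware.
     */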
2269 do {
2270 MppDevRegWrCfg wr_cfg;
2271 MppDevRegRdCfg rd_cfg;
2272
2273 wr_cfg.reg = regs;
2274 wr_cfg.size = sizeof(*regs);
2275 wr_cfg.offset = 0;
2276 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg);
2277 if (ret) {
2278 mpp_err_f("set register write failed %d\n", ret);
2279 break;
2280 }
2281
2282 rd_cfg.reg = regs;
2283 rd_cfg.size = sizeof(*regs);
2284 rd_cfg.offset = 0;
2285
2286 ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg);
2287 if (ret) {
2288 mpp_err_f("set register read failed %d\n", ret);
2289 break;
2290 }
2291 /* send request to hardware */
2292 ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL);
2293 if (ret) {
2294 mpp_err_f("send cmd failed %d\n", ret);
2295 break;
2296 }
2297 } while (0);
2298
2299 __RETURN:
2300 return ret = MPP_OK;
2301 }
2302
2303 MPP_RET vdpu_av1d_wait(void *hal, HalTaskInfo *task)
2304 {
2305 MPP_RET ret = MPP_ERR_UNKNOW;
2306 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2307
2308 INP_CHECK(ret, NULL == p_hal);
2309 VdpuAv1dRegCtx *reg_ctx = (VdpuAv1dRegCtx *)p_hal->reg_ctx;
2310 VdpuAv1dRegSet *p_regs = p_hal->fast_mode ?
2311 reg_ctx->reg_buf[task->dec.reg_index].regs :
2312 reg_ctx->regs;
2313
2314 if (task->dec.flags.parse_err ||
2315 task->dec.flags.ref_err) {
2316 goto __SKIP_HARD;
2317 }
2318
2319 ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL);
2320 if (ret)
2321 mpp_err_f("poll cmd failed %d\n", ret);
2322 #if DUMP_AV1_DATAS
2323 {
2324 char fname[128];
2325 FILE *fp_in = NULL;
2326 static RK_U32 g_frame_no = 0;
2327 RK_U32 *p = (RK_U32*)p_regs;
2328 RK_U32 i;
2329
2330 sprintf(fname, "/data/video/reg_%d_out.txt", g_frame_no++);
2331 fp_in = fopen(fname, "wb");
2332 for (i = 0; i < sizeof(*p_regs) / 4; i++, p++)
2333 fprintf(fp_in, "reg[%3d] = %08x\n", i, *p);
2334
2335 fflush(fp_in);
2336 fclose(fp_in);
2337 }
2338 #endif
2339
2340 __SKIP_HARD:
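    /*
     * Report completion to the parser: the callback carries the CDF tables
     * written back by the hardware (prob_tbl_out_base) so the caller can
     * pick up the updated probability context, and hard_err is raised when
     * the decode-ready interrupt bit is not set.
     */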
2341 if (p_hal->dec_cb) {
2342 DecCbHalDone m_ctx;
2343 RK_U32 *prob_out = (RK_U32*)mpp_buffer_get_ptr(reg_ctx->prob_tbl_out_base);
2344
2345 mpp_buffer_sync_ro_begin(reg_ctx->prob_tbl_out_base);
2346 m_ctx.task = mpp_buffer_get_ptr(reg_ctx->prob_tbl_out_base);//(void *)&task->dec;
2347 m_ctx.regs = (RK_U32 *)prob_out;
2348 if (!p_regs->swreg1.sw_dec_rdy_int/* decode err */)
2349 m_ctx.hard_err = 1;
2350 else
2351 m_ctx.hard_err = 0;
2352
2353 mpp_callback(p_hal->dec_cb, &m_ctx);
2354 }
2355 if (p_hal->fast_mode)
2356 reg_ctx->reg_buf[task->dec.reg_index].valid = 0;
2357
2358 (void)task;
2359 __RETURN:
2360 return ret = MPP_OK;
2361 }
2362
2363 MPP_RET vdpu_av1d_reset(void *hal)
2364 {
2365 MPP_RET ret = MPP_ERR_UNKNOW;
2366 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2367
2368 INP_CHECK(ret, NULL == p_hal);
2369
2370
2371 __RETURN:
2372 return ret = MPP_OK;
2373 }
2374
2375 MPP_RET vdpu_av1d_flush(void *hal)
2376 {
2377 MPP_RET ret = MPP_ERR_UNKNOW;
2378 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2379
2380 INP_CHECK(ret, NULL == p_hal);
2381
2382 __RETURN:
2383 return ret = MPP_OK;
2384 }
2385
2386 MPP_RET vdpu_av1d_control(void *hal, MpiCmd cmd_type, void *param)
2387 {
2388 MPP_RET ret = MPP_ERR_UNKNOW;
2389 Av1dHalCtx *p_hal = (Av1dHalCtx *)hal;
2390
2391 INP_CHECK(ret, NULL == p_hal);
2392
2393 switch ((MpiCmd)cmd_type) {
2394 case MPP_DEC_SET_FRAME_INFO: {
2395 MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param);
2396 RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param);
2397 RK_U32 imgheight = mpp_frame_get_height((MppFrame)param);
2398
2399 AV1D_DBG(AV1D_DBG_LOG, "control info: fmt %d, w %d, h %d\n", fmt, imgwidth, imgheight);
2400 if ((fmt & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV422SP) {
2401 mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
2402 }
2403 break;
2404 }
2405 case MPP_DEC_SET_OUTPUT_FORMAT: {
2406
2407 } break;
2408 default:
2409 break;
2410 }
2411
2412 __RETURN:
2413 return ret = MPP_OK;
2414 }
2415
2416 const MppHalApi hal_av1d_vdpu = {
2417 .name = "av1d_vdpu",
2418 .type = MPP_CTX_DEC,
2419 .coding = MPP_VIDEO_CodingAV1,
2420 .ctx_size = sizeof(VdpuAv1dRegCtx),
2421 .flag = 0,
2422 .init = vdpu_av1d_init,
2423 .deinit = vdpu_av1d_deinit,
2424 .reg_gen = vdpu_av1d_gen_regs,
2425 .start = vdpu_av1d_start,
2426 .wait = vdpu_av1d_wait,
2427 .reset = vdpu_av1d_reset,
2428 .flush = vdpu_av1d_flush,
2429 .control = vdpu_av1d_control,
2430 };
2431