1 /*
2 * Copyright 2016 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_m4vd_vdpu2"
18
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "mpp_mem.h"
23 #include "mpp_env.h"
24 #include "mpp_debug.h"
25 #include "mpp_buffer.h"
26 #include "mpp_common.h"
27
28 #include "mpg4d_syntax.h"
29 #include "hal_mpg4d_api.h"
30 #include "hal_m4vd_com.h"
31 #include "hal_m4vd_vdpu2.h"
32 #include "hal_m4vd_vdpu2_reg.h"
33 #include "mpp_dec_cb_param.h"
34
vdpu2_mpg4d_setup_regs_by_syntax(hal_mpg4_ctx * ctx,MppSyntax syntax)35 static void vdpu2_mpg4d_setup_regs_by_syntax(hal_mpg4_ctx *ctx, MppSyntax syntax)
36 {
37 M4vdVdpu2Regs_t *regs = ctx->regs;
38 DXVA2_DecodeBufferDesc **data = syntax.data;
39 DXVA_PicParams_MPEG4_PART2 *pp = NULL;
40 DXVA_QmatrixData *qm = NULL;
41 RK_S32 mv_buf_fd = mpp_buffer_get_fd(ctx->mv_buf);
42 RK_U32 stream_length = 0;
43 RK_U32 stream_used = 0;
44 RK_U32 i;
45
46 for (i = 0; i < syntax.number; i++) {
47 DXVA2_DecodeBufferDesc *desc = data[i];
48 switch (desc->CompressedBufferType) {
49 case DXVA2_PictureParametersBufferType : {
50 pp = (DXVA_PicParams_MPEG4_PART2 *)desc->pvPVPState;
51 } break;
52 case DXVA2_InverseQuantizationMatrixBufferType : {
53 qm = (DXVA_QmatrixData *)desc->pvPVPState;
54 } break;
55 case DXVA2_BitStreamDateBufferType : {
56 stream_length = desc->DataSize;
57 stream_used = desc->DataOffset;
58 ctx->bitstrm_len = stream_length;
59 } break;
60 default : {
61 mpp_err_f("found invalid buffer descriptor type %d\n", desc->CompressedBufferType);
62 } break;
63 }
64 }
65
66 mpp_assert(pp);
67 mpp_assert(qm);
68 mpp_assert(stream_length);
69 mpp_assert(stream_used);
70
71 // copy qp table to buffer
72 {
73 RK_U8 *dst = (RK_U8 *)mpp_buffer_get_ptr(ctx->qp_table);
74 RK_U8 *src = (qm->bNewQmatrix[0]) ? (qm->Qmatrix[0]) : (default_intra_matrix);
75
76 memcpy(dst, src, 64);
77 dst += 64;
78
79 src = (qm->bNewQmatrix[1]) ? (qm->Qmatrix[1]) : (default_inter_matrix);
80 memcpy(dst, src, 64);
81 }
82
83 regs->reg120.sw_pic_mb_width = (pp->vop_width + 15) >> 4;
84 regs->reg120.sw_pic_mb_hight_p = (pp->vop_height + 15) >> 4;
85 if (pp->custorm_version == 4) {
86 regs->reg120.sw_mb_width_off = pp->vop_width & 0xf;
87 regs->reg120.sw_mb_height_off = pp->vop_height & 0xf;
88 } else {
89 regs->reg120.sw_mb_width_off = 0;
90 regs->reg120.sw_mb_height_off = 0;
91 }
92 regs->reg53_dec_mode = 1;
93
94 /* note: When comparing bit 19 of reg136(that is sw_alt_scan_flag_e) with bit 6 of
95 ** reg120(that is sw_alt_scan_e), we may be confused about the function of
96 ** these two bits. According to C Model, just sw_alt_scan_flag_e is set,
97 ** but not sw_alt_scan_e.
98 */
99 regs->reg136.sw_alt_scan_flag_e = pp->alternate_vertical_scan_flag;
100 regs->reg52_error_concealment.sw_xdim_mbst = 0;
101 regs->reg52_error_concealment.sw_ydim_mbst = 0;
102 regs->reg50_dec_ctrl.sw_dblk_flt_dis = 1;
103 regs->reg136.sw_rounding = pp->vop_rounding_type;
104 regs->reg122.sw_intradc_vlc_thr = pp->intra_dc_vlc_thr;
105 regs->reg51_stream_info.sw_qp_init_val = pp->vop_quant;
106 regs->reg122.sw_sync_markers_en = 1;
107
108 {
109 /*
110 * update stream base address here according to consumed bit length
111 * 1. hardware start address has to be 64 bit align
112 * 2. hardware need to know which is the start bit in
113 * 2. pass (10bit fd + (offset << 10)) register value to kernel
114 */
115 RK_U32 val = regs->reg64_input_stream_base;
116 RK_U32 consumed_bytes = stream_used >> 3;
117 RK_U32 consumed_bytes_align = consumed_bytes & (~0x7);
118 RK_U32 start_bit_offset = stream_used & 0x3F;
119 RK_U32 left_bytes = stream_length - consumed_bytes_align;
120
121 regs->reg64_input_stream_base = val;
122 if (consumed_bytes_align)
123 mpp_dev_set_reg_offset(ctx->dev, 64, consumed_bytes_align);
124 regs->reg122.sw_stream_start_word = start_bit_offset;
125 regs->reg51_stream_info.sw_stream_len = left_bytes;
126 }
127 regs->reg122.sw_vop_time_incr = pp->vop_time_increment_resolution;
128
129 switch (pp->vop_coding_type) {
130 case MPEG4_B_VOP : {
131 RK_U32 time_bp = pp->time_bp;
132 RK_U32 time_pp = pp->time_pp;
133
134 RK_U32 trb_per_trd_d0 = MPP_DIV((((RK_S64)(1 * time_bp + 0)) << 27) + 1 * (time_pp - 1), time_pp);
135 RK_U32 trb_per_trd_d1 = MPP_DIV((((RK_S64)(2 * time_bp + 1)) << 27) + 2 * (time_pp - 0), 2 * time_pp + 1);
136 RK_U32 trb_per_trd_dm1 = MPP_DIV((((RK_S64)(2 * time_bp - 1)) << 27) + 2 * (time_pp - 1), 2 * time_pp - 1);
137
138 regs->reg57_enable_ctrl.sw_pic_type_sel1 = 1;
139 regs->reg57_enable_ctrl.sw_pic_type_sel0 = 1;
140 regs->reg136.sw_rounding = 0;
141 regs->reg131_ref0_base = 1;
142
143 mpp_assert(ctx->fd_ref1 >= 0);
144 if (ctx->fd_ref1 >= 0) {
145 regs->reg131_ref0_base = (RK_U32)ctx->fd_ref1;
146 regs->reg148_ref1_base = (RK_U32)ctx->fd_ref1;
147 } else {
148 regs->reg131_ref0_base = (RK_U32)ctx->fd_curr;
149 regs->reg148_ref1_base = (RK_U32)ctx->fd_curr;
150 }
151
152 mpp_assert(ctx->fd_ref0 >= 0);
153 if (ctx->fd_ref0 >= 0) {
154 regs->reg134_ref2_base = (RK_U32)ctx->fd_ref0;
155 regs->reg135_ref3_base = (RK_U32)ctx->fd_ref0;
156 } else {
157 regs->reg134_ref2_base = (RK_U32)ctx->fd_curr;
158 regs->reg135_ref3_base = (RK_U32)ctx->fd_curr;
159 }
160
161 regs->reg136.sw_hrz_bit_of_fwd_mv = pp->vop_fcode_forward;
162 regs->reg136.sw_vrz_bit_of_fwd_mv = pp->vop_fcode_forward;
163 regs->reg136.sw_hrz_bit_of_bwd_mv = pp->vop_fcode_backward;
164 regs->reg136.sw_vrz_bit_of_bwd_mv = pp->vop_fcode_backward;
165 regs->reg57_enable_ctrl.sw_dmmv_wr_en = 0;
166 regs->reg62_directmv_base = mv_buf_fd;
167 regs->reg137.sw_trb_per_trd_d0 = trb_per_trd_d0;
168 regs->reg139.sw_trb_per_trd_d1 = trb_per_trd_d1;
169 regs->reg138.sw_trb_per_trd_dm1 = trb_per_trd_dm1;
170 } break;
171 case MPEG4_P_VOP : {
172 regs->reg57_enable_ctrl.sw_pic_type_sel1 = 0;
173 regs->reg57_enable_ctrl.sw_pic_type_sel0 = 1;
174
175 if (ctx->fd_ref0 >= 0) {
176 regs->reg131_ref0_base = (RK_U32)ctx->fd_ref0;
177 regs->reg148_ref1_base = (RK_U32)ctx->fd_ref0;
178 } else {
179 regs->reg131_ref0_base = (RK_U32)ctx->fd_curr;
180 regs->reg148_ref1_base = (RK_U32)ctx->fd_curr;
181 }
182 regs->reg134_ref2_base = (RK_U32)ctx->fd_curr;
183 regs->reg135_ref3_base = (RK_U32)ctx->fd_curr;
184
185 regs->reg136.sw_hrz_bit_of_fwd_mv = pp->vop_fcode_forward;
186 regs->reg136.sw_vrz_bit_of_fwd_mv = pp->vop_fcode_forward;
187 regs->reg57_enable_ctrl.sw_dmmv_wr_en = 1;
188 regs->reg62_directmv_base = mv_buf_fd;
189 } break;
190 case MPEG4_I_VOP : {
191 regs->reg57_enable_ctrl.sw_pic_type_sel1 = 0;
192 regs->reg57_enable_ctrl.sw_pic_type_sel0 = 0;
193
194 regs->reg131_ref0_base = (RK_U32)ctx->fd_curr;
195 regs->reg148_ref1_base = (RK_U32)ctx->fd_curr;
196 regs->reg134_ref2_base = (RK_U32)ctx->fd_curr;
197 regs->reg135_ref3_base = (RK_U32)ctx->fd_curr;
198
199 regs->reg57_enable_ctrl.sw_dmmv_wr_en = 0;
200 regs->reg62_directmv_base = mv_buf_fd;
201
202 regs->reg136.sw_hrz_bit_of_fwd_mv = 1;
203 regs->reg136.sw_vrz_bit_of_fwd_mv = 1;
204 } break;
205 default : {
206 /* no nothing */
207 } break;
208 }
209
210 if (pp->interlaced) {
211 regs->reg57_enable_ctrl.sw_curpic_code_sel = 1;
212 regs->reg57_enable_ctrl.sw_curpic_stru_sel = 0;
213 regs->reg120.sw_topfieldfirst_e = pp->top_field_first;
214 }
215
216 regs->reg136.sw_prev_pic_type = pp->prev_coding_type;
217 regs->reg122.sw_quant_type_1_en = pp->quant_type;
218 regs->reg61_qtable_base = mpp_buffer_get_fd(ctx->qp_table);
219 regs->reg136.sw_fwd_mv_y_resolution = pp->quarter_sample;
220
221 }
222
vdpu2_mpg4d_init(void * hal,MppHalCfg * cfg)223 MPP_RET vdpu2_mpg4d_init(void *hal, MppHalCfg *cfg)
224 {
225 MPP_RET ret = MPP_OK;
226 M4vdVdpu2Regs_t *regs = NULL;
227 MppBufferGroup group = NULL;
228 MppBuffer mv_buf = NULL;
229 MppBuffer qp_table = NULL;
230 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
231
232 mpp_assert(hal);
233
234 ret = mpp_buffer_group_get_internal(&group, MPP_BUFFER_TYPE_ION);
235 if (ret) {
236 mpp_err_f("failed to get buffer group ret %d\n", ret);
237 goto ERR_RET;
238 }
239
240 ret = mpp_buffer_get(group, &mv_buf, MPEG4_MAX_MV_BUF_SIZE);
241 if (ret) {
242 mpp_err_f("failed to get mv buffer ret %d\n", ret);
243 goto ERR_RET;
244 }
245
246 ret = mpp_buffer_get(group, &qp_table, 64 * 2 * sizeof(RK_U8));
247 if (ret) {
248 mpp_err_f("failed to get qp talbe buffer ret %d\n", ret);
249 goto ERR_RET;
250 }
251
252 regs = mpp_calloc(M4vdVdpu2Regs_t, 1);
253 if (NULL == regs) {
254 mpp_err_f("failed to malloc register ret\n");
255 ret = MPP_ERR_MALLOC;
256 goto ERR_RET;
257 }
258
259 ret = mpp_dev_init(&ctx->dev, VPU_CLIENT_VDPU2);
260 if (ret) {
261 mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
262 goto ERR_RET;
263 }
264
265 ctx->frm_slots = cfg->frame_slots;
266 ctx->pkt_slots = cfg->packet_slots;
267 ctx->dec_cb = cfg->dec_cb;
268 ctx->group = group;
269 ctx->mv_buf = mv_buf;
270 ctx->qp_table = qp_table;
271 ctx->regs = regs;
272 cfg->dev = ctx->dev;
273
274 mpp_env_get_u32("hal_mpg4d_debug", &hal_mpg4d_debug, 0);
275
276 return ret;
277 ERR_RET:
278 if (regs) {
279 mpp_free(regs);
280 regs = NULL;
281 }
282
283 if (qp_table) {
284 mpp_buffer_put(qp_table);
285 qp_table = NULL;
286 }
287
288 if (mv_buf) {
289 mpp_buffer_put(mv_buf);
290 mv_buf = NULL;
291 }
292
293 if (group) {
294 mpp_buffer_group_put(group);
295 group = NULL;
296 }
297
298 return ret;
299 }
300
vdpu2_mpg4d_deinit(void * hal)301 MPP_RET vdpu2_mpg4d_deinit(void *hal)
302 {
303 MPP_RET ret = MPP_OK;
304 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
305
306 mpp_assert(hal);
307
308 if (ctx->regs) {
309 mpp_free(ctx->regs);
310 ctx->regs = NULL;
311 }
312
313 if (ctx->qp_table) {
314 mpp_buffer_put(ctx->qp_table);
315 ctx->qp_table = NULL;
316 }
317
318 if (ctx->mv_buf) {
319 mpp_buffer_put(ctx->mv_buf);
320 ctx->mv_buf = NULL;
321 }
322
323 if (ctx->group) {
324 mpp_buffer_group_put(ctx->group);
325 ctx->group = NULL;
326 }
327
328 if (ctx->dev) {
329 mpp_dev_deinit(ctx->dev);
330 ctx->dev = NULL;
331 }
332
333 return ret;
334 }
335
vdpu2_mpg4d_gen_regs(void * hal,HalTaskInfo * syn)336 MPP_RET vdpu2_mpg4d_gen_regs(void *hal, HalTaskInfo *syn)
337 {
338 MPP_RET ret = MPP_OK;
339 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
340 HalDecTask *task = &syn->dec;
341 MppBuffer buf_frm_curr = NULL;
342 MppBuffer buf_frm_ref0 = NULL;
343 MppBuffer buf_frm_ref1 = NULL;
344 MppBuffer buf_pkt = NULL;
345 M4vdVdpu2Regs_t *regs = ctx->regs;
346
347 mpp_assert(task->valid);
348 mpp_assert(task->input >= 0);
349 mpp_assert(task->output >= 0);
350
351 memset(regs, 0, sizeof(M4vdVdpu2Regs_t));
352
353 /*
354 * basic register configuration setup here
355 */
356 regs->reg54_endian.sw_dec_out_endian = 1;
357 regs->reg54_endian.sw_dec_in_endian = 1;
358 regs->reg54_endian.sw_dec_in_wordsp = 1;
359 regs->reg54_endian.sw_dec_out_wordsp = 1;
360 regs->reg54_endian.sw_dec_strswap32_e = 1;
361 regs->reg54_endian.sw_dec_strendian_e = 1;
362 regs->reg56_axi_ctrl.sw_dec_max_burlen = 16;
363 regs->reg52_error_concealment.sw_adv_pref_thrd = 1;
364 regs->reg57_enable_ctrl.sw_timeout_sts_en = 1;
365 regs->reg57_enable_ctrl.sw_dec_clkgate_en = 1;
366 regs->reg57_enable_ctrl.sw_dec_st_work = 1;
367 regs->reg59.sw_pflt_set0_tap0 = -1;
368 regs->reg59.sw_pflt_set0_tap1 = 3;
369 regs->reg59.sw_pflt_set0_tap2 = -6;
370 regs->reg153.sw_pred_bc_tap_0_3 = 20;
371
372 /* setup buffer for input / output / reference */
373 mpp_buf_slot_get_prop(ctx->pkt_slots, task->input, SLOT_BUFFER, &buf_pkt);
374 mpp_assert(buf_pkt);
375 vpu_mpg4d_get_buffer_by_index(ctx, task->output, &buf_frm_curr);
376 vpu_mpg4d_get_buffer_by_index(ctx, task->refer[0], &buf_frm_ref0);
377 vpu_mpg4d_get_buffer_by_index(ctx, task->refer[1], &buf_frm_ref1);
378
379 /* address registers setup first */
380 ctx->fd_curr = mpp_buffer_get_fd(buf_frm_curr);
381 ctx->fd_ref0 = (buf_frm_ref0) ? (mpp_buffer_get_fd(buf_frm_ref0)) : (-1);
382 ctx->fd_ref1 = (buf_frm_ref1) ? (mpp_buffer_get_fd(buf_frm_ref1)) : (-1);
383 regs->reg63_cur_pic_base = (RK_U32)ctx->fd_curr;
384 regs->reg64_input_stream_base = mpp_buffer_get_fd(buf_pkt);
385
386 /* setup other registers, here will update packet address */
387 vdpu2_mpg4d_setup_regs_by_syntax(ctx, task->syntax);
388 /* memset tails to zero for stream buffer */
389 {
390 RK_U8 *ptr = (RK_U8 *)mpp_buffer_get_ptr(buf_pkt);
391 RK_U32 strm_len = MPP_ALIGN(ctx->bitstrm_len, 16) + 64;
392 memset(ptr + ctx->bitstrm_len, 0, strm_len - ctx->bitstrm_len);
393 }
394
395 return ret;
396 }
397
vdpu2_mpg4d_start(void * hal,HalTaskInfo * task)398 MPP_RET vdpu2_mpg4d_start(void *hal, HalTaskInfo *task)
399 {
400 MPP_RET ret = MPP_OK;
401 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
402 RK_U32* regs = (RK_U32 *)ctx->regs;
403
404 if (hal_mpg4d_debug & MPG4D_HAL_DBG_REG_PUT) {
405 RK_U32 reg_count = (sizeof(M4vdVdpu2Regs_t) / sizeof(RK_U32));
406 RK_U32 i = 0;
407
408 for (i = 0; i < reg_count; i++) {
409 mpp_log("reg[%03d]: %08x\n", i, regs[i]);
410 }
411 }
412
413 do {
414 MppDevRegWrCfg wr_cfg;
415 MppDevRegRdCfg rd_cfg;
416 RK_U32 reg_size = sizeof(M4vdVdpu2Regs_t);
417
418 wr_cfg.reg = regs;
419 wr_cfg.size = reg_size;
420 wr_cfg.offset = 0;
421
422 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
423 if (ret) {
424 mpp_err_f("set register write failed %d\n", ret);
425 break;
426 }
427
428 rd_cfg.reg = regs;
429 rd_cfg.size = reg_size;
430 rd_cfg.offset = 0;
431
432 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
433 if (ret) {
434 mpp_err_f("set register read failed %d\n", ret);
435 break;
436 }
437
438 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
439 if (ret) {
440 mpp_err_f("send cmd failed %d\n", ret);
441 break;
442 }
443 } while (0);
444
445 (void)task;
446 return ret;
447 }
448
vdpu2_mpg4d_wait(void * hal,HalTaskInfo * task)449 MPP_RET vdpu2_mpg4d_wait(void *hal, HalTaskInfo *task)
450 {
451 MPP_RET ret = MPP_OK;
452 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
453 M4vdVdpu2Regs_t *regs = (M4vdVdpu2Regs_t *)ctx->regs;
454
455 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
456 if (ret)
457 mpp_err_f("poll cmd failed %d\n", ret);
458
459 if (hal_mpg4d_debug & MPG4D_HAL_DBG_REG_GET) {
460 RK_U32 reg_count = (sizeof(M4vdVdpu2Regs_t) / sizeof(RK_U32));
461 RK_U32 i = 0;
462
463 for (i = 0; i < reg_count; i++) {
464 mpp_log("reg[%03d]: %08x\n", i, ((RK_U32 *)regs)[i]);
465 }
466 }
467
468 if (ctx->dec_cb) {
469 DecCbHalDone param = { 0 };
470
471 if (!regs->reg55_Interrupt.sw_dec_rdy_int)
472 param.hard_err = 1;
473
474 param.task = (void *)&task->dec;
475 param.regs = (RK_U32 *)ctx->regs;
476
477 mpp_callback(ctx->dec_cb, ¶m);
478 }
479
480 (void)task;
481 return ret;
482 }
483