1 /*
2 * Copyright 2017 Rockchip Electronics Co. LTD
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define MODULE_TAG "hal_m4vd_vdpu1"
18
19 #include <stdio.h>
20 #include <string.h>
21
22 #include "mpp_mem.h"
23 #include "mpp_env.h"
24 #include "mpp_debug.h"
25 #include "mpp_buffer.h"
26 #include "mpp_common.h"
27
28 #include "mpg4d_syntax.h"
29 #include "hal_mpg4d_api.h"
30 #include "hal_m4vd_com.h"
31 #include "hal_m4vd_vdpu1.h"
32 #include "hal_m4vd_vdpu1_reg.h"
33 #include "mpp_dec_cb_param.h"
34
vdpu1_mpg4d_setup_regs_by_syntax(hal_mpg4_ctx * ctx,MppSyntax syntax)35 static void vdpu1_mpg4d_setup_regs_by_syntax(hal_mpg4_ctx *ctx, MppSyntax syntax)
36 {
37 M4vdVdpu1Regs_t *regs = ctx->regs;
38 DXVA2_DecodeBufferDesc **data = syntax.data;
39 DXVA_PicParams_MPEG4_PART2 *pp = NULL;
40 DXVA_QmatrixData *qm = NULL;
41 RK_S32 mv_buf_fd = mpp_buffer_get_fd(ctx->mv_buf);
42 RK_U32 stream_length = 0;
43 RK_U32 stream_used = 0;
44 RK_U32 i;
45
46 for (i = 0; i < syntax.number; i++) {
47 DXVA2_DecodeBufferDesc *desc = data[i];
48 switch (desc->CompressedBufferType) {
49 case DXVA2_PictureParametersBufferType : {
50 pp = (DXVA_PicParams_MPEG4_PART2 *)desc->pvPVPState;
51 } break;
52 case DXVA2_InverseQuantizationMatrixBufferType : {
53 qm = (DXVA_QmatrixData *)desc->pvPVPState;
54 } break;
55 case DXVA2_BitStreamDateBufferType : {
56 stream_length = desc->DataSize;
57 stream_used = desc->DataOffset;
58 ctx->bitstrm_len = stream_length;
59 } break;
60 default : {
61 mpp_err_f("found invalid buffer descriptor type %d\n", desc->CompressedBufferType);
62 } break;
63 }
64 }
65
66 mpp_assert(pp);
67 mpp_assert(qm);
68 mpp_assert(stream_length);
69 mpp_assert(stream_used);
70
71 // copy qp table to buffer
72 {
73 RK_U8 *dst = (RK_U8 *)mpp_buffer_get_ptr(ctx->qp_table);
74 RK_U8 *src = (qm->bNewQmatrix[0]) ? (qm->Qmatrix[0]) : (default_intra_matrix);
75
76 memcpy(dst, src, 64);
77 dst += 64;
78
79 src = (qm->bNewQmatrix[1]) ? (qm->Qmatrix[1]) : (default_inter_matrix);
80 memcpy(dst, src, 64);
81 }
82
83 regs->SwReg04.sw_pic_mb_width = (pp->vop_width + 15) >> 4;
84 regs->SwReg04.sw_pic_mb_hight_p = (pp->vop_height + 15) >> 4;
85
86 if (pp->custorm_version == 4) {
87 regs->SwReg04.sw_mb_width_off = pp->vop_width & 0xf;
88 regs->SwReg04.sw_mb_height_off = pp->vop_height & 0xf;
89 } else {
90 regs->SwReg04.sw_mb_width_off = 0;
91 regs->SwReg04.sw_mb_height_off = 0;
92 }
93
94 regs->SwReg03.sw_dec_mode = 1;
95
96 /* note: When comparing bit 19 of reg136(that is sw_alt_scan_flag_e) with bit 6 of
97 ** reg120(that is sw_alt_scan_e), we may be confused about the function of
98 ** these two bits. According to C Model, just sw_alt_scan_flag_e is set,
99 ** but not sw_alt_scan_e.
100 */
101 regs->SwReg18.sw_alt_scan_flag_e = pp->alternate_vertical_scan_flag;
102 regs->SwReg48.sw_startmb_x = 0;
103 regs->SwReg48.sw_startmb_y = 0;
104 regs->SwReg03.sw_filtering_dis = 1;
105 regs->SwReg18.sw_mpeg4_vc1_rc = pp->vop_rounding_type;
106 regs->SwReg05.sw_intradc_vlc_thr = pp->intra_dc_vlc_thr;
107 regs->SwReg06.sw_init_qp = pp->vop_quant;
108 regs->SwReg05.sw_sync_markers_e = 1;
109
110 {
111 /*
112 * update stream base address here according to consumed bit length
113 * 1. hardware start address has to be 64 bit align
114 * 2. hardware need to know which is the start bit in
115 * 2. pass (10bit fd + (offset << 10)) register value to kernel
116 */
117 RK_U32 val = regs->SwReg12.sw_rlc_vlc_base;
118 RK_U32 consumed_bytes = stream_used >> 3;
119 RK_U32 consumed_bytes_align = consumed_bytes & (~0x7);
120 RK_U32 start_bit_offset = stream_used & 0x3F;
121 RK_U32 left_bytes = stream_length - consumed_bytes_align;
122
123 regs->SwReg12.sw_rlc_vlc_base = val;
124 if (consumed_bytes_align)
125 mpp_dev_set_reg_offset(ctx->dev, 12, consumed_bytes_align);
126 regs->SwReg05.sw_strm_start_bit = start_bit_offset;
127 regs->SwReg06.sw_stream_len = left_bytes;
128 }
129 regs->SwReg05.sw_vop_time_incr = pp->vop_time_increment_resolution;
130
131 switch (pp->vop_coding_type) {
132 case MPEG4_B_VOP : {
133 RK_U32 time_bp = pp->time_bp;
134 RK_U32 time_pp = pp->time_pp;
135
136 RK_U32 trb_per_trd_d0 = MPP_DIV((((RK_S64)(1 * time_bp + 0)) << 27) + 1 * (time_pp - 1), time_pp);
137 RK_U32 trb_per_trd_d1 = MPP_DIV((((RK_S64)(2 * time_bp + 1)) << 27) + 2 * (time_pp - 0), 2 * time_pp + 1);
138 RK_U32 trb_per_trd_dm1 = MPP_DIV((((RK_S64)(2 * time_bp - 1)) << 27) + 2 * (time_pp - 1), 2 * time_pp - 1);
139
140 regs->SwReg03.sw_pic_b_e = 1;
141 regs->SwReg03.sw_pic_inter_e = 1;
142 regs->SwReg18.sw_mpeg4_vc1_rc = 0;
143 regs->SwReg14.sw_refer0_base = 1;
144
145 mpp_assert(ctx->fd_ref1 >= 0);
146 if (ctx->fd_ref1 >= 0) {
147 regs->SwReg14.sw_refer0_base = (RK_U32)ctx->fd_ref1;
148 regs->SwReg15.sw_refer1_base = (RK_U32)ctx->fd_ref1;
149 } else {
150 regs->SwReg14.sw_refer0_base = (RK_U32)ctx->fd_curr;
151 regs->SwReg15.sw_refer1_base = (RK_U32)ctx->fd_curr;
152 }
153
154 mpp_assert(ctx->fd_ref0 >= 0);
155 if (ctx->fd_ref0 >= 0) {
156 regs->SwReg16.sw_refer2_base = (RK_U32)ctx->fd_ref0;
157 regs->SwReg17.sw_refer3_base = (RK_U32)ctx->fd_ref0;
158 } else {
159 regs->SwReg16.sw_refer2_base = (RK_U32)ctx->fd_curr;
160 regs->SwReg17.sw_refer3_base = (RK_U32)ctx->fd_curr;
161 }
162
163 regs->SwReg18.sw_fcode_fwd_hor = pp->vop_fcode_forward;
164 regs->SwReg18.sw_fcode_fwd_ver = pp->vop_fcode_forward;
165 regs->SwReg18.sw_fcode_bwd_hor = pp->vop_fcode_backward;
166 regs->SwReg18.sw_fcode_bwd_ver = pp->vop_fcode_backward;
167 regs->SwReg03.sw_write_mvs_e = 0;
168 regs->SwReg41.sw_dir_mv_base = mv_buf_fd;
169 regs->SwReg19.sw_refer5_base = trb_per_trd_d0;
170 regs->SwReg21.sw_refer7_base = trb_per_trd_d1;
171 regs->SwReg20.sw_refer6_base = trb_per_trd_dm1;
172 } break;
173 case MPEG4_P_VOP : {
174 regs->SwReg03.sw_pic_b_e = 0;
175 regs->SwReg03.sw_pic_inter_e = 1;
176
177 if (ctx->fd_ref0 >= 0) {
178 regs->SwReg14.sw_refer0_base = (RK_U32)ctx->fd_ref0;
179 regs->SwReg15.sw_refer1_base = (RK_U32)ctx->fd_ref0;
180 } else {
181 regs->SwReg14.sw_refer0_base = (RK_U32)ctx->fd_curr;
182 regs->SwReg15.sw_refer1_base = (RK_U32)ctx->fd_curr;
183 }
184 regs->SwReg16.sw_refer2_base = (RK_U32)ctx->fd_curr;
185 regs->SwReg17.sw_refer3_base = (RK_U32)ctx->fd_curr;
186
187 regs->SwReg18.sw_fcode_fwd_hor = pp->vop_fcode_forward;
188 regs->SwReg18.sw_fcode_fwd_ver = pp->vop_fcode_forward;
189 regs->SwReg03.sw_write_mvs_e = 1;
190 regs->SwReg41.sw_dir_mv_base = mv_buf_fd;
191 } break;
192 case MPEG4_I_VOP : {
193 regs->SwReg03.sw_pic_b_e = 0;
194 regs->SwReg03.sw_pic_inter_e = 0;
195
196 regs->SwReg14.sw_refer0_base = (RK_U32)ctx->fd_curr;
197 regs->SwReg15.sw_refer1_base = (RK_U32)ctx->fd_curr;
198 regs->SwReg16.sw_refer2_base = (RK_U32)ctx->fd_curr;
199 regs->SwReg17.sw_refer3_base = (RK_U32)ctx->fd_curr;
200
201 regs->SwReg03.sw_write_mvs_e = 0;
202 regs->SwReg41.sw_dir_mv_base = mv_buf_fd;
203
204 regs->SwReg18.sw_fcode_fwd_hor = 1;
205 regs->SwReg18.sw_fcode_fwd_ver = 1;
206 } break;
207 default : {
208 /* no nothing */
209 } break;
210 }
211
212 if (pp->interlaced) {
213 regs->SwReg03.sw_pic_interlace_e = 1;
214 regs->SwReg03.sw_pic_fieldmode_e = 0;
215 regs->SwReg04.sw_topfieldfirst_e = pp->top_field_first;
216 }
217
218 regs->SwReg18.sw_prev_anc_type = pp->prev_coding_type;
219 regs->SwReg05.sw_type1_quant_e = pp->quant_type;
220 regs->SwReg40.sw_qtable_base = mpp_buffer_get_fd(ctx->qp_table);
221 regs->SwReg18.sw_mv_accuracy_fwd = pp->quarter_sample;
222
223 }
224
vdpu1_mpg4d_init(void * hal,MppHalCfg * cfg)225 MPP_RET vdpu1_mpg4d_init(void *hal, MppHalCfg *cfg)
226 {
227 MPP_RET ret = MPP_OK;
228 M4vdVdpu1Regs_t *regs = NULL;
229 MppBufferGroup group = NULL;
230 MppBuffer mv_buf = NULL;
231 MppBuffer qp_table = NULL;
232 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
233
234 mpp_assert(hal);
235
236 ret = mpp_buffer_group_get_internal(&group, MPP_BUFFER_TYPE_ION);
237 if (ret) {
238 mpp_err_f("failed to get buffer group ret %d\n", ret);
239 goto ERR_RET;
240 }
241
242 ret = mpp_buffer_get(group, &mv_buf, MPEG4_MAX_MV_BUF_SIZE);
243 if (ret) {
244 mpp_err_f("failed to get mv buffer ret %d\n", ret);
245 goto ERR_RET;
246 }
247
248 ret = mpp_buffer_get(group, &qp_table, 64 * 2 * sizeof(RK_U8));
249 if (ret) {
250 mpp_err_f("failed to get qp talbe buffer ret %d\n", ret);
251 goto ERR_RET;
252 }
253
254 regs = mpp_calloc(M4vdVdpu1Regs_t, 1);
255 if (NULL == regs) {
256 mpp_err_f("failed to malloc register ret\n");
257 ret = MPP_ERR_MALLOC;
258 goto ERR_RET;
259 }
260
261 ret = mpp_dev_init(&ctx->dev, VPU_CLIENT_VDPU1);
262 if (ret) {
263 mpp_err_f("mpp_dev_init failed. ret: %d\n", ret);
264 goto ERR_RET;
265 }
266
267 ctx->frm_slots = cfg->frame_slots;
268 ctx->pkt_slots = cfg->packet_slots;
269 ctx->dec_cb = cfg->dec_cb;
270 ctx->group = group;
271 ctx->mv_buf = mv_buf;
272 ctx->qp_table = qp_table;
273 ctx->regs = regs;
274 cfg->dev = ctx->dev;
275
276 mpp_env_get_u32("hal_mpg4d_debug", &hal_mpg4d_debug, 0);
277
278 return ret;
279 ERR_RET:
280 if (regs) {
281 mpp_free(regs);
282 regs = NULL;
283 }
284
285 if (qp_table) {
286 mpp_buffer_put(qp_table);
287 qp_table = NULL;
288 }
289
290 if (mv_buf) {
291 mpp_buffer_put(mv_buf);
292 mv_buf = NULL;
293 }
294
295 if (group) {
296 mpp_buffer_group_put(group);
297 group = NULL;
298 }
299
300 return ret;
301 }
302
vdpu1_mpg4d_deinit(void * hal)303 MPP_RET vdpu1_mpg4d_deinit(void *hal)
304 {
305 MPP_RET ret = MPP_OK;
306 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
307
308 mpp_assert(hal);
309
310 if (ctx->regs) {
311 mpp_free(ctx->regs);
312 ctx->regs = NULL;
313 }
314
315 if (ctx->qp_table) {
316 mpp_buffer_put(ctx->qp_table);
317 ctx->qp_table = NULL;
318 }
319
320 if (ctx->mv_buf) {
321 mpp_buffer_put(ctx->mv_buf);
322 ctx->mv_buf = NULL;
323 }
324
325 if (ctx->group) {
326 mpp_buffer_group_put(ctx->group);
327 ctx->group = NULL;
328 }
329
330 if (ctx->dev) {
331 mpp_dev_deinit(ctx->dev);
332 ctx->dev = NULL;
333 }
334
335 return ret;
336 }
337
vdpu1_mpg4d_gen_regs(void * hal,HalTaskInfo * syn)338 MPP_RET vdpu1_mpg4d_gen_regs(void *hal, HalTaskInfo *syn)
339 {
340 MPP_RET ret = MPP_OK;
341 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
342 HalDecTask *task = &syn->dec;
343 MppBuffer buf_frm_curr = NULL;
344 MppBuffer buf_frm_ref0 = NULL;
345 MppBuffer buf_frm_ref1 = NULL;
346 MppBuffer buf_pkt = NULL;
347 M4vdVdpu1Regs_t *regs = ctx->regs;
348
349 mpp_assert(task->valid);
350 mpp_assert(task->input >= 0);
351 mpp_assert(task->output >= 0);
352
353 memset(regs, 0, sizeof(M4vdVdpu1Regs_t));
354
355 /*
356 * basic register configuration setup here
357 */
358 regs->SwReg02.sw_dec_out_endian = 1;
359 regs->SwReg02.sw_dec_in_endian = 1;
360 regs->SwReg02.sw_dec_inswap32_e = 1;
361 regs->SwReg02.sw_dec_outswap32_e = 1;
362 regs->SwReg02.sw_dec_strswap32_e = 1;
363 regs->SwReg02.sw_dec_strendian_e = 1;
364 regs->SwReg02.sw_dec_max_burst = 16;
365 regs->SwReg55.sw_apf_threshold = 1;
366 regs->SwReg02.sw_dec_timeout_e = 1;
367 regs->SwReg02.sw_dec_clk_gate_e = 1;
368 regs->SwReg01.sw_dec_en = 1;
369 regs->SwReg49.sw_pred_bc_tap_0_0 = -1;
370 regs->SwReg49.sw_pred_bc_tap_0_1 = 3;
371 regs->SwReg49.sw_pred_bc_tap_0_2 = -6;
372 regs->SwReg34.sw_pred_bc_tap_0_3 = 20;
373
374 /* setup buffer for input / output / reference */
375 mpp_buf_slot_get_prop(ctx->pkt_slots, task->input, SLOT_BUFFER, &buf_pkt);
376 mpp_assert(buf_pkt);
377 vpu_mpg4d_get_buffer_by_index(ctx, task->output, &buf_frm_curr);
378 vpu_mpg4d_get_buffer_by_index(ctx, task->refer[0], &buf_frm_ref0);
379 vpu_mpg4d_get_buffer_by_index(ctx, task->refer[1], &buf_frm_ref1);
380
381 /* address registers setup first */
382 ctx->fd_curr = mpp_buffer_get_fd(buf_frm_curr);
383 ctx->fd_ref0 = (buf_frm_ref0) ? (mpp_buffer_get_fd(buf_frm_ref0)) : (-1);
384 ctx->fd_ref1 = (buf_frm_ref1) ? (mpp_buffer_get_fd(buf_frm_ref1)) : (-1);
385 regs->SwReg13.dec_out_st_adr = (RK_U32)ctx->fd_curr;
386 regs->SwReg12.sw_rlc_vlc_base = mpp_buffer_get_fd(buf_pkt);
387
388 /* setup other registers, here will update packet address */
389 vdpu1_mpg4d_setup_regs_by_syntax(ctx, task->syntax);
390 /* memset tails to zero for stream buffer */
391 {
392 RK_U8 *ptr = (RK_U8 *)mpp_buffer_get_ptr(buf_pkt);
393 RK_U32 strm_len = MPP_ALIGN(ctx->bitstrm_len, 16) + 64;
394 memset(ptr + ctx->bitstrm_len, 0, strm_len - ctx->bitstrm_len);
395 }
396
397 return ret;
398 }
399
vdpu1_mpg4d_start(void * hal,HalTaskInfo * task)400 MPP_RET vdpu1_mpg4d_start(void *hal, HalTaskInfo *task)
401 {
402 MPP_RET ret = MPP_OK;
403 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
404 RK_U32* regs = (RK_U32 *)ctx->regs;
405
406 if (hal_mpg4d_debug & MPG4D_HAL_DBG_REG_PUT) {
407 RK_U32 reg_count = (sizeof(M4vdVdpu1Regs_t) / sizeof(RK_U32));
408 RK_U32 i = 0;
409
410 for (i = 2; i < reg_count; i++) {
411 mpp_log("reg[%03d]: %08x\n", i, regs[i]);
412 }
413 }
414
415 do {
416 MppDevRegWrCfg wr_cfg;
417 MppDevRegRdCfg rd_cfg;
418 RK_U32 reg_size = sizeof(M4vdVdpu1Regs_t);
419
420 wr_cfg.reg = regs;
421 wr_cfg.size = reg_size;
422 wr_cfg.offset = 0;
423
424 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_WR, &wr_cfg);
425 if (ret) {
426 mpp_err_f("set register write failed %d\n", ret);
427 break;
428 }
429
430 rd_cfg.reg = regs;
431 rd_cfg.size = reg_size;
432 rd_cfg.offset = 0;
433
434 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_REG_RD, &rd_cfg);
435 if (ret) {
436 mpp_err_f("set register read failed %d\n", ret);
437 break;
438 }
439
440 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);
441 if (ret) {
442 mpp_err_f("send cmd failed %d\n", ret);
443 break;
444 }
445 } while (0);
446
447 (void)task;
448 return ret;
449 }
450
vdpu1_mpg4d_wait(void * hal,HalTaskInfo * task)451 MPP_RET vdpu1_mpg4d_wait(void *hal, HalTaskInfo *task)
452 {
453 MPP_RET ret = MPP_OK;
454 hal_mpg4_ctx *ctx = (hal_mpg4_ctx *)hal;
455 M4vdVdpu1Regs_t *regs = (M4vdVdpu1Regs_t *)ctx->regs;
456
457 ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_POLL, NULL);
458 if (ret)
459 mpp_err_f("poll cmd failed %d\n", ret);
460
461 if (hal_mpg4d_debug & MPG4D_HAL_DBG_REG_GET) {
462 RK_U32 reg_count = (sizeof(M4vdVdpu1Regs_t) / sizeof(RK_U32));
463 RK_U32 i = 0;
464
465 for (i = 0; i < reg_count; i++) {
466 mpp_log("reg[%03d]: %08x\n", i, ((RK_U32 *)regs)[i]);
467 }
468 }
469 if (ctx->dec_cb) {
470 DecCbHalDone param;
471
472 param.task = (void *)&task->dec;
473 param.regs = (RK_U32 *)ctx->regs;
474 param.hard_err = !regs->SwReg01.sw_dec_rdy_int;
475
476 mpp_callback(ctx->dec_cb, ¶m);
477 }
478
479 memset(®s->SwReg01, 0, sizeof(RK_U32));
480
481 (void)task;
482 return ret;
483 }
484