xref: /OK3568_Linux_fs/external/rknpu2/examples/rknn_yolov5_demo/src/main_video.cc (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // Copyright (c) 2023 by Rockchip Electronics Co., Ltd. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 /*-------------------------------------------
16                 Includes
17 -------------------------------------------*/
18 #include <dlfcn.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <sys/time.h>
24 
25 #include "im2d.h"
26 #include "rga.h"
27 #include "RgaUtils.h"
28 
29 #include "rknn_api.h"
30 #include "postprocess.h"
31 
32 #include "utils/mpp_decoder.h"
33 #include "utils/mpp_encoder.h"
34 #include "utils/drawing.h"
35 #if defined(BUILD_VIDEO_RTSP)
36 #include "mk_mediakit.h"
37 #endif
38 
39 #define OUT_VIDEO_PATH "out.h264"
40 
41 typedef struct {
42   rknn_context rknn_ctx;
43   rknn_input_output_num io_num;
44   rknn_tensor_attr* input_attrs;
45   rknn_tensor_attr* output_attrs;
46   int model_channel;
47   int model_width;
48   int model_height;
49   FILE* out_fp;
50   MppDecoder* decoder;
51   MppEncoder* encoder;
52 } rknn_app_context_t;
53 
/*
 * Description of one decoded picture as handed to inference_model().
 */
typedef struct {
  int   width;          // picture width passed to the RGA wrapper
  int   height;         // picture height passed to the RGA wrapper
  int   width_stride;   // horizontal stride of the underlying buffer
  int   height_stride;  // vertical stride of the underlying buffer
  int   format;         // RGA pixel format constant (RK_FORMAT_*)
  char *virt_addr;      // CPU-visible address of the picture data
  int   fd;             // buffer file descriptor supplied by the decoder
} image_frame_t;
63 
64 /*-------------------------------------------
65                   Functions
66 -------------------------------------------*/
67 
dump_tensor_attr(rknn_tensor_attr * attr)68 static void dump_tensor_attr(rknn_tensor_attr* attr)
69 {
70   printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
71          "zp=%d, scale=%f\n",
72          attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
73          attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
74          get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
75 }
76 
/**
 * Convert a timeval into a microsecond count.
 *
 * The arithmetic is carried out in double precision so that the
 * seconds-to-microseconds multiplication cannot overflow on targets where
 * tv_sec is a 32-bit integer.
 *
 * @param t  time value to convert
 * @return   t expressed in microseconds
 */
double __get_us(struct timeval t) { return (double)t.tv_sec * 1000000.0 + (double)t.tv_usec; }
78 
/**
 * Read exactly @p sz bytes starting at byte offset @p ofst of @p fp.
 *
 * @param fp    open stream to read from; NULL is rejected
 * @param ofst  absolute offset to seek to before reading
 * @param sz    number of bytes to read
 * @return      malloc()ed buffer holding the bytes (caller frees), or NULL if
 *              the seek, the allocation or the read fails. A short read is
 *              treated as a failure so that callers never receive a buffer
 *              that is only partially initialized.
 */
static unsigned char* load_data(FILE* fp, size_t ofst, size_t sz)
{
  unsigned char* data = NULL;
  size_t         nread;

  if (NULL == fp) {
    return NULL;
  }

  if (fseek(fp, (long)ofst, SEEK_SET) != 0) {
    printf("blob seek failure.\n");
    return NULL;
  }

  data = (unsigned char*)malloc(sz);
  if (data == NULL) {
    printf("buffer malloc failure.\n");
    return NULL;
  }

  nread = fread(data, 1, sz, fp);
  if (nread != sz) {
    printf("blob read failure.\n");
    free(data);
    return NULL;
  }
  return data;
}
104 
read_file_data(const char * filename,int * model_size)105 static unsigned char* read_file_data(const char* filename, int* model_size)
106 {
107   FILE* fp;
108   unsigned char* data;
109 
110   fp = fopen(filename, "rb");
111   if (NULL == fp) {
112     printf("Open file %s failed.\n", filename);
113     return NULL;
114   }
115 
116   fseek(fp, 0, SEEK_END);
117   int size = ftell(fp);
118 
119   data = load_data(fp, 0, size);
120 
121   fclose(fp);
122 
123   *model_size = size;
124   return data;
125 }
126 
/**
 * Write @p size bytes from @p data to the file at @p path, replacing any
 * existing content.
 *
 * @param path  destination file path
 * @param data  bytes to write
 * @param size  number of bytes to write
 * @return      0 when every byte was written and the file closed cleanly,
 *              -1 if the file cannot be opened, the write is short, or
 *              closing (which flushes buffered data) fails
 */
static int write_data_to_file(const char *path, char *data, unsigned int size) {
  FILE *fp;
  int rc = 0;

  // Binary mode: the payload is raw bytes, not text.
  fp = fopen(path, "wb");
  if(fp == NULL) {
    printf("open error: %s", path);
    return -1;
  }

  if (fwrite(data, 1, size, fp) != size) {
    printf("write error: %s\n", path);
    rc = -1;
  }

  // fclose() flushes; a failure here means data may not have reached the file.
  if (fclose(fp) != 0) {
    rc = -1;
  }
  return rc;
}
142 
init_model(const char * model_path,rknn_app_context_t * app_ctx)143 static int init_model(const char* model_path, rknn_app_context_t* app_ctx) {
144   int ret;
145   rknn_context ctx;
146 
147   /* Create the neural network */
148   printf("Loading mode...\n");
149   int model_data_size = 0;
150   unsigned char* model_data = read_file_data(model_path, &model_data_size);
151   if (model_data == NULL) {
152     return -1;
153   }
154 
155   ret = rknn_init(&ctx, model_data, model_data_size, 0, NULL);
156   if (ret < 0) {
157     printf("rknn_init error ret=%d\n", ret);
158     return -1;
159   }
160 
161   if (model_data) {
162     free(model_data);
163   }
164 
165   rknn_sdk_version version;
166   ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version, sizeof(rknn_sdk_version));
167   if (ret < 0) {
168     printf("rknn_query RKNN_QUERY_SDK_VERSION error ret=%d\n", ret);
169     return -1;
170   }
171   printf("sdk version: %s driver version: %s\n", version.api_version, version.drv_version);
172 
173   ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &app_ctx->io_num, sizeof(rknn_input_output_num));
174   if (ret < 0) {
175     printf("rknn_query RKNN_QUERY_IN_OUT_NUM error ret=%d\n", ret);
176     return -1;
177   }
178   printf("model input num: %d, output num: %d\n", app_ctx->io_num.n_input, app_ctx->io_num.n_output);
179 
180   rknn_tensor_attr* input_attrs = (rknn_tensor_attr*)malloc(app_ctx->io_num.n_input * sizeof(rknn_tensor_attr));
181   memset(input_attrs, 0, sizeof(input_attrs));
182   for (int i = 0; i < app_ctx->io_num.n_input; i++) {
183     input_attrs[i].index = i;
184     ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
185     if (ret < 0) {
186       printf("rknn_query RKNN_QUERY_INPUT_ATTR error ret=%d\n", ret);
187       return -1;
188     }
189     dump_tensor_attr(&(input_attrs[i]));
190   }
191 
192   rknn_tensor_attr* output_attrs = (rknn_tensor_attr*)malloc(app_ctx->io_num.n_output * sizeof(rknn_tensor_attr));
193   memset(output_attrs, 0, sizeof(output_attrs));
194   for (int i = 0; i < app_ctx->io_num.n_output; i++) {
195     output_attrs[i].index = i;
196     ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
197     if (ret < 0) {
198       printf("rknn_query RKNN_QUERY_OUTPUT_ATTR error ret=%d\n", ret);
199       return -1;
200     }
201     dump_tensor_attr(&(output_attrs[i]));
202   }
203 
204   app_ctx->input_attrs = input_attrs;
205   app_ctx->output_attrs = output_attrs;
206   app_ctx->rknn_ctx = ctx;
207 
208   if (input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
209     printf("model is NCHW input fmt\n");
210     app_ctx->model_channel = input_attrs[0].dims[1];
211     app_ctx->model_height  = input_attrs[0].dims[2];
212     app_ctx->model_width   = input_attrs[0].dims[3];
213   } else {
214     printf("model is NHWC input fmt\n");
215     app_ctx->model_height  = input_attrs[0].dims[1];
216     app_ctx->model_width   = input_attrs[0].dims[2];
217     app_ctx->model_channel = input_attrs[0].dims[3];
218   }
219   printf("model input height=%d, width=%d, channel=%d\n", app_ctx->model_height, app_ctx->model_width, app_ctx->model_channel);
220 
221   return 0;
222 }
223 
release_model(rknn_app_context_t * app_ctx)224 static int release_model(rknn_app_context_t* app_ctx) {
225   if (app_ctx->rknn_ctx != NULL) {
226     rknn_destroy(app_ctx->rknn_ctx);
227   }
228   free(app_ctx->input_attrs);
229   free(app_ctx->output_attrs);
230   deinitPostProcess();
231   return 0;
232 }
233 
inference_model(rknn_app_context_t * app_ctx,image_frame_t * img,detect_result_group_t * detect_result)234 static int inference_model(rknn_app_context_t* app_ctx, image_frame_t* img, detect_result_group_t* detect_result) {
235   int ret;
236   rknn_context ctx = app_ctx->rknn_ctx;
237   int model_width = app_ctx->model_width;
238   int model_height = app_ctx->model_height;
239   int model_channel = app_ctx->model_channel;
240 
241   struct timeval start_time, stop_time;
242   const float    nms_threshold      = NMS_THRESH;
243   const float    box_conf_threshold = BOX_THRESH;
244   // You may not need resize when src resulotion equals to dst resulotion
245   void* resize_buf = nullptr;
246   // init rga context
247   rga_buffer_t src;
248   rga_buffer_t dst;
249   im_rect      src_rect;
250   im_rect      dst_rect;
251   memset(&src_rect, 0, sizeof(src_rect));
252   memset(&dst_rect, 0, sizeof(dst_rect));
253   memset(&src, 0, sizeof(src));
254   memset(&dst, 0, sizeof(dst));
255 
256   printf("input image %dx%d stride %dx%d format=%d\n", img->width, img->height, img->width_stride, img->height_stride, img->format);
257 
258   float scale_w = (float)model_width / img->width;
259   float scale_h = (float)model_height / img->height;
260 
261   rknn_input inputs[1];
262   memset(inputs, 0, sizeof(inputs));
263   inputs[0].index        = 0;
264   inputs[0].type         = RKNN_TENSOR_UINT8;
265   inputs[0].size         = model_width * model_height * model_channel;
266   inputs[0].fmt          = RKNN_TENSOR_NHWC;
267   inputs[0].pass_through = 0;
268 
269   printf("resize with RGA!\n");
270   resize_buf = malloc(model_width * model_height * model_channel);
271   memset(resize_buf, 0, model_width * model_height * model_channel);
272 
273   src = wrapbuffer_virtualaddr((void*)img->virt_addr, img->width, img->height, img->format, img->width_stride, img->height_stride);
274   dst = wrapbuffer_virtualaddr((void*)resize_buf, model_width, model_height, RK_FORMAT_RGB_888);
275   ret = imcheck(src, dst, src_rect, dst_rect);
276   if (IM_STATUS_NOERROR != ret) {
277     printf("%d, check error! %s", __LINE__, imStrError((IM_STATUS)ret));
278     return -1;
279   }
280   IM_STATUS STATUS = imresize(src, dst);
281 
282   inputs[0].buf = resize_buf;
283 
284   gettimeofday(&start_time, NULL);
285   rknn_inputs_set(ctx, app_ctx->io_num.n_input, inputs);
286 
287   rknn_output outputs[app_ctx->io_num.n_output];
288   memset(outputs, 0, sizeof(outputs));
289   for (int i = 0; i < app_ctx->io_num.n_output; i++) {
290     outputs[i].want_float = 0;
291   }
292 
293   ret = rknn_run(ctx, NULL);
294   ret = rknn_outputs_get(ctx, app_ctx->io_num.n_output, outputs, NULL);
295   gettimeofday(&stop_time, NULL);
296   printf("once run use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
297 
298   printf("post process config: box_conf_threshold = %.2f, nms_threshold = %.2f\n", box_conf_threshold, nms_threshold);
299 
300   std::vector<float> out_scales;
301   std::vector<int32_t> out_zps;
302   for (int i = 0; i < app_ctx->io_num.n_output; ++i) {
303     out_scales.push_back(app_ctx->output_attrs[i].scale);
304     out_zps.push_back(app_ctx->output_attrs[i].zp);
305   }
306 
307   post_process((int8_t*)outputs[0].buf, (int8_t*)outputs[1].buf, (int8_t*)outputs[2].buf, model_height, model_width,
308                box_conf_threshold, nms_threshold, scale_w, scale_h, out_zps, out_scales, detect_result);
309   ret = rknn_outputs_release(ctx, app_ctx->io_num.n_output, outputs);
310 
311   if (resize_buf) {
312     free(resize_buf);
313   }
314   return 0;
315 }
316 
mpp_decoder_frame_callback(void * userdata,int width_stride,int height_stride,int width,int height,int format,int fd,void * data)317 void mpp_decoder_frame_callback(void* userdata, int width_stride, int height_stride, int width, int height, int format, int fd, void* data) {
318 
319   rknn_app_context_t* ctx = (rknn_app_context_t*)userdata;
320 
321   int ret = 0;
322   static int frame_index = 0;
323   frame_index++;
324 
325   void* mpp_frame = NULL;
326   int mpp_frame_fd = 0;
327   void* mpp_frame_addr = NULL;
328   int enc_data_size;
329 
330   rga_buffer_t origin;
331   rga_buffer_t src;
332 
333   if (ctx->encoder == NULL) {
334     MppEncoder* mpp_encoder = new MppEncoder();
335     MppEncoderParams enc_params;
336     memset(&enc_params, 0, sizeof(MppEncoderParams));
337     enc_params.width = width;
338     enc_params.height = height;
339     enc_params.hor_stride = width_stride;
340     enc_params.ver_stride = height_stride;
341     enc_params.fmt = MPP_FMT_YUV420SP;
342     //enc_params.type = MPP_VIDEO_CodingHEVC;
343     //Note: rk3562只能支持h264格式的视频流
344     enc_params.type = MPP_VIDEO_CodingAVC;
345     mpp_encoder->Init(enc_params, NULL);
346 
347     ctx->encoder = mpp_encoder;
348   }
349 
350   int enc_buf_size = ctx->encoder->GetFrameSize();
351   char* enc_data = (char*)malloc(enc_buf_size);
352 
353   image_frame_t img;
354   img.width = width;
355   img.height = height;
356   img.width_stride = width_stride;
357   img.height_stride = height_stride;
358   img.fd = fd;
359   img.virt_addr = (char*)data;
360   img.format = RK_FORMAT_YCbCr_420_SP;
361   detect_result_group_t detect_result;
362   memset(&detect_result, 0, sizeof(detect_result_group_t));
363 
364   ret = inference_model(ctx, &img, &detect_result);
365   if (ret != 0) {
366     printf("inference model fail\n");
367     goto RET;
368   }
369 
370   mpp_frame = ctx->encoder->GetInputFrameBuffer();
371   mpp_frame_fd = ctx->encoder->GetInputFrameBufferFd(mpp_frame);
372   mpp_frame_addr = ctx->encoder->GetInputFrameBufferAddr(mpp_frame);
373 
374   // Copy To another buffer avoid to modify mpp decoder buffer
375   origin = wrapbuffer_fd(fd, width, height, RK_FORMAT_YCbCr_420_SP, width_stride, height_stride);
376   src = wrapbuffer_fd(mpp_frame_fd, width, height, RK_FORMAT_YCbCr_420_SP, width_stride, height_stride);
377   imcopy(origin, src);
378 
379   // Draw objects
380   for (int i = 0; i < detect_result.count; i++) {
381     detect_result_t* det_result = &(detect_result.results[i]);
382     printf("%s @ (%d %d %d %d) %f\n", det_result->name, det_result->box.left, det_result->box.top,
383            det_result->box.right, det_result->box.bottom, det_result->prop);
384     int x1 = det_result->box.left;
385     int y1 = det_result->box.top;
386     int x2 = det_result->box.right;
387     int y2 = det_result->box.bottom;
388     draw_rectangle_yuv420sp((unsigned char*)mpp_frame_addr, width_stride, height_stride, x1, y1, x2-x1+1, y2-y1+1, 0x00FF0000, 4);
389   }
390 
391   // Encode to file
392   // Write header on first frame
393   if (frame_index == 1) {
394     enc_data_size = ctx->encoder->GetHeader(enc_data, enc_buf_size);
395     fwrite(enc_data, 1, enc_data_size, ctx->out_fp);
396   }
397   memset(enc_data, 0, enc_buf_size);
398   enc_data_size = ctx->encoder->Encode(mpp_frame, enc_data, enc_buf_size);
399   fwrite(enc_data, 1, enc_data_size, ctx->out_fp);
400 
401 RET:
402   if (enc_data != nullptr) {
403     free(enc_data);
404   }
405 }
406 
process_video_file(rknn_app_context_t * ctx,const char * path)407 int process_video_file(rknn_app_context_t* ctx, const char* path)
408 {
409   int video_size;
410   char* video_data = (char*)read_file_data(path, &video_size);
411   char* video_data_end = video_data + video_size;
412   printf("read video size=%d\n", video_size);
413 
414   const int SIZE = 8192;
415   char* video_data_ptr = video_data;
416 
417   do {
418       int pkt_eos = 0;
419       int size = SIZE;
420       if (video_data_ptr + size >= video_data_end) {
421           pkt_eos = 1;
422           size = video_data_end - video_data_ptr;
423       }
424 
425       ctx->decoder->Decode((uint8_t*)video_data_ptr, size, pkt_eos);
426 
427       video_data_ptr += size;
428 
429       if (video_data_ptr >= video_data_end) {
430           printf("reset decoder\n");
431           break;
432       }
433 
434       // LOGD("video_data_ptr=%p video_data_end=%p", video_data_ptr, video_data_end);
435       // usleep(10*1000);
436   } while (1);
437 
438   return 0;
439 }
440 
441 #if defined(BUILD_VIDEO_RTSP)
on_track_frame_out(void * user_data,mk_frame frame)442 void API_CALL on_track_frame_out(void *user_data, mk_frame frame) {
443   rknn_app_context_t *ctx = (rknn_app_context_t *) user_data;
444   printf("on_track_frame_out ctx=%p\n", ctx);
445   const char* data = mk_frame_get_data(frame);
446   size_t size = mk_frame_get_data_size(frame);
447   printf("decoder=%p\n", ctx->decoder);
448   ctx->decoder->Decode((uint8_t*)data, size, 0);
449 }
450 
on_mk_play_event_func(void * user_data,int err_code,const char * err_msg,mk_track tracks[],int track_count)451 void API_CALL on_mk_play_event_func(void *user_data, int err_code, const char *err_msg, mk_track tracks[],
452                                     int track_count) {
453   rknn_app_context_t *ctx = (rknn_app_context_t *) user_data;
454   if (err_code == 0) {
455       //success
456       printf("play success!");
457       int i;
458       for (i = 0; i < track_count; ++i) {
459           if (mk_track_is_video(tracks[i])) {
460               log_info("got video track: %s", mk_track_codec_name(tracks[i]));
461               //监听track数据回调
462               mk_track_add_delegate(tracks[i], on_track_frame_out, user_data);
463           }
464       }
465   } else {
466       printf("play failed: %d %s", err_code, err_msg);
467   }
468 }
469 
on_mk_shutdown_func(void * user_data,int err_code,const char * err_msg,mk_track tracks[],int track_count)470 void API_CALL on_mk_shutdown_func(void *user_data, int err_code, const char *err_msg, mk_track tracks[], int track_count) {
471   printf("play interrupted: %d %s", err_code, err_msg);
472 }
473 
process_video_rtsp(rknn_app_context_t * ctx,const char * url)474 int process_video_rtsp(rknn_app_context_t* ctx, const char* url)
475 {
476   mk_config config;
477   memset(&config, 0, sizeof(mk_config));
478   config.log_mask = LOG_CONSOLE;
479   mk_env_init(&config);
480   mk_player player = mk_player_create();
481   mk_player_set_on_result(player, on_mk_play_event_func, ctx);
482   mk_player_set_on_shutdown(player, on_mk_shutdown_func, ctx);
483   mk_player_play(player, url);
484 
485   printf("enter any key to exit\n");
486   getchar();
487 
488   if (player) {
489       mk_player_release(player);
490   }
491   return 0;
492 }
493 #endif
494 
495 /*-------------------------------------------
496                   Main Functions
497 -------------------------------------------*/
main(int argc,char ** argv)498 int main(int argc, char** argv)
499 {
500   int status = 0;
501   int ret;
502 
503   if (argc != 4) {
504     printf("Usage: %s <rknn_model> <video_path> <video_type 264/265> \n", argv[0]);
505     return -1;
506   }
507 
508   char* model_name = (char*)argv[1];
509   char* video_name = argv[2];
510   int video_type = atoi(argv[3]);
511 
512   rknn_app_context_t app_ctx;
513   memset(&app_ctx, 0, sizeof(rknn_app_context_t));
514 
515   ret = init_model(model_name, &app_ctx);
516   if (ret != 0) {
517     printf("init model fail\n");
518     return -1;
519   }
520 
521   if (app_ctx.decoder == NULL) {
522     MppDecoder* decoder = new MppDecoder();
523     decoder->Init(video_type, 30, &app_ctx);
524     decoder->SetCallback(mpp_decoder_frame_callback);
525     app_ctx.decoder = decoder;
526   }
527 
528   if (app_ctx.out_fp == NULL) {
529     FILE* fp = fopen(OUT_VIDEO_PATH, "w");
530     if(fp == NULL) {
531         printf("open %s error\n", OUT_VIDEO_PATH);
532         return -1;
533     }
534     app_ctx.out_fp = fp;
535   }
536 
537   printf("app_ctx=%p decoder=%p\n", &app_ctx, app_ctx.decoder);
538 
539   if (strncmp(video_name, "rtsp", 4) == 0) {
540 #if defined(BUILD_VIDEO_RTSP)
541         process_video_rtsp(&app_ctx, video_name);
542 #else
543         printf("rtsp no support\n");
544 #endif
545   } else {
546     process_video_file(&app_ctx, video_name);
547   }
548 
549   printf("waiting finish\n");
550   usleep(3*1000*1000);
551 
552   // release
553   fflush(app_ctx.out_fp);
554   fclose(app_ctx.out_fp);
555 
556   if (app_ctx.decoder != nullptr) {
557     delete(app_ctx.decoder);
558     app_ctx.decoder = nullptr;
559   }
560   if (app_ctx.encoder != nullptr) {
561     delete(app_ctx.encoder);
562     app_ctx.encoder = nullptr;
563   }
564 
565   release_model(&app_ctx);
566 
567   return 0;
568 }
569