xref: /OK3568_Linux_fs/external/rknpu2/examples/rknn_yolov5_android_apk_demo/app/src/main/cpp/yolo_image.cc (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /**
2   * @ClassName yolo_image
3   * @Description inference code for yolo
4   * @Author raul.rao
5   * @Date 2022/5/23 11:10
6   * @Version 1.0
7   */
8 
9 #include <cstdarg>
10 #include <cstdio>
11 #include <cstdlib>
12 #include <fstream>
13 #include <iostream>
14 #include <memory>
15 #include <sstream>
16 #include <string>
17 #include <vector>
18 #include <ctime>
19 
20 #include <cstdint>
21 
22 #include "rknn_api.h"
23 
24 #include "yolo_image.h"
25 #include "rga/rga.h"
26 #include "rga/im2d.h"
27 #include "rga/im2d_version.h"
28 #include "post_process.h"
29 
30 //#define DEBUG_DUMP
31 //#define EVAL_TIME
32 #define ZERO_COPY 1
33 #define DO_NOT_FLIP -1
34 
35 int g_inf_count = 0;
36 
37 int g_post_count = 0;
38 
39 rknn_context ctx = 0;
40 
41 bool created = false;
42 
43 int img_width = 0;    // the width of the actual input image
44 int img_height = 0;   // the height of the actual input image
45 
46 int m_in_width = 0;   // the width of the RKNN model input
47 int m_in_height = 0;  // the height of the RKNN model input
48 int m_in_channel = 0; // the channel of the RKNN model input
49 
50 float scale_w = 0.0;
51 float scale_h = 0.0;
52 
53 uint32_t n_input = 1;
54 uint32_t n_output = 3;
55 
56 rknn_tensor_attr input_attrs[1];
57 rknn_tensor_attr output_attrs[3];
58 
59 rknn_tensor_mem *input_mems[1];
60 rknn_tensor_mem *output_mems[3];
61 
62 rga_buffer_t g_rga_src;
63 rga_buffer_t g_rga_dst;
64 
65 std::vector<float> out_scales;
66 std::vector<int32_t> out_zps;
67 
/**
 * Converts a timeval into a microsecond count.
 *
 * @param t  time value (seconds + microseconds)
 * @return   t expressed in microseconds, as a double
 */
double __get_us(struct timeval t)
{
    // Promote to double before scaling: tv_sec * 1000000 is otherwise evaluated
    // in the integer type of tv_sec and overflows where that type is 32 bits wide.
    return static_cast<double>(t.tv_sec) * 1000000.0 + static_cast<double>(t.tv_usec);
}
69 
70 
create(int im_height,int im_width,int im_channel,char * model_path)71 int create(int im_height, int im_width, int im_channel, char *model_path)
72 {
73     img_height = im_height;
74     img_width = im_width;
75 
76     LOGI("try rknn_init!")
77 
78     // 0. RGA version check
79     LOGI("RGA API Version: %s", RGA_API_VERSION)
80     // Please refer to the link to confirm the RGA driver version, make sure it is higher than 1.2.4
81     // https://github.com/airockchip/librga/blob/main/docs/Rockchip_FAQ_RGA_CN.md#rga-driver
82 
83     // 1. Load model
84     FILE *fp = fopen(model_path, "rb");
85     if(fp == NULL) {
86         LOGE("fopen %s fail!\n", model_path);
87         return -1;
88     }
89     fseek(fp, 0, SEEK_END);
90     uint32_t model_len = ftell(fp);
91     void *model = malloc(model_len);
92     fseek(fp, 0, SEEK_SET);
93     if(model_len != fread(model, 1, model_len, fp)) {
94         LOGE("fread %s fail!\n", model_path);
95         free(model);
96         fclose(fp);
97         return -1;
98     }
99 
100     fclose(fp);
101 
102     // 2. Init RKNN model
103     int ret = rknn_init(&ctx, model, model_len, 0, nullptr);
104     free(model);
105 
106     if(ret < 0) {
107         LOGE("rknn_init fail! ret=%d\n", ret);
108         return -1;
109     }
110 
111     // 3. Query input/output attr.
112     rknn_input_output_num io_num;
113     rknn_query_cmd cmd = RKNN_QUERY_IN_OUT_NUM;
114     // 3.1 Query input/output num.
115     ret = rknn_query(ctx, cmd, &io_num, sizeof(io_num));
116     if (ret != RKNN_SUCC) {
117         LOGE("rknn_query io_num fail!ret=%d\n", ret);
118         return -1;
119     }
120     n_input = io_num.n_input;
121     n_output = io_num.n_output;
122 
123     // 3.2 Query input attributes
124     memset(input_attrs, 0, n_input * sizeof(rknn_tensor_attr));
125     for (int i = 0; i < n_input; ++i) {
126         input_attrs[i].index = i;
127         cmd = RKNN_QUERY_INPUT_ATTR;
128         ret = rknn_query(ctx, cmd, &(input_attrs[i]), sizeof(rknn_tensor_attr));
129         if (ret < 0) {
130             LOGE("rknn_query input_attrs[%d] fail!ret=%d\n", i, ret);
131             return -1;
132         }
133     }
134     // 3.2.0 Update global model input shape.
135     if (RKNN_TENSOR_NHWC == input_attrs[0].fmt) {
136         m_in_height = input_attrs[0].dims[1];
137         m_in_width = input_attrs[0].dims[2];
138         m_in_channel = input_attrs[0].dims[3];
139     } else if (RKNN_TENSOR_NCHW == input_attrs[0].fmt) {
140         m_in_height = input_attrs[0].dims[2];
141         m_in_width = input_attrs[0].dims[3];
142         m_in_channel = input_attrs[0].dims[1];
143     } else {
144         LOGE("Unsupported model input layout: %d!\n", input_attrs[0].fmt);
145         return -1;
146     }
147 
148     // set scale_w, scale_h for post process
149     scale_w = (float)m_in_width / img_width;
150     scale_h = (float)m_in_height / img_height;
151 
152     // 3.3 Query output attributes
153     memset(output_attrs, 0, n_output * sizeof(rknn_tensor_attr));
154     for (int i = 0; i < n_output; ++i) {
155         output_attrs[i].index = i;
156         cmd = RKNN_QUERY_OUTPUT_ATTR;
157         ret = rknn_query(ctx, cmd, &(output_attrs[i]), sizeof(rknn_tensor_attr));
158         if (ret < 0) {
159             LOGE("rknn_query output_attrs[%d] fail!ret=%d\n", i, ret);
160             return -1;
161         }
162         // set out_scales/out_zps for post_process
163         out_scales.push_back(output_attrs[i].scale);
164         out_zps.push_back(output_attrs[i].zp);
165     }
166 
167 #if ZERO_COPY
168     // 4. Set input/output buffer
169     // 4.1 Set inputs memory
170     // 4.1.1 Create input tensor memory, input data type is INT8, yolo has only 1 input.
171     input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride * sizeof(char));
172     memset(input_mems[0]->virt_addr, 0, input_attrs[0].size_with_stride * sizeof(char));
173     // 4.1.2 Update input attrs
174     input_attrs[0].index = 0;
175     input_attrs[0].type = RKNN_TENSOR_UINT8;
176     input_attrs[0].size = m_in_height * m_in_width * m_in_channel * sizeof(char);
177     input_attrs[0].fmt = RKNN_TENSOR_NHWC;
178     // TODO -- The efficiency of pass through will be higher, we need adjust the layout of input to
179     //         meet the use condition of pass through.
180     input_attrs[0].pass_through = 0;
181     // 4.1.3 Set input buffer
182     rknn_set_io_mem(ctx, input_mems[0], &(input_attrs[0]));
183     // 4.1.4 bind virtual address to rga virtual address
184     g_rga_dst = wrapbuffer_virtualaddr((void *)input_mems[0]->virt_addr, m_in_width, m_in_height,
185                                        RK_FORMAT_RGB_888);
186 
187     // 4.2 Set outputs memory
188     for (int i = 0; i < n_output; ++i) {
189         // 4.2.1 Create output tensor memory, output data type is int8, post_process need int8 data.
190         output_mems[i] = rknn_create_mem(ctx, output_attrs[i].n_elems * sizeof(unsigned char));
191         memset(output_mems[i]->virt_addr, 0, output_attrs[i].n_elems * sizeof(unsigned char));
192         // 4.2.2 Update input attrs
193         output_attrs[i].type = RKNN_TENSOR_INT8;
194         // 4.1.3 Set output buffer
195         rknn_set_io_mem(ctx, output_mems[i], &(output_attrs[i]));
196     }
197 #else
198     void *in_data = malloc(m_in_width * m_in_height * m_in_channel);
199     memset(in_data, 0, m_in_width * m_in_height * m_in_channel);
200     g_rga_dst = wrapbuffer_virtualaddr(in_data, m_in_width, m_in_height, RK_FORMAT_RGB_888);
201 #endif
202 
203     created = true;
204 
205     LOGI("rknn_init success!");
206 
207     return 0;
208 }
209 
destroy()210 void destroy() {
211 //    LOGI("rknn_destroy!");
212     // release io_mem resource
213     for (int i = 0; i < n_input; ++i) {
214         rknn_destroy_mem(ctx, input_mems[i]);
215     }
216     for (int i = 0; i < n_output; ++i) {
217         rknn_destroy_mem(ctx, output_mems[i]);
218     }
219     rknn_destroy(ctx);
220 }
221 
run_yolo(char * inDataRaw,char * y0,char * y1,char * y2)222 bool run_yolo(char *inDataRaw, char *y0, char *y1, char *y2)
223 {
224     int ret;
225     bool status = false;
226     if(!created) {
227         LOGE("run_yolo: init yolo hasn't successful!");
228         return false;
229     }
230 
231 #ifdef EVAL_TIME
232     struct timeval start_time, stop_time;
233 
234     gettimeofday(&start_time, NULL);
235 #endif
236     g_rga_src = wrapbuffer_virtualaddr((void *)inDataRaw, img_width, img_height,
237                                        RK_FORMAT_RGBA_8888);
238 
239     // convert color format and resize. RGA8888 -> RGB888
240     ret = imresize(g_rga_src, g_rga_dst);
241     if (IM_STATUS_SUCCESS != ret) {
242         LOGE("run_yolo: resize image with rga failed: %s\n", imStrError((IM_STATUS)ret));
243         return false;
244     }
245 #ifdef EVAL_TIME
246     gettimeofday(&stop_time, NULL);
247     LOGI("imresize use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
248 #endif
249 
250 #ifdef DEBUG_DUMP
251     // save resized image
252     if (g_inf_count == 5) {
253         char out_img_name[1024];
254         memset(out_img_name, 0, sizeof(out_img_name));
255         sprintf(out_img_name, "/data/user/0/com.rockchip.gpadc.yolodemo/cache/resized_img_%d.rgb", g_inf_count);
256         FILE *fp = fopen(out_img_name, "w");
257 //        LOGI("n_elems: %d", input_attrs[0].n_elems);
258 //        fwrite(input_mems[0]->virt_addr, 1, input_attrs[0].n_elems * sizeof(unsigned char), fp);
259 //        fflush(fp);
260         for (int i = 0; i < input_attrs[0].n_elems; ++i) {
261             fprintf(fp, "%d\n", *((uint8_t *)(g_rga_dst.vir_addr) + i));
262         }
263         fclose(fp);
264     }
265 
266 #endif
267 
268 #if ZERO_COPY
269 #else
270     rknn_input inputs[1];
271     inputs[0].index = 0;
272     inputs[0].type = RKNN_TENSOR_UINT8;
273     inputs[0].size = m_in_width * m_in_height * m_in_channel;
274     inputs[0].fmt = RKNN_TENSOR_NHWC;
275     inputs[0].pass_through = 0;
276     inputs[0].buf = g_rga_dst.vir_addr;
277 #ifdef EVAL_TIME
278     gettimeofday(&start_time, NULL);
279 #endif
280     rknn_inputs_set(ctx, 1, inputs);
281 #ifdef EVAL_TIME
282     gettimeofday(&stop_time, NULL);
283     LOGI("rknn_inputs_set use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
284 #endif
285 #endif
286 
287 #ifdef EVAL_TIME
288     gettimeofday(&start_time, NULL);
289 #endif
290     ret = rknn_run(ctx, nullptr);
291     if(ret < 0) {
292         LOGE("rknn_run fail! ret=%d\n", ret);
293         return false;
294     }
295 #ifdef EVAL_TIME
296     gettimeofday(&stop_time, NULL);
297     LOGI("inference use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
298 
299     // outputs format are all NCHW.
300     gettimeofday(&start_time, NULL);
301 #endif
302 
303 #if ZERO_COPY
304     memcpy(y0, output_mems[0]->virt_addr, output_attrs[0].n_elems * sizeof(char));
305     memcpy(y1, output_mems[1]->virt_addr, output_attrs[1].n_elems * sizeof(char));
306     memcpy(y2, output_mems[2]->virt_addr, output_attrs[2].n_elems * sizeof(char));
307 #else
308     rknn_output outputs[3];
309     memset(outputs, 0, sizeof(outputs));
310     for (int i = 0; i < 3; ++i) {
311         outputs[i].want_float = 0;
312     }
313     rknn_outputs_get(ctx, 3, outputs, NULL);
314     memcpy(y0, outputs[0].buf, output_attrs[0].n_elems * sizeof(char));
315     memcpy(y1, outputs[1].buf, output_attrs[1].n_elems * sizeof(char));
316     memcpy(y2, outputs[2].buf, output_attrs[2].n_elems * sizeof(char));
317     rknn_outputs_release(ctx, 3, outputs);
318 #endif
319 
320 #ifdef EVAL_TIME
321     gettimeofday(&stop_time, NULL);
322     LOGI("copy output use %f ms\n", (__get_us(stop_time) - __get_us(start_time)) / 1000);
323 #endif
324 
325 #ifdef DEBUG_DUMP
326     if (g_inf_count == 5) {
327         for (int i = 0; i < n_output; ++i) {
328             char out_path[1024];
329             memset(out_path, 0, sizeof(out_path));
330             sprintf(out_path, "/data/user/0/com.rockchip.gpadc.yolodemo/cache/out_%d.tensor", i);
331             FILE *fp = fopen(out_path, "w");
332             for (int j = 0; j < output_attrs[i].n_elems; ++j) {
333 #if ZERO_COPY
334                 fprintf(fp, "%d\n", *((int8_t *)(output_mems[i]->virt_addr) + i));
335 #else
336                 fprintf(fp, "%d\n", *((int8_t *)(outputs[i].buf) + i));
337 #endif
338             }
339             fclose(fp);
340         }
341     }
342     if (g_inf_count < 10) {
343         g_inf_count++;
344     }
345 #endif
346 
347     status = true;
348 
349 //    LOGI("run_yolo: end\n");
350 
351     return status;
352 }
353 
yolo_post_process(char * grid0_buf,char * grid1_buf,char * grid2_buf,int * ids,float * scores,float * boxes)354 int yolo_post_process(char *grid0_buf, char *grid1_buf, char *grid2_buf,
355                       int *ids, float *scores, float *boxes) {
356     int ret;
357     if(!created) {
358         LOGE("yolo_post_process: init yolo hasn't successful!");
359         return false;
360     }
361 
362     detect_result_group_t detect_result_group;
363 //    LOGI("start yolo post.");
364     ret = post_process((int8_t *)grid0_buf, (int8_t *)grid1_buf, (int8_t *)grid2_buf,
365                        m_in_height, m_in_width, BOX_THRESH, NMS_THRESH, scale_w, scale_h,
366                        out_zps, out_scales, &detect_result_group);
367     if (ret < 0) {
368         LOGE("yolo_post_process: post process failed!");
369         return -1;
370     }
371 //    LOGI("deteced %d objects.\n", detect_result_group.count);
372 
373     memset(ids, 0, sizeof(int) * OBJ_NUMB_MAX_SIZE);
374     memset(scores, 0, sizeof(float) * OBJ_NUMB_MAX_SIZE);
375     memset(boxes, 0, sizeof(float) * OBJ_NUMB_MAX_SIZE * BOX_LEN);
376 
377     int count = detect_result_group.count;
378     for (int i = 0; i < count; ++i) {
379         ids[i] = detect_result_group.results[i].class_id;
380         scores[i] = detect_result_group.results[i].prop;
381         *(boxes+4*i+0) = detect_result_group.results[i].box.left;
382         *(boxes+4*i+1) = detect_result_group.results[i].box.top;
383         *(boxes+4*i+2) = detect_result_group.results[i].box.right;
384         *(boxes+4*i+3) = detect_result_group.results[i].box.bottom;
385 #ifdef DEBUG_DUMP
386         if (g_post_count == 5) {
387             LOGI("result %2d: (%4d, %4d, %4d, %4d), %d\n", i,
388                  detect_result_group.results[i].box.left,
389                  detect_result_group.results[i].box.top,
390                  detect_result_group.results[i].box.right,
391                  detect_result_group.results[i].box.bottom,
392                  detect_result_group.results->class_id)
393         }
394         if (g_post_count < 10) {
395             g_post_count++;
396         }
397 #endif
398     }
399 
400     return count;
401 }
402 
colorConvertAndFlip(void * src,int srcFmt,void * dst,int dstFmt,int width,int height,int flip)403 int colorConvertAndFlip(void *src, int srcFmt, void *dst,  int dstFmt, int width, int height, int flip) {
404     int ret;
405     // RGA needs to ensure page alignment when using virtual addresses, otherwise it may cause
406     // internal cache flushing errors. Manually modify src/dst buf to force its 4k alignment.
407     // TODO -- convert color format and flip with OpenGL.
408     int src_len = width * height * 3 / 2;    // yuv420 buffer length.
409     void *src_ = malloc(src_len + 4096);
410     void *org_src = src_;
411     memset(src_, 0, src_len + 4096);
412     src_ = (void *)((((int64_t)src_ >> 12) + 1) << 12);
413     memcpy(src_, src, src_len);
414     int dst_len = width * height * 4;    // rgba buffer length.
415     void *dst_ = malloc(dst_len + 4096);
416     void *org_dst = dst_;
417     memset(dst_, 0, dst_len + 4096);
418     dst_ = (void *)((((int64_t)dst_ >> 12) + 1) << 12);
419     rga_buffer_t rga_src = wrapbuffer_virtualaddr((void *)src_, width, height, srcFmt);
420     rga_buffer_t rga_dst = wrapbuffer_virtualaddr((void *)dst_, width, height, dstFmt);
421 
422     if (DO_NOT_FLIP == flip) {
423         // convert color format
424         ret = imcvtcolor(rga_src, rga_dst, rga_src.format, rga_dst.format);
425     } else {
426         // convert color format and flip.
427         ret = imflip(rga_src, rga_dst, flip);
428     }
429 
430     if (IM_STATUS_SUCCESS != ret) {
431         LOGE("colorConvertAndFlip failed. Ret: %s\n", imStrError((IM_STATUS)ret));
432     }
433 
434     memcpy(dst, dst_, dst_len);
435     free(org_src);
436     free(org_dst);
437 
438     return ret;
439 }
440 
rknn_app_destory()441 void rknn_app_destory() {
442     LOGI("rknn app destroy.\n");
443     if (g_rga_dst.vir_addr) {
444         free(g_rga_dst.vir_addr);
445     }
446     rknn_destroy(ctx);
447 }
448