xref: /OK3568_Linux_fs/external/rknpu2/examples/RV1106_RV1103/rknn_mobilenet_demo/src/main.cc (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 /*-------------------------------------------
16                 Includes
17 -------------------------------------------*/
18 #include "rknn_api.h"
19 
20 #include <float.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/time.h>
25 
26 #define STB_IMAGE_IMPLEMENTATION
27 #include "stb/stb_image.h"
28 #define STB_IMAGE_RESIZE_IMPLEMENTATION
29 #include <stb/stb_image_resize.h>
30 
31 /*-------------------------------------------
32                   Functions
33 -------------------------------------------*/
/**
 * Return the current wall-clock time, as reported by gettimeofday(), in
 * microseconds.
 *
 * The seconds field is widened to int64_t before scaling so that the
 * multiplication cannot overflow on targets where time_t is only 32 bits wide.
 */
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (int64_t)tv.tv_sec * 1000000 + (int64_t)tv.tv_usec;
}
40 
/**
 * Select the topNum largest values of a float score array.
 *
 * @param pfProb       input scores, outputCount entries
 * @param pfMaxProb    output: selected scores in descending order, topNum entries
 * @param pMaxClass    output: index of each selected score, topNum entries
 * @param outputCount  number of entries in pfProb
 * @param topNum       number of entries to select
 * @return always 1
 *
 * Slots that cannot be filled (outputCount < topNum) keep the sentinels
 * -FLT_MAX / (uint32_t)-1.
 */
static int rknn_GetTopN(float *pfProb, float *pfMaxProb, uint32_t *pMaxClass, uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t i = 0; i < topNum; ++i)
  {
    pfMaxProb[i] = -FLT_MAX;
    pMaxClass[i] = (uint32_t)-1;
  }

  for (uint32_t j = 0; j < top_count; j++)
  {
    for (uint32_t i = 0; i < outputCount; i++)
    {
      // Skip indices already chosen for an earlier rank. Only slots [0, j) are
      // inspected, so this works for any topNum and never reads beyond the
      // caller's topNum-sized arrays.
      int already_taken = 0;
      for (uint32_t k = 0; k < j; k++)
      {
        if (pMaxClass[k] == i)
        {
          already_taken = 1;
          break;
        }
      }
      if (already_taken)
      {
        continue;
      }

      float prob = pfProb[i];
      if (prob > pfMaxProb[j])
      {
        pfMaxProb[j] = prob;
        pMaxClass[j] = i;
      }
    }
  }

  return 1;
}
73 
/**
 * Select the topNum largest values of a quantized int8 score array.
 *
 * Each raw value is dequantized as (value - zp) * scale before comparison.
 *
 * @param pProb        input quantized scores, outputCount entries
 * @param scale        dequantization scale
 * @param zp           dequantization zero point
 * @param pfMaxProb    output: selected dequantized scores in descending order, topNum entries
 * @param pMaxClass    output: index of each selected score, topNum entries
 * @param outputCount  number of entries in pProb
 * @param topNum       number of entries to select
 * @return always 1
 *
 * Slots that cannot be filled (outputCount < topNum) keep the sentinels
 * -FLT_MAX / (uint32_t)-1.
 */
static int rknn_GetTopN_int8(int8_t *pProb, float scale, int zp, float *pfMaxProb, uint32_t *pMaxClass,
                             uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t i = 0; i < topNum; ++i)
  {
    pfMaxProb[i] = -FLT_MAX;
    pMaxClass[i] = (uint32_t)-1;
  }

  for (uint32_t j = 0; j < top_count; j++)
  {
    for (uint32_t i = 0; i < outputCount; i++)
    {
      // Skip indices already chosen for an earlier rank. Only slots [0, j) are
      // inspected, so this works for any topNum and never reads beyond the
      // caller's topNum-sized arrays.
      int already_taken = 0;
      for (uint32_t k = 0; k < j; k++)
      {
        if (pMaxClass[k] == i)
        {
          already_taken = 1;
          break;
        }
      }
      if (already_taken)
      {
        continue;
      }

      float prob = (pProb[i] - zp) * scale; // int8 --> float
      if (prob > pfMaxProb[j])
      {
        pfMaxProb[j] = prob;
        pMaxClass[j] = i;
      }
    }
  }

  return 1;
}
107 
/**
 * Print a one-line summary of a tensor attribute to stdout.
 *
 * The dimension list is rendered into a fixed-size scratch buffer with bounded
 * writes; if it does not fit, the list is truncated instead of overrunning the
 * buffer.
 *
 * @param attr  tensor attribute to print
 */
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
  char dims[128] = {0};
  size_t used = 0;
  for (int i = 0; i < attr->n_dims && used < sizeof(dims); ++i)
  {
    int written = snprintf(&dims[used], sizeof(dims) - used, "%d%s", attr->dims[i], (i == attr->n_dims - 1) ? "" : ", ");
    if (written < 0)
    {
      break;
    }
    // snprintf reports the untruncated length; once `used` reaches the buffer
    // size the loop condition stops further writes.
    used += (size_t)written;
  }
  printf("  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
         "zp=%d, scale=%f\n",
         attr->index, attr->name, attr->n_dims, dims, attr->n_elems, attr->size, get_format_string(attr->fmt),
         get_type_string(attr->type), get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
121 
/**
 * Read an entire file into a freshly allocated buffer.
 *
 * The file is opened in binary mode so the bytes are returned unmodified.
 *
 * @param file_path  path of the file to read
 * @param file_size  output: number of bytes read; written only on success
 *                   (may be NULL if the caller does not need the size)
 * @return buffer allocated with malloc() that the caller must free(), or NULL
 *         on any failure (a message is printed to stdout)
 */
static void *load_file(const char *file_path, size_t *file_size)
{
  FILE *fp = fopen(file_path, "rb");
  if (fp == NULL)
  {
    printf("failed to open file: %s\n", file_path);
    return NULL;
  }

  // Determine the size by seeking to the end. ftell() returns -1 on failure,
  // which must not be converted to a (huge) size_t.
  long end_pos = -1;
  if (fseek(fp, 0, SEEK_END) == 0)
  {
    end_pos = ftell(fp);
  }
  if (end_pos < 0 || fseek(fp, 0, SEEK_SET) != 0)
  {
    fclose(fp);
    printf("failed to get file size: %s\n", file_path);
    return NULL;
  }
  size_t size = (size_t)end_pos;

  void *file_data = malloc(size);
  if (file_data == NULL)
  {
    fclose(fp);
    printf("failed allocate file size: %zu\n", size);
    return NULL;
  }

  if (fread(file_data, 1, size, fp) != size)
  {
    fclose(fp);
    free(file_data);
    printf("failed to read file data!\n");
    return NULL;
  }

  fclose(fp);

  if (file_size != NULL)
  {
    *file_size = size;
  }

  return file_data;
}
157 
/**
 * Load an image from disk and, if necessary, resize it to the spatial size
 * required by the given input tensor.
 *
 * The required height/width/channel count are taken from input_attr->dims
 * according to input_attr->fmt (NHWC or NCHW); other layouts are rejected.
 *
 * @param image_path  path of the image file
 * @param input_attr  attribute of the model input the image is destined for
 * @return pixel buffer allocated through the stb allocator (release with
 *         STBI_FREE / free), or NULL on failure (a message is printed)
 */
static unsigned char *load_image(const char *image_path, rknn_tensor_attr *input_attr)
{
  int req_height = 0;
  int req_width = 0;
  int req_channel = 0;

  switch (input_attr->fmt)
  {
  case RKNN_TENSOR_NHWC:
    req_height = input_attr->dims[1];
    req_width = input_attr->dims[2];
    req_channel = input_attr->dims[3];
    break;
  case RKNN_TENSOR_NCHW:
    req_height = input_attr->dims[2];
    req_width = input_attr->dims[3];
    req_channel = input_attr->dims[1];
    break;
  default:
    printf("meet unsupported layout\n");
    return NULL;
  }

  int height = 0;
  int width = 0;
  int channel = 0;

  unsigned char *image_data = stbi_load(image_path, &width, &height, &channel, req_channel);
  if (image_data == NULL)
  {
    printf("load image failed!\n");
    return NULL;
  }

  // When a non-zero component count is requested, stbi_load returns pixels with
  // exactly that many components; `channel` only reports what the file held.
  // The resize must therefore use the count of the decoded buffer.
  int data_channel = (req_channel != 0) ? req_channel : channel;

  if (width != req_width || height != req_height)
  {
    unsigned char *image_resized = (unsigned char *)STBI_MALLOC((size_t)req_width * req_height * data_channel);
    if (!image_resized)
    {
      printf("malloc image failed!\n");
      STBI_FREE(image_data);
      return NULL;
    }
    if (stbir_resize_uint8(image_data, width, height, 0, image_resized, req_width, req_height, 0, data_channel) != 1)
    {
      printf("resize image failed!\n");
      STBI_FREE(image_resized); // do not leak the destination buffer on failure
      STBI_FREE(image_data);
      return NULL;
    }
    STBI_FREE(image_data);
    image_data = image_resized;
  }

  return image_data;
}
213 
// The NPU output of a quantized model is int8, so post-processing has to treat
// it as int8 data. The function below converts an int8 tensor in NC1HWC2
// layout into a float tensor in NCHW layout.
/**
 * Convert an int8 NC1HWC2 tensor to a dequantized float NCHW tensor.
 *
 * @param src      source tensor, laid out as [N][C1][H][W][C2]
 * @param dst      destination tensor, laid out as [N][channel][h][w]
 * @param dims     source dimensions: dims[0]=N, dims[1]=C1, dims[2]=H, dims[3]=W, dims[4]=C2
 * @param channel  number of channels to produce per batch
 * @param h        destination height
 * @param w        destination width (also used as the row pitch when indexing
 *                 the source plane)
 * @param zp       quantization zero point
 * @param scale    quantization scale
 * @return always 0
 */
int NC1HWC2_int8_to_NCHW_float(const int8_t *src, float *dst, int *dims, int channel, int h, int w, int zp, float scale)
{
  int batch = dims[0];
  int C1 = dims[1];
  int C2 = dims[4];
  int hw_src = dims[2] * dims[3];
  int hw_dst = h * w;
  for (int i = 0; i < batch; i++)
  {
    // Per-batch base pointers are derived from the original arguments; the
    // arguments themselves are not advanced, so every batch lands at
    // i * (batch stride) rather than at an accumulated offset.
    const int8_t *src_n = src + i * C1 * hw_src * C2;
    float *dst_n = dst + i * channel * hw_dst;
    for (int c = 0; c < channel; ++c)
    {
      int plane = c / C2;  // which C1 slice holds this channel
      int offset = c % C2; // position of the channel inside the C2 group
      const int8_t *src_c = src_n + plane * hw_src * C2;
      for (int cur_h = 0; cur_h < h; ++cur_h)
      {
        for (int cur_w = 0; cur_w < w; ++cur_w)
        {
          int cur_hw = cur_h * w + cur_w;
          dst_n[c * hw_dst + cur_hw] = (src_c[C2 * cur_hw + offset] - zp) * scale; // int8-->float
        }
      }
    }
  }

  return 0;
}
243 
244 /*-------------------------------------------
245                   Main Functions
246 -------------------------------------------*/
main(int argc,char * argv[])247 int main(int argc, char *argv[])
248 {
249   if (argc < 3)
250   {
251     printf("Usage:%s model_path input_path [loop_count]\n", argv[0]);
252     return -1;
253   }
254 
255   char *model_path = argv[1];
256   char *input_path = argv[2];
257 
258   int loop_count = 1;
259   if (argc > 3)
260   {
261     loop_count = atoi(argv[3]);
262   }
263 
264   rknn_context ctx = 0;
265 
266   // Load RKNN Model
267 #if 1
268   // Init rknn from model path
269   int ret = rknn_init(&ctx, model_path, 0, 0, NULL);
270 #else
271   // Init rknn from model data
272   size_t model_size;
273   void *model_data = load_file(model_path, &model_size);
274   if (model_data == NULL)
275   {
276     return -1;
277   }
278   int ret = rknn_init(&ctx, model_data, model_size, 0, NULL);
279   free(model_data);
280 #endif
281   if (ret < 0)
282   {
283     printf("rknn_init fail! ret=%d\n", ret);
284     return -1;
285   }
286 
287   // Get sdk and driver version
288   rknn_sdk_version sdk_ver;
289   ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
290   if (ret != RKNN_SUCC)
291   {
292     printf("rknn_query fail! ret=%d\n", ret);
293     return -1;
294   }
295   printf("rknn_api/rknnrt version: %s, driver version: %s\n", sdk_ver.api_version, sdk_ver.drv_version);
296 
297   // Get Model Input Output Info
298   rknn_input_output_num io_num;
299   ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
300   if (ret != RKNN_SUCC)
301   {
302     printf("rknn_query fail! ret=%d\n", ret);
303     return -1;
304   }
305   printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
306 
307   printf("input tensors:\n");
308   rknn_tensor_attr input_attrs[io_num.n_input];
309   memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
310   for (uint32_t i = 0; i < io_num.n_input; i++)
311   {
312     input_attrs[i].index = i;
313     // query info
314     ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
315     if (ret < 0)
316     {
317       printf("rknn_init error! ret=%d\n", ret);
318       return -1;
319     }
320     dump_tensor_attr(&input_attrs[i]);
321   }
322 
323   printf("output tensors:\n");
324   rknn_tensor_attr output_attrs[io_num.n_output];
325   memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
326   for (uint32_t i = 0; i < io_num.n_output; i++)
327   {
328     output_attrs[i].index = i;
329     // query info
330     ret = rknn_query(ctx, RKNN_QUERY_NATIVE_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
331     if (ret != RKNN_SUCC)
332     {
333       printf("rknn_query fail! ret=%d\n", ret);
334       return -1;
335     }
336     dump_tensor_attr(&output_attrs[i]);
337   }
338 
339   // Get custom string
340   rknn_custom_string custom_string;
341   ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string, sizeof(custom_string));
342   if (ret != RKNN_SUCC)
343   {
344     printf("rknn_query fail! ret=%d\n", ret);
345     return -1;
346   }
347   printf("custom string: %s\n", custom_string.string);
348 
349   unsigned char *input_data = NULL;
350   rknn_tensor_type input_type = RKNN_TENSOR_UINT8;
351   rknn_tensor_format input_layout = RKNN_TENSOR_NHWC;
352 
353   // Load image
354   input_data = load_image(input_path, &input_attrs[0]);
355 
356   if (!input_data)
357   {
358     return -1;
359   }
360 
361   // Create input tensor memory
362   rknn_tensor_mem *input_mems[1];
363   // default input type is int8 (normalize and quantize need compute in outside)
364   // if set uint8, will fuse normalize and quantize to npu
365   input_attrs[0].type = input_type;
366   // default fmt is NHWC, npu only support NHWC in zero copy mode
367   input_attrs[0].fmt = input_layout;
368 
369   input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
370 
371   // Copy input data to input tensor memory
372   int width = input_attrs[0].dims[2];
373   int stride = input_attrs[0].w_stride;
374 
375   if (width == stride)
376   {
377     memcpy(input_mems[0]->virt_addr, input_data, width * input_attrs[0].dims[1] * input_attrs[0].dims[3]);
378   }
379   else
380   {
381     int height = input_attrs[0].dims[1];
382     int channel = input_attrs[0].dims[3];
383     // copy from src to dst with stride
384     uint8_t *src_ptr = input_data;
385     uint8_t *dst_ptr = (uint8_t *)input_mems[0]->virt_addr;
386     // width-channel elements
387     int src_wc_elems = width * channel;
388     int dst_wc_elems = stride * channel;
389     for (int h = 0; h < height; ++h)
390     {
391       memcpy(dst_ptr, src_ptr, src_wc_elems);
392       src_ptr += src_wc_elems;
393       dst_ptr += dst_wc_elems;
394     }
395   }
396 
397   // Create output tensor memory
398   rknn_tensor_mem *output_mems[io_num.n_output];
399   for (uint32_t i = 0; i < io_num.n_output; ++i)
400   {
401     output_mems[i] = rknn_create_mem(ctx, output_attrs[i].size_with_stride);
402   }
403 
404   // Set input tensor memory
405   ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
406   if (ret < 0)
407   {
408     printf("rknn_set_io_mem fail! ret=%d\n", ret);
409     return -1;
410   }
411 
412   // Set output tensor memory
413   for (uint32_t i = 0; i < io_num.n_output; ++i)
414   {
415     // set output memory and attribute
416     ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
417     if (ret < 0)
418     {
419       printf("rknn_set_io_mem fail! ret=%d\n", ret);
420       return -1;
421     }
422   }
423 
424   // Run
425   printf("Begin perf ...\n");
426   for (int i = 0; i < loop_count; ++i)
427   {
428     int64_t start_us = getCurrentTimeUs();
429     ret = rknn_run(ctx, NULL);
430     int64_t elapse_us = getCurrentTimeUs() - start_us;
431     if (ret < 0)
432     {
433       printf("rknn run error %d\n", ret);
434       return -1;
435     }
436     printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
437   }
438 
439   printf("output origin tensors:\n");
440   rknn_tensor_attr orig_output_attrs[io_num.n_output];
441   memset(orig_output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
442   for (uint32_t i = 0; i < io_num.n_output; i++)
443   {
444     orig_output_attrs[i].index = i;
445     // query info
446     ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(orig_output_attrs[i]), sizeof(rknn_tensor_attr));
447     if (ret != RKNN_SUCC)
448     {
449       printf("rknn_query fail! ret=%d\n", ret);
450       return -1;
451     }
452     dump_tensor_attr(&orig_output_attrs[i]);
453   }
454 
455   float *output_mems_nchw[io_num.n_output];
456   for (uint32_t i = 0; i < io_num.n_output; ++i)
457   {
458     int size = orig_output_attrs[i].size_with_stride * sizeof(float);
459     output_mems_nchw[i] = (float *)malloc(size);
460   }
461 
462   for (uint32_t i = 0; i < io_num.n_output; i++)
463   {
464     if (output_attrs[i].fmt == RKNN_TENSOR_NC1HWC2)
465     {
466       int channel = orig_output_attrs[i].dims[1];
467       int h = orig_output_attrs[i].n_dims > 2 ? orig_output_attrs[i].dims[2] : 1;
468       int w = orig_output_attrs[i].n_dims > 3 ? orig_output_attrs[i].dims[3] : 1;
469       int zp = output_attrs[i].zp;
470       float scale = output_attrs[i].scale;
471       NC1HWC2_int8_to_NCHW_float((int8_t *)output_mems[i]->virt_addr, (float *)output_mems_nchw[i], (int *)output_attrs[i].dims,
472                                  channel, h, w, zp, scale);
473     }
474     else
475     {
476       int8_t *src = (int8_t *)output_mems[i]->virt_addr;
477       float *dst = output_mems_nchw[i];
478       for (int index = 0; index < output_attrs[i].n_elems; index++)
479       {
480         dst[index] = (src[index] - output_attrs[i].zp) * output_attrs[i].scale;
481       }
482     }
483   }
484 
485   // Get top 5
486   uint32_t topNum = 5;
487   for (uint32_t i = 0; i < io_num.n_output; i++)
488   {
489     uint32_t MaxClass[topNum];
490     float fMaxProb[topNum];
491 
492     uint32_t sz = orig_output_attrs[i].n_elems;
493     int top_count = sz > topNum ? topNum : sz;
494 
495     float *buffer = (float *)output_mems_nchw[i];
496 
497     rknn_GetTopN(buffer, fMaxProb, MaxClass, sz, topNum);
498 
499     printf("---- Top%d ----\n", top_count);
500     for (int j = 0; j < top_count; j++)
501     {
502       printf("%8.6f - %d\n", fMaxProb[j], MaxClass[j]);
503     }
504   }
505 
506   // Destroy rknn memory
507   rknn_destroy_mem(ctx, input_mems[0]);
508   for (uint32_t i = 0; i < io_num.n_output; ++i)
509   {
510     rknn_destroy_mem(ctx, output_mems[i]);
511     free(output_mems_nchw[i]);
512   }
513 
514   // destroy
515   rknn_destroy(ctx);
516 
517   free(input_data);
518 
519   return 0;
520 }