xref: /OK3568_Linux_fs/external/rknpu2/examples/rknn_api_demo/src/rknn_create_mem_demo.cpp (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
/*-------------------------------------------
                Includes
-------------------------------------------*/
18 #include "rknn_api.h"
19 
20 #include <float.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/time.h>
25 
26 #define STB_IMAGE_IMPLEMENTATION
27 #include "stb/stb_image.h"
28 #define STB_IMAGE_RESIZE_IMPLEMENTATION
29 #include <stb/stb_image_resize.h>
30 
/*-------------------------------------------
                  Functions
-------------------------------------------*/
/**
 * Return the current time of day in microseconds.
 *
 * The value is built from the seconds and microseconds reported by
 * gettimeofday(). The seconds field is widened to int64_t before the
 * multiplication so the product cannot overflow on targets where the
 * field is only 32 bits wide.
 *
 * @return seconds * 1000000 + microseconds, as a 64-bit value.
 */
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (int64_t)tv.tv_sec * 1000000 + (int64_t)tv.tv_usec;
}
40 
/**
 * Select the largest values of a score array together with their indices.
 *
 * The result is ordered from largest to smallest. Each pass picks the largest
 * value whose index was not chosen by an earlier pass, so an index appears at
 * most once in the result for any topNum (the check is no longer limited to
 * exactly five result slots).
 *
 * @param pfProb      input scores, outputCount elements.
 * @param pfMaxProb   output: selected scores, must hold topNum elements.
 * @param pMaxClass   output: indices of the selected scores, must hold topNum
 *                    elements.
 * @param outputCount number of elements in pfProb.
 * @param topNum      number of result slots.
 * @return always 1.
 *
 * All topNum slots are first reset to -FLT_MAX / (uint32_t)-1. Only
 * min(outputCount, topNum) slots are searched for; a slot keeps the reset
 * values when no remaining score is greater than -FLT_MAX.
 */
static int rknn_GetTopN(float* pfProb, float* pfMaxProb, uint32_t* pMaxClass, uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t i = 0; i < topNum; ++i) {
    pfMaxProb[i] = -FLT_MAX;
    pMaxClass[i] = (uint32_t)-1;
  }

  for (uint32_t j = 0; j < top_count; j++) {
    for (uint32_t i = 0; i < outputCount; i++) {
      // Skip indices already claimed by one of the j better-ranked slots.
      int already_selected = 0;
      for (uint32_t k = 0; k < j; k++) {
        if (pMaxClass[k] == i) {
          already_selected = 1;
          break;
        }
      }
      if (already_selected) {
        continue;
      }

      if (pfProb[i] > pfMaxProb[j]) {
        pfMaxProb[j] = pfProb[i];
        pMaxClass[j] = i;
      }
    }
  }

  return 1;
}
67 
dump_tensor_attr(rknn_tensor_attr * attr)68 static void dump_tensor_attr(rknn_tensor_attr* attr)
69 {
70   printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
71          "zp=%d, scale=%f\n",
72          attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
73          attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
74          get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
75 }
76 
load_image(const char * image_path,rknn_tensor_attr * input_attr)77 static unsigned char* load_image(const char* image_path, rknn_tensor_attr* input_attr)
78 {
79   int req_height  = 0;
80   int req_width   = 0;
81   int req_channel = 0;
82 
83   switch (input_attr->fmt) {
84   case RKNN_TENSOR_NHWC:
85     req_height  = input_attr->dims[1];
86     req_width   = input_attr->dims[2];
87     req_channel = input_attr->dims[3];
88     break;
89   case RKNN_TENSOR_NCHW:
90     req_height  = input_attr->dims[2];
91     req_width   = input_attr->dims[3];
92     req_channel = input_attr->dims[1];
93     break;
94   default:
95     printf("meet unsupported layout\n");
96     return NULL;
97   }
98 
99   int height  = 0;
100   int width   = 0;
101   int channel = 0;
102 
103   unsigned char* image_data = stbi_load(image_path, &width, &height, &channel, req_channel);
104   if (image_data == NULL) {
105     printf("load image failed!\n");
106     return NULL;
107   }
108 
109   if (width != req_width || height != req_height) {
110     unsigned char* image_resized = (unsigned char*)STBI_MALLOC(req_width * req_height * req_channel);
111     if (!image_resized) {
112       printf("malloc image failed!\n");
113       STBI_FREE(image_data);
114       return NULL;
115     }
116     if (stbir_resize_uint8(image_data, width, height, 0, image_resized, req_width, req_height, 0, channel) != 1) {
117       printf("resize image failed!\n");
118       STBI_FREE(image_data);
119       return NULL;
120     }
121     STBI_FREE(image_data);
122     image_data = image_resized;
123   }
124 
125   return image_data;
126 }
127 
/**
 * Read a whole file into a newly allocated buffer.
 *
 * @param filename   path of the file to read.
 * @param model_size output: number of bytes read; written only on success.
 * @return buffer allocated with malloc() holding the file contents (the
 *         caller must free() it), or NULL if the file cannot be opened, its
 *         size cannot be determined or is zero, memory cannot be allocated,
 *         or the read comes up short. The file is closed on every path.
 */
static unsigned char* load_model(const char* filename, int* model_size)
{
  FILE* fp = fopen(filename, "rb");
  if (fp == NULL) {
    printf("fopen %s fail!\n", filename);
    return NULL;
  }

  // Determine the file length, then rewind for reading.
  long file_len = -1;
  if (fseek(fp, 0, SEEK_END) == 0) {
    file_len = ftell(fp);
  }
  int model_len = (int)file_len;
  // The length is reported through an int, so it must be positive and
  // survive the conversion unchanged.
  if (file_len <= 0 || (long)model_len != file_len || fseek(fp, 0, SEEK_SET) != 0) {
    printf("get size of %s fail!\n", filename);
    fclose(fp);
    return NULL;
  }

  unsigned char* model = (unsigned char*)malloc((size_t)file_len);
  if (model == NULL) {
    printf("malloc %ld bytes for %s fail!\n", file_len, filename);
    fclose(fp);
    return NULL;
  }

  if (fread(model, 1, (size_t)file_len, fp) != (size_t)file_len) {
    printf("fread %s fail!\n", filename);
    free(model);
    fclose(fp);
    return NULL;
  }

  fclose(fp);
  *model_size = model_len;
  return model;
}
150 
/*-------------------------------------------
                  Main Functions
-------------------------------------------*/
main(int argc,char * argv[])154 int main(int argc, char* argv[])
155 {
156   if (argc < 3) {
157     printf("Usage:%s model_path input_path [loop_count]\n", argv[0]);
158     return -1;
159   }
160 
161   char* model_path = argv[1];
162   char* input_path = argv[2];
163 
164   int loop_count = 1;
165   if (argc > 3) {
166     loop_count = atoi(argv[3]);
167   }
168 
169   rknn_context ctx = 0;
170 
171   // Load RKNN Model
172   int            model_len = 0;
173   unsigned char* model     = load_model(model_path, &model_len);
174   int            ret       = rknn_init(&ctx, model, model_len, 0, NULL);
175   if (ret < 0) {
176     printf("rknn_init fail! ret=%d\n", ret);
177     return -1;
178   }
179 
180   // Get sdk and driver version
181   rknn_sdk_version sdk_ver;
182   ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
183   if (ret != RKNN_SUCC) {
184     printf("rknn_query fail! ret=%d\n", ret);
185     return -1;
186   }
187 
188   printf("rknn_api/rknnrt version: %s, driver version: %s\n", sdk_ver.api_version, sdk_ver.drv_version);
189 
190   // Get Model Input Output Info
191   rknn_input_output_num io_num;
192   ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
193   if (ret != RKNN_SUCC) {
194     printf("rknn_query fail! ret=%d\n", ret);
195     return -1;
196   }
197   printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
198 
199   printf("input tensors:\n");
200   rknn_tensor_attr input_attrs[io_num.n_input];
201   memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
202   for (uint32_t i = 0; i < io_num.n_input; i++) {
203     input_attrs[i].index = i;
204     // query info
205     ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
206     if (ret < 0) {
207       printf("rknn_init error! ret=%d\n", ret);
208       return -1;
209     }
210     dump_tensor_attr(&input_attrs[i]);
211   }
212 
213   printf("output tensors:\n");
214   rknn_tensor_attr output_attrs[io_num.n_output];
215   memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
216   for (uint32_t i = 0; i < io_num.n_output; i++) {
217     output_attrs[i].index = i;
218     // query info
219     ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
220     if (ret != RKNN_SUCC) {
221       printf("rknn_query fail! ret=%d\n", ret);
222       return -1;
223     }
224     dump_tensor_attr(&output_attrs[i]);
225   }
226 
227   // Get custom string
228   rknn_custom_string custom_string;
229   ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string, sizeof(custom_string));
230   if (ret != RKNN_SUCC) {
231     printf("rknn_query fail! ret=%d\n", ret);
232     return -1;
233   }
234   printf("custom string: %s\n", custom_string.string);
235 
236   unsigned char*     input_data   = NULL;
237   rknn_tensor_type   input_type   = RKNN_TENSOR_UINT8;
238   rknn_tensor_format input_layout = RKNN_TENSOR_NHWC;
239 
240   // Load image
241   input_data = load_image(input_path, &input_attrs[0]);
242 
243   if (!input_data) {
244     return -1;
245   }
246 
247   // Create input tensor memory
248   rknn_tensor_mem* input_mems[1];
249   // default input type is int8 (normalize and quantize need compute in outside)
250   // if set uint8, will fuse normalize and quantize to npu
251   input_attrs[0].type = input_type;
252   // default fmt is NHWC, npu only support NHWC in zero copy mode
253   input_attrs[0].fmt = input_layout;
254 
255   input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
256 
257   // Copy input data to input tensor memory
258   int width  = input_attrs[0].dims[2];
259   int stride = input_attrs[0].w_stride;
260 
261   if (width == stride) {
262     memcpy(input_mems[0]->virt_addr, input_data, width * input_attrs[0].dims[1] * input_attrs[0].dims[3]);
263   } else {
264     int height  = input_attrs[0].dims[1];
265     int channel = input_attrs[0].dims[3];
266     // copy from src to dst with stride
267     uint8_t* src_ptr = input_data;
268     uint8_t* dst_ptr = (uint8_t*)input_mems[0]->virt_addr;
269     // width-channel elements
270     int src_wc_elems = width * channel;
271     int dst_wc_elems = stride * channel;
272     for (int h = 0; h < height; ++h) {
273       memcpy(dst_ptr, src_ptr, src_wc_elems);
274       src_ptr += src_wc_elems;
275       dst_ptr += dst_wc_elems;
276     }
277   }
278 
279   // Create output tensor memory
280   rknn_tensor_mem* output_mems[io_num.n_output];
281   for (uint32_t i = 0; i < io_num.n_output; ++i) {
282     // default output type is depend on model, this require float32 to compute top5
283     // allocate float32 output tensor
284     int output_size = output_attrs[i].n_elems * sizeof(float);
285     output_mems[i]  = rknn_create_mem(ctx, output_size);
286   }
287 
288   // Set input tensor memory
289   ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
290   if (ret < 0) {
291     printf("rknn_set_io_mem fail! ret=%d\n", ret);
292     return -1;
293   }
294 
295   // Set output tensor memory
296   for (uint32_t i = 0; i < io_num.n_output; ++i) {
297     // default output type is depend on model, this require float32 to compute top5
298     output_attrs[i].type = RKNN_TENSOR_FLOAT32;
299     // set output memory and attribute
300     ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
301     if (ret < 0) {
302       printf("rknn_set_io_mem fail! ret=%d\n", ret);
303       return -1;
304     }
305   }
306 
307   // Run
308   printf("Begin perf ...\n");
309   for (int i = 0; i < loop_count; ++i) {
310     int64_t start_us  = getCurrentTimeUs();
311     ret               = rknn_run(ctx, NULL);
312     int64_t elapse_us = getCurrentTimeUs() - start_us;
313     if (ret < 0) {
314       printf("rknn run error %d\n", ret);
315       return -1;
316     }
317     printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
318   }
319 
320   // Get top 5
321   uint32_t topNum = 5;
322   for (uint32_t i = 0; i < io_num.n_output; i++) {
323     uint32_t MaxClass[topNum];
324     float    fMaxProb[topNum];
325     float*   buffer    = (float*)output_mems[i]->virt_addr;
326     uint32_t sz        = output_attrs[i].n_elems;
327     int      top_count = sz > topNum ? topNum : sz;
328 
329     rknn_GetTopN(buffer, fMaxProb, MaxClass, sz, topNum);
330 
331     printf("---- Top%d ----\n", top_count);
332     for (int j = 0; j < top_count; j++) {
333       printf("%8.6f - %d\n", fMaxProb[j], MaxClass[j]);
334     }
335   }
336 
337   // Destroy rknn memory
338   rknn_destroy_mem(ctx, input_mems[0]);
339   for (uint32_t i = 0; i < io_num.n_output; ++i) {
340     rknn_destroy_mem(ctx, output_mems[i]);
341   }
342 
343   // destroy
344   rknn_destroy(ctx);
345 
346   if (input_data != nullptr) {
347     free(input_data);
348   }
349 
350   if (model != nullptr) {
351     free(model);
352   }
353 
354   return 0;
355 }
356