1 // Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 /*-------------------------------------------
16 Includes
17 -------------------------------------------*/
18 #include "rknn_api.h"
19
20 #include <float.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/time.h>
25
26 #define STB_IMAGE_IMPLEMENTATION
27 #include "stb/stb_image.h"
28 #define STB_IMAGE_RESIZE_IMPLEMENTATION
29 #include <stb/stb_image_resize.h>
30
31 /*-------------------------------------------
32 Functions
33 -------------------------------------------*/
/**
 * Return the current wall-clock time in microseconds.
 *
 * The value comes from gettimeofday(), so it follows the system clock and is
 * only suitable for coarse elapsed-time measurements.
 *
 * @return seconds * 1000000 + microseconds as a 64-bit integer.
 */
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  // Widen before multiplying: with a 32-bit tv_sec the product
  // tv_sec * 1000000 would overflow long before it is stored into int64_t.
  return (int64_t)tv.tv_sec * 1000000 + (int64_t)tv.tv_usec;
}
40
/**
 * Select the topNum largest values of a float score array.
 *
 * @param pfProb      input scores, outputCount elements.
 * @param pfMaxProb   output array of topNum entries; receives the selected
 *                    scores in descending order. Unused slots hold -FLT_MAX.
 * @param pMaxClass   output array of topNum entries; receives the index of
 *                    each selected score. Unused slots hold (uint32_t)-1.
 * @param outputCount number of elements in pfProb.
 * @param topNum      number of entries to select (capacity of both outputs).
 * @return always 1.
 */
static int rknn_GetTopN(float *pfProb, float *pfMaxProb, uint32_t *pMaxClass, uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t slot = 0; slot < topNum; ++slot)
  {
    pfMaxProb[slot] = -FLT_MAX;
    pMaxClass[slot] = (uint32_t)-1;
  }

  // One selection pass per rank: find the best score whose index has not
  // already been placed in an earlier rank.
  for (uint32_t rank = 0; rank < top_count; ++rank)
  {
    for (uint32_t i = 0; i < outputCount; ++i)
    {
      float prob = pfProb[i];
      if (!(prob > pfMaxProb[rank]))
      {
        continue;
      }

      // Only the ranks filled so far can hold a selected index. Scanning
      // exactly those works for any topNum, instead of assuming five slots.
      int already_selected = 0;
      for (uint32_t prev = 0; prev < rank; ++prev)
      {
        if (pMaxClass[prev] == i)
        {
          already_selected = 1;
          break;
        }
      }
      if (already_selected)
      {
        continue;
      }

      pfMaxProb[rank] = prob;
      pMaxClass[rank] = i;
    }
  }

  return 1;
}
73
/**
 * Select the topNum largest values of a quantized int8 score array.
 *
 * Each raw value is dequantized as (value - zp) * scale before comparison.
 *
 * @param pProb       input int8 scores, outputCount elements.
 * @param scale       dequantization scale.
 * @param zp          dequantization zero point.
 * @param pfMaxProb   output array of topNum entries; receives the selected
 *                    dequantized scores in descending order. Unused slots
 *                    hold -FLT_MAX.
 * @param pMaxClass   output array of topNum entries; receives the index of
 *                    each selected score. Unused slots hold (uint32_t)-1.
 * @param outputCount number of elements in pProb.
 * @param topNum      number of entries to select (capacity of both outputs).
 * @return always 1.
 */
static int rknn_GetTopN_int8(int8_t *pProb, float scale, int zp, float *pfMaxProb, uint32_t *pMaxClass,
                             uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t slot = 0; slot < topNum; ++slot)
  {
    pfMaxProb[slot] = -FLT_MAX;
    pMaxClass[slot] = (uint32_t)-1;
  }

  // One selection pass per rank: find the best score whose index has not
  // already been placed in an earlier rank.
  for (uint32_t rank = 0; rank < top_count; ++rank)
  {
    for (uint32_t i = 0; i < outputCount; ++i)
    {
      float prob = (pProb[i] - zp) * scale;
      if (!(prob > pfMaxProb[rank]))
      {
        continue;
      }

      // Only the ranks filled so far can hold a selected index. Scanning
      // exactly those works for any topNum, instead of assuming five slots.
      int already_selected = 0;
      for (uint32_t prev = 0; prev < rank; ++prev)
      {
        if (pMaxClass[prev] == i)
        {
          already_selected = 1;
          break;
        }
      }
      if (already_selected)
      {
        continue;
      }

      pfMaxProb[rank] = prob;
      pMaxClass[rank] = i;
    }
  }

  return 1;
}
107
/**
 * Print a one-line summary of a tensor attribute to stdout.
 *
 * The dimensions are rendered as a comma-separated list. The list is built
 * in a fixed 128-byte buffer with bounded writes, so an unexpectedly long
 * list is truncated instead of overflowing the buffer.
 *
 * @param attr tensor attribute to print; must not be NULL.
 */
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
  char dims[128] = {0};
  size_t used = 0;
  for (int i = 0; i < attr->n_dims; ++i)
  {
    // Append at the running offset rather than re-measuring the string.
    int written =
      snprintf(&dims[used], sizeof(dims) - used, "%d%s", attr->dims[i], (i == attr->n_dims - 1) ? "" : ", ");
    if (written < 0 || (size_t)written >= sizeof(dims) - used)
    {
      // Output was truncated (or failed); dims is still NUL-terminated.
      break;
    }
    used += (size_t)written;
  }
  printf("  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
         "zp=%d, scale=%f\n",
         attr->index, attr->name, attr->n_dims, dims, attr->n_elems, attr->size, get_format_string(attr->fmt),
         get_type_string(attr->type), get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
121
/**
 * Read an entire file into a newly allocated buffer.
 *
 * @param file_path path of the file to read.
 * @param file_size if not NULL, receives the number of bytes read on success;
 *                  left untouched on failure.
 * @return buffer holding the file contents (release with free()), or NULL if
 *         the file cannot be opened, sized, allocated for, or fully read. An
 *         empty file yields a valid buffer and a size of 0.
 */
static void *load_file(const char *file_path, size_t *file_size)
{
  // Binary mode: the contents are raw data and must not be translated.
  FILE *fp = fopen(file_path, "rb");
  if (fp == NULL)
  {
    printf("failed to open file: %s\n", file_path);
    return NULL;
  }

  // Determine the size by seeking to the end; ftell() reports -1 on error,
  // which must not be converted into a huge size_t.
  long end_pos = -1;
  if (fseek(fp, 0, SEEK_END) == 0)
  {
    end_pos = ftell(fp);
  }
  if (end_pos < 0 || fseek(fp, 0, SEEK_SET) != 0)
  {
    fclose(fp);
    printf("failed to get file size: %s\n", file_path);
    return NULL;
  }
  size_t size = (size_t)end_pos;

  // malloc(0) may legitimately return NULL; request at least one byte so an
  // empty file is not reported as an allocation failure.
  void *file_data = malloc(size > 0 ? size : 1);
  if (file_data == NULL)
  {
    fclose(fp);
    printf("failed allocate file size: %zu\n", size);
    return NULL;
  }

  if (fread(file_data, 1, size, fp) != size)
  {
    fclose(fp);
    free(file_data);
    printf("failed to read file data!\n");
    return NULL;
  }

  fclose(fp);

  if (file_size != NULL)
  {
    *file_size = size;
  }

  return file_data;
}
157
/**
 * Load an image file and resize it to the dimensions of an input tensor.
 *
 * The required height, width and channel count are taken from
 * input_attr->dims according to input_attr->fmt (NHWC or NCHW). The image is
 * decoded with the required channel count and resized if its dimensions
 * differ from the required ones.
 *
 * @param image_path path of the image file.
 * @param input_attr attribute of the tensor the image will feed.
 * @return pixel buffer allocated through the stb allocator, or NULL if the
 *         layout is unsupported, decoding fails, allocation fails or the
 *         resize fails. The caller owns the returned buffer.
 */
static unsigned char *load_image(const char *image_path, rknn_tensor_attr *input_attr)
{
  int req_height = 0;
  int req_width = 0;
  int req_channel = 0;

  switch (input_attr->fmt)
  {
  case RKNN_TENSOR_NHWC:
    req_height = input_attr->dims[1];
    req_width = input_attr->dims[2];
    req_channel = input_attr->dims[3];
    break;
  case RKNN_TENSOR_NCHW:
    req_height = input_attr->dims[2];
    req_width = input_attr->dims[3];
    req_channel = input_attr->dims[1];
    break;
  default:
    printf("meet unsupported layout\n");
    return NULL;
  }

  int height = 0;
  int width = 0;
  int channel = 0;

  unsigned char *image_data = stbi_load(image_path, &width, &height, &channel, req_channel);
  if (image_data == NULL)
  {
    printf("load image failed!\n");
    return NULL;
  }

  if (width != req_width || height != req_height)
  {
    // With a non-zero requested component count, stbi_load returns pixels
    // with that many components; `channel` only reports what the file
    // contained. The resize must use the layout of the decoded buffer.
    int data_channel = req_channel != 0 ? req_channel : channel;

    unsigned char *image_resized =
      (unsigned char *)STBI_MALLOC((size_t)req_width * (size_t)req_height * (size_t)data_channel);
    if (!image_resized)
    {
      printf("malloc image failed!\n");
      STBI_FREE(image_data);
      return NULL;
    }
    if (stbir_resize_uint8(image_data, width, height, 0, image_resized, req_width, req_height, 0, data_channel) != 1)
    {
      printf("resize image failed!\n");
      // Release both buffers; the destination is no longer needed either.
      STBI_FREE(image_resized);
      STBI_FREE(image_data);
      return NULL;
    }
    STBI_FREE(image_data);
    image_data = image_resized;
  }

  return image_data;
}
213
// The NPU output of a quantized model is int8, so post-processing has to
// treat the data as int8.
// The function below converts an int8 NC1HWC2 tensor into a float NCHW tensor.
/**
 * Convert an int8 NC1HWC2 tensor into a dequantized float NCHW tensor.
 *
 * Each value is dequantized as (value - zp) * scale.
 *
 * @param src     source int8 data laid out as [N][C1][H][W][C2].
 * @param dst     destination float buffer laid out as [N][channel][h][w].
 * @param dims    source dimensions: dims[0]=N, dims[1]=C1, dims[2]=H,
 *                dims[3]=W, dims[4]=C2.
 * @param channel number of channels to extract per batch.
 * @param h       destination height.
 * @param w       destination width.
 * @param zp      quantization zero point.
 * @param scale   quantization scale.
 * @return always 0.
 */
int NC1HWC2_int8_to_NCHW_float(const int8_t *src, float *dst, int *dims, int channel, int h, int w, int zp, float scale)
{
  int batch = dims[0];
  int C1 = dims[1];
  int C2 = dims[4];
  int hw_src = dims[2] * dims[3];
  int hw_dst = h * w;
  for (int i = 0; i < batch; i++)
  {
    // Derive each batch's base from the original pointers. Advancing src/dst
    // in place would accumulate the offsets and misplace batch 2 and later.
    const int8_t *src_batch = src + i * C1 * hw_src * C2;
    float *dst_batch = dst + i * channel * hw_dst;
    for (int c = 0; c < channel; ++c)
    {
      // Channel c lives in C1-plane c / C2, at position c % C2 inside each
      // C2-wide group.
      int plane = c / C2;
      const int8_t *src_c = plane * hw_src * C2 + src_batch;
      int offset = c % C2;
      for (int cur_h = 0; cur_h < h; ++cur_h)
      {
        for (int cur_w = 0; cur_w < w; ++cur_w)
        {
          int cur_hw = cur_h * w + cur_w;
          dst_batch[c * hw_dst + cur_hw] = (src_c[C2 * cur_hw + offset] - zp) * scale; // int8 --> float
        }
      }
    }
  }

  return 0;
}
243
244 /*-------------------------------------------
245 Main Functions
246 -------------------------------------------*/
main(int argc,char * argv[])247 int main(int argc, char *argv[])
248 {
249 if (argc < 3)
250 {
251 printf("Usage:%s model_path input_path [loop_count]\n", argv[0]);
252 return -1;
253 }
254
255 char *model_path = argv[1];
256 char *input_path = argv[2];
257
258 int loop_count = 1;
259 if (argc > 3)
260 {
261 loop_count = atoi(argv[3]);
262 }
263
264 rknn_context ctx = 0;
265
266 // Load RKNN Model
267 #if 1
268 // Init rknn from model path
269 int ret = rknn_init(&ctx, model_path, 0, 0, NULL);
270 #else
271 // Init rknn from model data
272 size_t model_size;
273 void *model_data = load_file(model_path, &model_size);
274 if (model_data == NULL)
275 {
276 return -1;
277 }
278 int ret = rknn_init(&ctx, model_data, model_size, 0, NULL);
279 free(model_data);
280 #endif
281 if (ret < 0)
282 {
283 printf("rknn_init fail! ret=%d\n", ret);
284 return -1;
285 }
286
287 // Get sdk and driver version
288 rknn_sdk_version sdk_ver;
289 ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
290 if (ret != RKNN_SUCC)
291 {
292 printf("rknn_query fail! ret=%d\n", ret);
293 return -1;
294 }
295 printf("rknn_api/rknnrt version: %s, driver version: %s\n", sdk_ver.api_version, sdk_ver.drv_version);
296
297 // Get Model Input Output Info
298 rknn_input_output_num io_num;
299 ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
300 if (ret != RKNN_SUCC)
301 {
302 printf("rknn_query fail! ret=%d\n", ret);
303 return -1;
304 }
305 printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
306
307 printf("input tensors:\n");
308 rknn_tensor_attr input_attrs[io_num.n_input];
309 memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
310 for (uint32_t i = 0; i < io_num.n_input; i++)
311 {
312 input_attrs[i].index = i;
313 // query info
314 ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
315 if (ret < 0)
316 {
317 printf("rknn_init error! ret=%d\n", ret);
318 return -1;
319 }
320 dump_tensor_attr(&input_attrs[i]);
321 }
322
323 printf("output tensors:\n");
324 rknn_tensor_attr output_attrs[io_num.n_output];
325 memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
326 for (uint32_t i = 0; i < io_num.n_output; i++)
327 {
328 output_attrs[i].index = i;
329 // query info
330 ret = rknn_query(ctx, RKNN_QUERY_NATIVE_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
331 if (ret != RKNN_SUCC)
332 {
333 printf("rknn_query fail! ret=%d\n", ret);
334 return -1;
335 }
336 dump_tensor_attr(&output_attrs[i]);
337 }
338
339 // Get custom string
340 rknn_custom_string custom_string;
341 ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string, sizeof(custom_string));
342 if (ret != RKNN_SUCC)
343 {
344 printf("rknn_query fail! ret=%d\n", ret);
345 return -1;
346 }
347 printf("custom string: %s\n", custom_string.string);
348
349 unsigned char *input_data = NULL;
350 rknn_tensor_type input_type = RKNN_TENSOR_UINT8;
351 rknn_tensor_format input_layout = RKNN_TENSOR_NHWC;
352
353 // Load image
354 input_data = load_image(input_path, &input_attrs[0]);
355
356 if (!input_data)
357 {
358 return -1;
359 }
360
361 // Create input tensor memory
362 rknn_tensor_mem *input_mems[1];
363 // default input type is int8 (normalize and quantize need compute in outside)
364 // if set uint8, will fuse normalize and quantize to npu
365 input_attrs[0].type = input_type;
366 // default fmt is NHWC, npu only support NHWC in zero copy mode
367 input_attrs[0].fmt = input_layout;
368
369 input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
370
371 // Copy input data to input tensor memory
372 int width = input_attrs[0].dims[2];
373 int stride = input_attrs[0].w_stride;
374
375 if (width == stride)
376 {
377 memcpy(input_mems[0]->virt_addr, input_data, width * input_attrs[0].dims[1] * input_attrs[0].dims[3]);
378 }
379 else
380 {
381 int height = input_attrs[0].dims[1];
382 int channel = input_attrs[0].dims[3];
383 // copy from src to dst with stride
384 uint8_t *src_ptr = input_data;
385 uint8_t *dst_ptr = (uint8_t *)input_mems[0]->virt_addr;
386 // width-channel elements
387 int src_wc_elems = width * channel;
388 int dst_wc_elems = stride * channel;
389 for (int h = 0; h < height; ++h)
390 {
391 memcpy(dst_ptr, src_ptr, src_wc_elems);
392 src_ptr += src_wc_elems;
393 dst_ptr += dst_wc_elems;
394 }
395 }
396
397 // Create output tensor memory
398 rknn_tensor_mem *output_mems[io_num.n_output];
399 for (uint32_t i = 0; i < io_num.n_output; ++i)
400 {
401 output_mems[i] = rknn_create_mem(ctx, output_attrs[i].size_with_stride);
402 }
403
404 // Set input tensor memory
405 ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
406 if (ret < 0)
407 {
408 printf("rknn_set_io_mem fail! ret=%d\n", ret);
409 return -1;
410 }
411
412 // Set output tensor memory
413 for (uint32_t i = 0; i < io_num.n_output; ++i)
414 {
415 // set output memory and attribute
416 ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
417 if (ret < 0)
418 {
419 printf("rknn_set_io_mem fail! ret=%d\n", ret);
420 return -1;
421 }
422 }
423
424 // Run
425 printf("Begin perf ...\n");
426 for (int i = 0; i < loop_count; ++i)
427 {
428 int64_t start_us = getCurrentTimeUs();
429 ret = rknn_run(ctx, NULL);
430 int64_t elapse_us = getCurrentTimeUs() - start_us;
431 if (ret < 0)
432 {
433 printf("rknn run error %d\n", ret);
434 return -1;
435 }
436 printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
437 }
438
439 printf("output origin tensors:\n");
440 rknn_tensor_attr orig_output_attrs[io_num.n_output];
441 memset(orig_output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
442 for (uint32_t i = 0; i < io_num.n_output; i++)
443 {
444 orig_output_attrs[i].index = i;
445 // query info
446 ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(orig_output_attrs[i]), sizeof(rknn_tensor_attr));
447 if (ret != RKNN_SUCC)
448 {
449 printf("rknn_query fail! ret=%d\n", ret);
450 return -1;
451 }
452 dump_tensor_attr(&orig_output_attrs[i]);
453 }
454
455 float *output_mems_nchw[io_num.n_output];
456 for (uint32_t i = 0; i < io_num.n_output; ++i)
457 {
458 int size = orig_output_attrs[i].size_with_stride * sizeof(float);
459 output_mems_nchw[i] = (float *)malloc(size);
460 }
461
462 for (uint32_t i = 0; i < io_num.n_output; i++)
463 {
464 if (output_attrs[i].fmt == RKNN_TENSOR_NC1HWC2)
465 {
466 int channel = orig_output_attrs[i].dims[1];
467 int h = orig_output_attrs[i].n_dims > 2 ? orig_output_attrs[i].dims[2] : 1;
468 int w = orig_output_attrs[i].n_dims > 3 ? orig_output_attrs[i].dims[3] : 1;
469 int zp = output_attrs[i].zp;
470 float scale = output_attrs[i].scale;
471 NC1HWC2_int8_to_NCHW_float((int8_t *)output_mems[i]->virt_addr, (float *)output_mems_nchw[i], (int *)output_attrs[i].dims,
472 channel, h, w, zp, scale);
473 }
474 else
475 {
476 int8_t *src = (int8_t *)output_mems[i]->virt_addr;
477 float *dst = output_mems_nchw[i];
478 for (int index = 0; index < output_attrs[i].n_elems; index++)
479 {
480 dst[index] = (src[index] - output_attrs[i].zp) * output_attrs[i].scale;
481 }
482 }
483 }
484
485 // Get top 5
486 uint32_t topNum = 5;
487 for (uint32_t i = 0; i < io_num.n_output; i++)
488 {
489 uint32_t MaxClass[topNum];
490 float fMaxProb[topNum];
491
492 uint32_t sz = orig_output_attrs[i].n_elems;
493 int top_count = sz > topNum ? topNum : sz;
494
495 float *buffer = (float *)output_mems_nchw[i];
496
497 rknn_GetTopN(buffer, fMaxProb, MaxClass, sz, topNum);
498
499 printf("---- Top%d ----\n", top_count);
500 for (int j = 0; j < top_count; j++)
501 {
502 printf("%8.6f - %d\n", fMaxProb[j], MaxClass[j]);
503 }
504 }
505
506 // Destroy rknn memory
507 rknn_destroy_mem(ctx, input_mems[0]);
508 for (uint32_t i = 0; i < io_num.n_output; ++i)
509 {
510 rknn_destroy_mem(ctx, output_mems[i]);
511 free(output_mems_nchw[i]);
512 }
513
514 // destroy
515 rknn_destroy(ctx);
516
517 free(input_data);
518
519 return 0;
520 }