xref: /OK3568_Linux_fs/external/rknpu2/examples/rknn_common_test/src/main.cc (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*4882a593Smuzhiyun 
/*-------------------------------------------
                Includes
-------------------------------------------*/
18*4882a593Smuzhiyun #include "opencv2/core/core.hpp"
19*4882a593Smuzhiyun #include "opencv2/imgcodecs.hpp"
20*4882a593Smuzhiyun #include "opencv2/imgproc.hpp"
21*4882a593Smuzhiyun #include "rknn_api.h"
22*4882a593Smuzhiyun 
23*4882a593Smuzhiyun #include <float.h>
24*4882a593Smuzhiyun #include <stdio.h>
25*4882a593Smuzhiyun #include <stdlib.h>
26*4882a593Smuzhiyun #include <string.h>
27*4882a593Smuzhiyun #include <sys/time.h>
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun using namespace std;
30*4882a593Smuzhiyun using namespace cv;
31*4882a593Smuzhiyun 
/*-------------------------------------------
                  Functions
-------------------------------------------*/
// Return the current wall-clock time, as reported by gettimeofday(), in
// microseconds. Used by this program only to measure short elapsed intervals.
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  // Widen to 64 bits *before* multiplying: where tv_sec is a 32-bit type the
  // product tv_sec * 1000000 would overflow before the conversion to int64_t.
  return (int64_t)tv.tv_sec * 1000000 + (int64_t)tv.tv_usec;
}
41*4882a593Smuzhiyun 
// Select the top-N largest values of pfProb together with their indices.
//
//   pfProb      - input scores, outputCount elements
//   pfMaxProb   - output, topNum elements: selected scores in descending order
//   pMaxClass   - output, topNum elements: index in pfProb of each selected score
//   outputCount - number of elements in pfProb
//   topNum      - number of entries requested
//
// Only min(outputCount, topNum) entries are selected. Remaining output slots
// (and slots for which no score greater than -FLT_MAX exists) keep the
// sentinels -FLT_MAX / (uint32_t)-1. Ties are resolved in favour of the lower
// index. Always returns 1.
static int rknn_GetTopN(float* pfProb, float* pfMaxProb, uint32_t* pMaxClass, uint32_t outputCount, uint32_t topNum)
{
  const uint32_t kNoClass  = (uint32_t)-1;
  uint32_t       top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t slot = 0; slot < topNum; ++slot) {
    pfMaxProb[slot] = -FLT_MAX;
    pMaxClass[slot] = kNoClass;
  }

  for (uint32_t slot = 0; slot < top_count; ++slot) {
    for (uint32_t idx = 0; idx < outputCount; ++idx) {
      // Skip indices already selected for an earlier slot. Only slots
      // [0, slot) can hold a real index, so exactly those are checked; this
      // works for any topNum instead of assuming five output slots.
      int already_taken = 0;
      for (uint32_t prev = 0; prev < slot; ++prev) {
        if (pMaxClass[prev] == idx) {
          already_taken = 1;
          break;
        }
      }
      if (already_taken) {
        continue;
      }

      if (pfProb[idx] > pfMaxProb[slot]) {
        pfMaxProb[slot] = pfProb[idx];
        pMaxClass[slot] = idx;
      }
    }
  }

  return 1;
}
68*4882a593Smuzhiyun 
dump_tensor_attr(rknn_tensor_attr * attr)69*4882a593Smuzhiyun static void dump_tensor_attr(rknn_tensor_attr* attr)
70*4882a593Smuzhiyun {
71*4882a593Smuzhiyun   printf("  index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
72*4882a593Smuzhiyun          "zp=%d, scale=%f\n",
73*4882a593Smuzhiyun          attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
74*4882a593Smuzhiyun          attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
75*4882a593Smuzhiyun          get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
76*4882a593Smuzhiyun }
77*4882a593Smuzhiyun 
/*-------------------------------------------
                  Main Functions
-------------------------------------------*/
main(int argc,char * argv[])81*4882a593Smuzhiyun int main(int argc, char* argv[])
82*4882a593Smuzhiyun {
83*4882a593Smuzhiyun   if (argc < 3) {
84*4882a593Smuzhiyun     printf("Usage:%s model_path input_path [loop_count]\n", argv[0]);
85*4882a593Smuzhiyun     return -1;
86*4882a593Smuzhiyun   }
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun   char* model_path = argv[1];
89*4882a593Smuzhiyun   char* input_path = argv[2];
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun   int loop_count = 1;
92*4882a593Smuzhiyun   if (argc > 3) {
93*4882a593Smuzhiyun     loop_count = atoi(argv[3]);
94*4882a593Smuzhiyun   }
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun   rknn_context ctx = 0;
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun   // Load RKNN Model
99*4882a593Smuzhiyun   int ret = rknn_init(&ctx, model_path, 0, 0, NULL);
100*4882a593Smuzhiyun   if (ret < 0) {
101*4882a593Smuzhiyun     printf("rknn_init fail! ret=%d\n", ret);
102*4882a593Smuzhiyun     return -1;
103*4882a593Smuzhiyun   }
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun   // Get sdk and driver version
106*4882a593Smuzhiyun   rknn_sdk_version sdk_ver;
107*4882a593Smuzhiyun   ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
108*4882a593Smuzhiyun   if (ret != RKNN_SUCC) {
109*4882a593Smuzhiyun     printf("rknn_query fail! ret=%d\n", ret);
110*4882a593Smuzhiyun     return -1;
111*4882a593Smuzhiyun   }
112*4882a593Smuzhiyun   printf("rknn_api/rknnrt version: %s, driver version: %s\n", sdk_ver.api_version, sdk_ver.drv_version);
113*4882a593Smuzhiyun 
114*4882a593Smuzhiyun   // Get Model Input Output Info
115*4882a593Smuzhiyun   rknn_input_output_num io_num;
116*4882a593Smuzhiyun   ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
117*4882a593Smuzhiyun   if (ret != RKNN_SUCC) {
118*4882a593Smuzhiyun     printf("rknn_query fail! ret=%d\n", ret);
119*4882a593Smuzhiyun     return -1;
120*4882a593Smuzhiyun   }
121*4882a593Smuzhiyun   printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun   printf("input tensors:\n");
124*4882a593Smuzhiyun   rknn_tensor_attr input_attrs[io_num.n_input];
125*4882a593Smuzhiyun   memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
126*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_input; i++) {
127*4882a593Smuzhiyun     input_attrs[i].index = i;
128*4882a593Smuzhiyun     // query info
129*4882a593Smuzhiyun     ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
130*4882a593Smuzhiyun     if (ret < 0) {
131*4882a593Smuzhiyun       printf("rknn_init error! ret=%d\n", ret);
132*4882a593Smuzhiyun       return -1;
133*4882a593Smuzhiyun     }
134*4882a593Smuzhiyun     dump_tensor_attr(&input_attrs[i]);
135*4882a593Smuzhiyun   }
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun   printf("output tensors:\n");
138*4882a593Smuzhiyun   rknn_tensor_attr output_attrs[io_num.n_output];
139*4882a593Smuzhiyun   memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
140*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_output; i++) {
141*4882a593Smuzhiyun     output_attrs[i].index = i;
142*4882a593Smuzhiyun     // query info
143*4882a593Smuzhiyun     ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
144*4882a593Smuzhiyun     if (ret != RKNN_SUCC) {
145*4882a593Smuzhiyun       printf("rknn_query fail! ret=%d\n", ret);
146*4882a593Smuzhiyun       return -1;
147*4882a593Smuzhiyun     }
148*4882a593Smuzhiyun     dump_tensor_attr(&output_attrs[i]);
149*4882a593Smuzhiyun   }
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun   // Get custom string
152*4882a593Smuzhiyun   rknn_custom_string custom_string;
153*4882a593Smuzhiyun   ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string, sizeof(custom_string));
154*4882a593Smuzhiyun   if (ret != RKNN_SUCC) {
155*4882a593Smuzhiyun     printf("rknn_query fail! ret=%d\n", ret);
156*4882a593Smuzhiyun     return -1;
157*4882a593Smuzhiyun   }
158*4882a593Smuzhiyun   printf("custom string: %s\n", custom_string.string);
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun   unsigned char*     input_data   = NULL;
161*4882a593Smuzhiyun   rknn_tensor_type   input_type   = RKNN_TENSOR_UINT8;
162*4882a593Smuzhiyun   rknn_tensor_format input_layout = RKNN_TENSOR_NHWC;
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun   // Load image
165*4882a593Smuzhiyun   int req_height  = 0;
166*4882a593Smuzhiyun   int req_width   = 0;
167*4882a593Smuzhiyun   int req_channel = 0;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun   switch (input_attrs[0].fmt) {
170*4882a593Smuzhiyun   case RKNN_TENSOR_NHWC:
171*4882a593Smuzhiyun     req_height  = input_attrs[0].dims[1];
172*4882a593Smuzhiyun     req_width   = input_attrs[0].dims[2];
173*4882a593Smuzhiyun     req_channel = input_attrs[0].dims[3];
174*4882a593Smuzhiyun     break;
175*4882a593Smuzhiyun   case RKNN_TENSOR_NCHW:
176*4882a593Smuzhiyun     req_height  = input_attrs[0].dims[2];
177*4882a593Smuzhiyun     req_width   = input_attrs[0].dims[3];
178*4882a593Smuzhiyun     req_channel = input_attrs[0].dims[1];
179*4882a593Smuzhiyun     break;
180*4882a593Smuzhiyun   default:
181*4882a593Smuzhiyun     printf("meet unsupported layout\n");
182*4882a593Smuzhiyun     return -1;
183*4882a593Smuzhiyun   }
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun   int height  = 0;
186*4882a593Smuzhiyun   int width   = 0;
187*4882a593Smuzhiyun   int channel = 0;
188*4882a593Smuzhiyun 
189*4882a593Smuzhiyun   cv::Mat orig_img = imread(input_path, cv::IMREAD_COLOR);
190*4882a593Smuzhiyun   if (!orig_img.data) {
191*4882a593Smuzhiyun     printf("cv::imread %s fail!\n", input_path);
192*4882a593Smuzhiyun     return -1;
193*4882a593Smuzhiyun   }
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun   // if origin model is from Caffe, you maybe not need do BGR2RGB.
196*4882a593Smuzhiyun   cv::Mat orig_img_rgb;
197*4882a593Smuzhiyun   cv::cvtColor(orig_img, orig_img_rgb, cv::COLOR_BGR2RGB);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun   cv::Mat img = orig_img_rgb.clone();
200*4882a593Smuzhiyun   if (orig_img.cols != req_width || orig_img.rows != req_height) {
201*4882a593Smuzhiyun     printf("resize %d %d to %d %d\n", orig_img.cols, orig_img.rows, req_width, req_height);
202*4882a593Smuzhiyun     cv::resize(orig_img_rgb, img, cv::Size(req_width, req_height), 0, 0, cv::INTER_LINEAR);
203*4882a593Smuzhiyun   }
204*4882a593Smuzhiyun   input_data = img.data;
205*4882a593Smuzhiyun   if (!input_data) {
206*4882a593Smuzhiyun     return -1;
207*4882a593Smuzhiyun   }
208*4882a593Smuzhiyun 
209*4882a593Smuzhiyun   // Create input tensor memory
210*4882a593Smuzhiyun   rknn_tensor_mem* input_mems[1];
211*4882a593Smuzhiyun   // default input type is int8 (normalize and quantize need compute in outside)
212*4882a593Smuzhiyun   // if set uint8, will fuse normalize and quantize to npu
213*4882a593Smuzhiyun   input_attrs[0].type = input_type;
214*4882a593Smuzhiyun   // default fmt is NHWC, npu only support NHWC in zero copy mode
215*4882a593Smuzhiyun   input_attrs[0].fmt = input_layout;
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun   input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun   // Copy input data to input tensor memory
220*4882a593Smuzhiyun   width      = input_attrs[0].dims[2];
221*4882a593Smuzhiyun   int stride = input_attrs[0].w_stride;
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun   if (width == stride) {
224*4882a593Smuzhiyun     memcpy(input_mems[0]->virt_addr, input_data, width * input_attrs[0].dims[1] * input_attrs[0].dims[3]);
225*4882a593Smuzhiyun   } else {
226*4882a593Smuzhiyun     int height  = input_attrs[0].dims[1];
227*4882a593Smuzhiyun     int channel = input_attrs[0].dims[3];
228*4882a593Smuzhiyun     // copy from src to dst with stride
229*4882a593Smuzhiyun     uint8_t* src_ptr = input_data;
230*4882a593Smuzhiyun     uint8_t* dst_ptr = (uint8_t*)input_mems[0]->virt_addr;
231*4882a593Smuzhiyun     // width-channel elements
232*4882a593Smuzhiyun     int src_wc_elems = width * channel;
233*4882a593Smuzhiyun     int dst_wc_elems = stride * channel;
234*4882a593Smuzhiyun     for (int h = 0; h < height; ++h) {
235*4882a593Smuzhiyun       memcpy(dst_ptr, src_ptr, src_wc_elems);
236*4882a593Smuzhiyun       src_ptr += src_wc_elems;
237*4882a593Smuzhiyun       dst_ptr += dst_wc_elems;
238*4882a593Smuzhiyun     }
239*4882a593Smuzhiyun   }
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun   // Create output tensor memory
242*4882a593Smuzhiyun   rknn_tensor_mem* output_mems[io_num.n_output];
243*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_output; ++i) {
244*4882a593Smuzhiyun     // default output type is depend on model, this require float32 to compute top5
245*4882a593Smuzhiyun     // allocate float32 output tensor
246*4882a593Smuzhiyun     int output_size = output_attrs[i].n_elems * sizeof(float);
247*4882a593Smuzhiyun     output_mems[i]  = rknn_create_mem(ctx, output_size);
248*4882a593Smuzhiyun   }
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun   // Set input tensor memory
251*4882a593Smuzhiyun   ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
252*4882a593Smuzhiyun   if (ret < 0) {
253*4882a593Smuzhiyun     printf("rknn_set_io_mem fail! ret=%d\n", ret);
254*4882a593Smuzhiyun     return -1;
255*4882a593Smuzhiyun   }
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun   // Set output tensor memory
258*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_output; ++i) {
259*4882a593Smuzhiyun     // default output type is depend on model, this require float32 to compute top5
260*4882a593Smuzhiyun     output_attrs[i].type = RKNN_TENSOR_FLOAT32;
261*4882a593Smuzhiyun     // set output memory and attribute
262*4882a593Smuzhiyun     ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
263*4882a593Smuzhiyun     if (ret < 0) {
264*4882a593Smuzhiyun       printf("rknn_set_io_mem fail! ret=%d\n", ret);
265*4882a593Smuzhiyun       return -1;
266*4882a593Smuzhiyun     }
267*4882a593Smuzhiyun   }
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun   // Run
270*4882a593Smuzhiyun   printf("Begin perf ...\n");
271*4882a593Smuzhiyun   for (int i = 0; i < loop_count; ++i) {
272*4882a593Smuzhiyun     int64_t start_us  = getCurrentTimeUs();
273*4882a593Smuzhiyun     ret               = rknn_run(ctx, NULL);
274*4882a593Smuzhiyun     int64_t elapse_us = getCurrentTimeUs() - start_us;
275*4882a593Smuzhiyun     if (ret < 0) {
276*4882a593Smuzhiyun       printf("rknn run error %d\n", ret);
277*4882a593Smuzhiyun       return -1;
278*4882a593Smuzhiyun     }
279*4882a593Smuzhiyun     printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
280*4882a593Smuzhiyun   }
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun   // Get top 5
283*4882a593Smuzhiyun   uint32_t topNum = 5;
284*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_output; i++) {
285*4882a593Smuzhiyun     uint32_t MaxClass[topNum];
286*4882a593Smuzhiyun     float    fMaxProb[topNum];
287*4882a593Smuzhiyun     float*   buffer    = (float*)output_mems[i]->virt_addr;
288*4882a593Smuzhiyun     uint32_t sz        = output_attrs[i].n_elems;
289*4882a593Smuzhiyun     int      top_count = sz > topNum ? topNum : sz;
290*4882a593Smuzhiyun 
291*4882a593Smuzhiyun     rknn_GetTopN(buffer, fMaxProb, MaxClass, sz, topNum);
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun     printf("---- Top%d ----\n", top_count);
294*4882a593Smuzhiyun     for (int j = 0; j < top_count; j++) {
295*4882a593Smuzhiyun       printf("%8.6f - %d\n", fMaxProb[j], MaxClass[j]);
296*4882a593Smuzhiyun     }
297*4882a593Smuzhiyun   }
298*4882a593Smuzhiyun 
299*4882a593Smuzhiyun   // Destroy rknn memory
300*4882a593Smuzhiyun   rknn_destroy_mem(ctx, input_mems[0]);
301*4882a593Smuzhiyun   for (uint32_t i = 0; i < io_num.n_output; ++i) {
302*4882a593Smuzhiyun     rknn_destroy_mem(ctx, output_mems[i]);
303*4882a593Smuzhiyun   }
304*4882a593Smuzhiyun 
305*4882a593Smuzhiyun   // destroy
306*4882a593Smuzhiyun   rknn_destroy(ctx);
307*4882a593Smuzhiyun   return 0;
308*4882a593Smuzhiyun }
309