// Copyright (c) 2021 by Rockchip Electronics Co., Ltd. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*-------------------------------------------
                Includes
-------------------------------------------*/
18*4882a593Smuzhiyun #include "opencv2/core/core.hpp"
19*4882a593Smuzhiyun #include "opencv2/imgcodecs.hpp"
20*4882a593Smuzhiyun #include "opencv2/imgproc.hpp"
21*4882a593Smuzhiyun #include "rknn_api.h"
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun #include <float.h>
24*4882a593Smuzhiyun #include <stdio.h>
25*4882a593Smuzhiyun #include <stdlib.h>
26*4882a593Smuzhiyun #include <string.h>
27*4882a593Smuzhiyun #include <sys/time.h>
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun using namespace std;
30*4882a593Smuzhiyun using namespace cv;
31*4882a593Smuzhiyun
/*-------------------------------------------
                Functions
-------------------------------------------*/
/**
 * Return the current wall-clock time in microseconds since the epoch.
 *
 * The seconds value is widened to int64_t BEFORE the multiply: the original
 * computed tv.tv_sec * 1000000 in the native time_t width, which overflows
 * (undefined behavior) on platforms where time_t is 32 bits.
 */
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}
41*4882a593Smuzhiyun
/**
 * Select the top-N largest probabilities and their class indices.
 *
 * @param pfProb      input probability array with @p outputCount entries
 * @param pfMaxProb   output: top probabilities in descending order;
 *                    must have room for @p topNum entries
 * @param pMaxClass   output: class indices matching pfMaxProb;
 *                    must have room for @p topNum entries
 * @param outputCount number of classes in pfProb
 * @param topNum      how many top entries to produce
 * @return 1 (kept for compatibility with existing callers)
 *
 * Fix: the original skipped already-selected classes by comparing against
 * a hard-coded pMaxClass[0..4], which reads out of bounds whenever the
 * caller passes topNum < 5 and misses duplicates when topNum > 5. We now
 * check only the j entries selected in earlier rounds, which is identical
 * for topNum == 5 and correct for every other value.
 */
static int rknn_GetTopN(float* pfProb, float* pfMaxProb, uint32_t* pMaxClass, uint32_t outputCount, uint32_t topNum)
{
  uint32_t top_count = outputCount > topNum ? topNum : outputCount;

  for (uint32_t i = 0; i < topNum; ++i) {
    pfMaxProb[i] = -FLT_MAX;
    pMaxClass[i] = (uint32_t)-1; /* sentinel: no class selected yet */
  }

  for (uint32_t j = 0; j < top_count; j++) {
    for (uint32_t i = 0; i < outputCount; i++) {
      /* skip classes already selected in earlier rounds */
      int already_picked = 0;
      for (uint32_t k = 0; k < j; ++k) {
        if (i == pMaxClass[k]) {
          already_picked = 1;
          break;
        }
      }
      if (already_picked) {
        continue;
      }

      if (pfProb[i] > pfMaxProb[j]) {
        pfMaxProb[j] = pfProb[i];
        pMaxClass[j] = i;
      }
    }
  }

  return 1;
}
68*4882a593Smuzhiyun
// Pretty-print one rknn_tensor_attr on a single line: tensor index/name,
// dimensions, element count, byte size, layout, data type and quantization
// parameters (zero point and scale).
// NOTE(review): dims[0..3] are printed unconditionally regardless of
// n_dims — assumes 4-D tensors; confirm for models with other ranks.
static void dump_tensor_attr(rknn_tensor_attr* attr)
{
  printf(" index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], n_elems=%d, size=%d, fmt=%s, type=%s, qnt_type=%s, "
         "zp=%d, scale=%f\n",
         attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2], attr->dims[3],
         attr->n_elems, attr->size, get_format_string(attr->fmt), get_type_string(attr->type),
         get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
77*4882a593Smuzhiyun
/*-------------------------------------------
                Main Functions
-------------------------------------------*/
main(int argc,char * argv[])81*4882a593Smuzhiyun int main(int argc, char* argv[])
82*4882a593Smuzhiyun {
83*4882a593Smuzhiyun if (argc < 3) {
84*4882a593Smuzhiyun printf("Usage:%s model_path input_path [loop_count]\n", argv[0]);
85*4882a593Smuzhiyun return -1;
86*4882a593Smuzhiyun }
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun char* model_path = argv[1];
89*4882a593Smuzhiyun char* input_path = argv[2];
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun int loop_count = 1;
92*4882a593Smuzhiyun if (argc > 3) {
93*4882a593Smuzhiyun loop_count = atoi(argv[3]);
94*4882a593Smuzhiyun }
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun rknn_context ctx = 0;
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun // Load RKNN Model
99*4882a593Smuzhiyun int ret = rknn_init(&ctx, model_path, 0, 0, NULL);
100*4882a593Smuzhiyun if (ret < 0) {
101*4882a593Smuzhiyun printf("rknn_init fail! ret=%d\n", ret);
102*4882a593Smuzhiyun return -1;
103*4882a593Smuzhiyun }
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun // Get sdk and driver version
106*4882a593Smuzhiyun rknn_sdk_version sdk_ver;
107*4882a593Smuzhiyun ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
108*4882a593Smuzhiyun if (ret != RKNN_SUCC) {
109*4882a593Smuzhiyun printf("rknn_query fail! ret=%d\n", ret);
110*4882a593Smuzhiyun return -1;
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun printf("rknn_api/rknnrt version: %s, driver version: %s\n", sdk_ver.api_version, sdk_ver.drv_version);
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun // Get Model Input Output Info
115*4882a593Smuzhiyun rknn_input_output_num io_num;
116*4882a593Smuzhiyun ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
117*4882a593Smuzhiyun if (ret != RKNN_SUCC) {
118*4882a593Smuzhiyun printf("rknn_query fail! ret=%d\n", ret);
119*4882a593Smuzhiyun return -1;
120*4882a593Smuzhiyun }
121*4882a593Smuzhiyun printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun printf("input tensors:\n");
124*4882a593Smuzhiyun rknn_tensor_attr input_attrs[io_num.n_input];
125*4882a593Smuzhiyun memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
126*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_input; i++) {
127*4882a593Smuzhiyun input_attrs[i].index = i;
128*4882a593Smuzhiyun // query info
129*4882a593Smuzhiyun ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
130*4882a593Smuzhiyun if (ret < 0) {
131*4882a593Smuzhiyun printf("rknn_init error! ret=%d\n", ret);
132*4882a593Smuzhiyun return -1;
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun dump_tensor_attr(&input_attrs[i]);
135*4882a593Smuzhiyun }
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun printf("output tensors:\n");
138*4882a593Smuzhiyun rknn_tensor_attr output_attrs[io_num.n_output];
139*4882a593Smuzhiyun memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
140*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_output; i++) {
141*4882a593Smuzhiyun output_attrs[i].index = i;
142*4882a593Smuzhiyun // query info
143*4882a593Smuzhiyun ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
144*4882a593Smuzhiyun if (ret != RKNN_SUCC) {
145*4882a593Smuzhiyun printf("rknn_query fail! ret=%d\n", ret);
146*4882a593Smuzhiyun return -1;
147*4882a593Smuzhiyun }
148*4882a593Smuzhiyun dump_tensor_attr(&output_attrs[i]);
149*4882a593Smuzhiyun }
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun // Get custom string
152*4882a593Smuzhiyun rknn_custom_string custom_string;
153*4882a593Smuzhiyun ret = rknn_query(ctx, RKNN_QUERY_CUSTOM_STRING, &custom_string, sizeof(custom_string));
154*4882a593Smuzhiyun if (ret != RKNN_SUCC) {
155*4882a593Smuzhiyun printf("rknn_query fail! ret=%d\n", ret);
156*4882a593Smuzhiyun return -1;
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun printf("custom string: %s\n", custom_string.string);
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun unsigned char* input_data = NULL;
161*4882a593Smuzhiyun rknn_tensor_type input_type = RKNN_TENSOR_UINT8;
162*4882a593Smuzhiyun rknn_tensor_format input_layout = RKNN_TENSOR_NHWC;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun // Load image
165*4882a593Smuzhiyun int req_height = 0;
166*4882a593Smuzhiyun int req_width = 0;
167*4882a593Smuzhiyun int req_channel = 0;
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun switch (input_attrs[0].fmt) {
170*4882a593Smuzhiyun case RKNN_TENSOR_NHWC:
171*4882a593Smuzhiyun req_height = input_attrs[0].dims[1];
172*4882a593Smuzhiyun req_width = input_attrs[0].dims[2];
173*4882a593Smuzhiyun req_channel = input_attrs[0].dims[3];
174*4882a593Smuzhiyun break;
175*4882a593Smuzhiyun case RKNN_TENSOR_NCHW:
176*4882a593Smuzhiyun req_height = input_attrs[0].dims[2];
177*4882a593Smuzhiyun req_width = input_attrs[0].dims[3];
178*4882a593Smuzhiyun req_channel = input_attrs[0].dims[1];
179*4882a593Smuzhiyun break;
180*4882a593Smuzhiyun default:
181*4882a593Smuzhiyun printf("meet unsupported layout\n");
182*4882a593Smuzhiyun return -1;
183*4882a593Smuzhiyun }
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun int height = 0;
186*4882a593Smuzhiyun int width = 0;
187*4882a593Smuzhiyun int channel = 0;
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun cv::Mat orig_img = imread(input_path, cv::IMREAD_COLOR);
190*4882a593Smuzhiyun if (!orig_img.data) {
191*4882a593Smuzhiyun printf("cv::imread %s fail!\n", input_path);
192*4882a593Smuzhiyun return -1;
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun // if origin model is from Caffe, you maybe not need do BGR2RGB.
196*4882a593Smuzhiyun cv::Mat orig_img_rgb;
197*4882a593Smuzhiyun cv::cvtColor(orig_img, orig_img_rgb, cv::COLOR_BGR2RGB);
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun cv::Mat img = orig_img_rgb.clone();
200*4882a593Smuzhiyun if (orig_img.cols != req_width || orig_img.rows != req_height) {
201*4882a593Smuzhiyun printf("resize %d %d to %d %d\n", orig_img.cols, orig_img.rows, req_width, req_height);
202*4882a593Smuzhiyun cv::resize(orig_img_rgb, img, cv::Size(req_width, req_height), 0, 0, cv::INTER_LINEAR);
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun input_data = img.data;
205*4882a593Smuzhiyun if (!input_data) {
206*4882a593Smuzhiyun return -1;
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun // Create input tensor memory
210*4882a593Smuzhiyun rknn_tensor_mem* input_mems[1];
211*4882a593Smuzhiyun // default input type is int8 (normalize and quantize need compute in outside)
212*4882a593Smuzhiyun // if set uint8, will fuse normalize and quantize to npu
213*4882a593Smuzhiyun input_attrs[0].type = input_type;
214*4882a593Smuzhiyun // default fmt is NHWC, npu only support NHWC in zero copy mode
215*4882a593Smuzhiyun input_attrs[0].fmt = input_layout;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun input_mems[0] = rknn_create_mem(ctx, input_attrs[0].size_with_stride);
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun // Copy input data to input tensor memory
220*4882a593Smuzhiyun width = input_attrs[0].dims[2];
221*4882a593Smuzhiyun int stride = input_attrs[0].w_stride;
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun if (width == stride) {
224*4882a593Smuzhiyun memcpy(input_mems[0]->virt_addr, input_data, width * input_attrs[0].dims[1] * input_attrs[0].dims[3]);
225*4882a593Smuzhiyun } else {
226*4882a593Smuzhiyun int height = input_attrs[0].dims[1];
227*4882a593Smuzhiyun int channel = input_attrs[0].dims[3];
228*4882a593Smuzhiyun // copy from src to dst with stride
229*4882a593Smuzhiyun uint8_t* src_ptr = input_data;
230*4882a593Smuzhiyun uint8_t* dst_ptr = (uint8_t*)input_mems[0]->virt_addr;
231*4882a593Smuzhiyun // width-channel elements
232*4882a593Smuzhiyun int src_wc_elems = width * channel;
233*4882a593Smuzhiyun int dst_wc_elems = stride * channel;
234*4882a593Smuzhiyun for (int h = 0; h < height; ++h) {
235*4882a593Smuzhiyun memcpy(dst_ptr, src_ptr, src_wc_elems);
236*4882a593Smuzhiyun src_ptr += src_wc_elems;
237*4882a593Smuzhiyun dst_ptr += dst_wc_elems;
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun // Create output tensor memory
242*4882a593Smuzhiyun rknn_tensor_mem* output_mems[io_num.n_output];
243*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_output; ++i) {
244*4882a593Smuzhiyun // default output type is depend on model, this require float32 to compute top5
245*4882a593Smuzhiyun // allocate float32 output tensor
246*4882a593Smuzhiyun int output_size = output_attrs[i].n_elems * sizeof(float);
247*4882a593Smuzhiyun output_mems[i] = rknn_create_mem(ctx, output_size);
248*4882a593Smuzhiyun }
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun // Set input tensor memory
251*4882a593Smuzhiyun ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
252*4882a593Smuzhiyun if (ret < 0) {
253*4882a593Smuzhiyun printf("rknn_set_io_mem fail! ret=%d\n", ret);
254*4882a593Smuzhiyun return -1;
255*4882a593Smuzhiyun }
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun // Set output tensor memory
258*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_output; ++i) {
259*4882a593Smuzhiyun // default output type is depend on model, this require float32 to compute top5
260*4882a593Smuzhiyun output_attrs[i].type = RKNN_TENSOR_FLOAT32;
261*4882a593Smuzhiyun // set output memory and attribute
262*4882a593Smuzhiyun ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
263*4882a593Smuzhiyun if (ret < 0) {
264*4882a593Smuzhiyun printf("rknn_set_io_mem fail! ret=%d\n", ret);
265*4882a593Smuzhiyun return -1;
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun // Run
270*4882a593Smuzhiyun printf("Begin perf ...\n");
271*4882a593Smuzhiyun for (int i = 0; i < loop_count; ++i) {
272*4882a593Smuzhiyun int64_t start_us = getCurrentTimeUs();
273*4882a593Smuzhiyun ret = rknn_run(ctx, NULL);
274*4882a593Smuzhiyun int64_t elapse_us = getCurrentTimeUs() - start_us;
275*4882a593Smuzhiyun if (ret < 0) {
276*4882a593Smuzhiyun printf("rknn run error %d\n", ret);
277*4882a593Smuzhiyun return -1;
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
280*4882a593Smuzhiyun }
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun // Get top 5
283*4882a593Smuzhiyun uint32_t topNum = 5;
284*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_output; i++) {
285*4882a593Smuzhiyun uint32_t MaxClass[topNum];
286*4882a593Smuzhiyun float fMaxProb[topNum];
287*4882a593Smuzhiyun float* buffer = (float*)output_mems[i]->virt_addr;
288*4882a593Smuzhiyun uint32_t sz = output_attrs[i].n_elems;
289*4882a593Smuzhiyun int top_count = sz > topNum ? topNum : sz;
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun rknn_GetTopN(buffer, fMaxProb, MaxClass, sz, topNum);
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun printf("---- Top%d ----\n", top_count);
294*4882a593Smuzhiyun for (int j = 0; j < top_count; j++) {
295*4882a593Smuzhiyun printf("%8.6f - %d\n", fMaxProb[j], MaxClass[j]);
296*4882a593Smuzhiyun }
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun // Destroy rknn memory
300*4882a593Smuzhiyun rknn_destroy_mem(ctx, input_mems[0]);
301*4882a593Smuzhiyun for (uint32_t i = 0; i < io_num.n_output; ++i) {
302*4882a593Smuzhiyun rknn_destroy_mem(ctx, output_mems[i]);
303*4882a593Smuzhiyun }
304*4882a593Smuzhiyun
305*4882a593Smuzhiyun // destroy
306*4882a593Smuzhiyun rknn_destroy(ctx);
307*4882a593Smuzhiyun return 0;
308*4882a593Smuzhiyun }
309