// xref: /OK3568_Linux_fs/external/rknpu2/examples/rknn_matmul_api_demo/src/rknn_matmul_demo.cpp (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/****************************************************************************
 *
 *    Copyright (c) 2017 - 2018 by Rockchip Corp.  All rights reserved.
 *
 *    The material in this file is confidential and contains trade secrets
 *    of Rockchip Corporation. This is proprietary information owned by
 *    Rockchip Corporation. No part of this work may be disclosed,
 *    reproduced, copied, transmitted, or used in any way for any purpose,
 *    without the express written permission of Rockchip Corporation.
 *
 *****************************************************************************/

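/*
 * This demo exercises the RKNN matmul API: it creates a matmul context for
 * C = A x B with M=4, K=64, N=32 and INT8 input data, allocates the A/B/C
 * tensor memories, fills A with ones and B with a small repeating pattern,
 * runs the multiplication loop_count times while timing each run, and then
 * dumps the A/B/C tensors. The optional command-line flags request the
 * non-normal (native/perf) layouts for B and for A/C, respectively.
 */
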
/*-------------------------------------------
                Includes
-------------------------------------------*/
#include "rknn_matmul_api.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/*-------------------------------------------
                  Functions
-------------------------------------------*/
// Returns the current wall-clock time in microseconds.
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec * 1000000LL + tv.tv_usec; // 64-bit math to avoid overflow with 32-bit time_t
}

// Formats the tensor dims as a string such as "(4, 64)".
// Note: returns a pointer to a static buffer, so the result must be consumed
// before the next call and the function is not reentrant.
static const char* get_dims_string(rknn_matmul_tensor_attr* attr)
{
  if (!attr->n_dims) {
    return "()";
  }
  static char dims_str[128];
  memset(&dims_str[0], 0, sizeof(dims_str));
  sprintf(&dims_str[0], "(%d", attr->dims[0]);
  for (uint32_t i = 1; i < attr->n_dims; ++i) {
    int idx = strlen(dims_str);
    sprintf(&dims_str[idx], ", %d", attr->dims[i]);
  }
  strcat(&dims_str[0], ")");
  return dims_str;
}

// Prints one matmul tensor attribute (name, dims, size and element type).
static void dump_matmul_tensor_attr(rknn_matmul_tensor_attr* attr)
{
  printf("  name=%s, dims=%s, size=%d, type=%s\n", attr->name, get_dims_string(attr), attr->size,
         get_type_string(attr->type));
}

// Prints the contents of a matmul tensor in its reported layout:
// 2-D is the normal layout, 3-D the perf layout, 4-D the native (blocked) layout.
static void dump_matmul_tensor(rknn_tensor_mem* tensor, rknn_matmul_tensor_attr* attr)
{
  printf("  %s%s:\n", attr->name, get_dims_string(attr));
  void* virt_addr = (void*)((size_t)tensor->virt_addr + tensor->offset);
  // normal layout
  if (attr->n_dims == 2) {
    for (uint32_t i = 0; i < attr->dims[0]; ++i) {
      for (uint32_t j = 0; j < attr->dims[1]; ++j) {
        if (attr->type == RKNN_TENSOR_INT8) {
          printf(" %2d", ((int8_t*)virt_addr)[i * attr->dims[1] + j]);
        } else if (attr->type == RKNN_TENSOR_INT32) {
          printf("  %3d", ((int32_t*)virt_addr)[i * attr->dims[1] + j]);
        }
      }
      printf("\n");
    }
    printf("\n");
  }
  // perf layout
  else if (attr->n_dims == 3) {
    for (uint32_t i = 0; i < attr->dims[0]; ++i) {
      for (uint32_t j = 0; j < attr->dims[1]; ++j) {
        for (uint32_t k = 0; k < attr->dims[2]; ++k) {
          if (attr->type == RKNN_TENSOR_INT8) {
            printf("  %2d ", ((int8_t*)virt_addr)[(i * attr->dims[1] + j) * attr->dims[2] + k]);
          } else if (attr->type == RKNN_TENSOR_INT32) {
            printf("  %2d ", ((int32_t*)virt_addr)[(i * attr->dims[1] + j) * attr->dims[2] + k]);
          }
        }
        printf("\n");
      }
      printf("\n");
    }
  }
  // native layout: [N/16, K/32, 16, 32] sub-tiles
  else if (attr->n_dims == 4) {
    // N / 16
    for (uint32_t n = 0; n < attr->dims[0]; ++n) {
      // K / 32
      for (uint32_t k = 0; k < attr->dims[1]; ++k) {
        // 16
        for (uint32_t nn = 0; nn < attr->dims[2]; ++nn) {
          // 32
          for (uint32_t kk = 0; kk < attr->dims[3]; kk++) {
            if (attr->type == RKNN_TENSOR_INT8) {
              printf("  %2d ",
                     ((int8_t*)virt_addr)[((n * attr->dims[1] + k) * attr->dims[2] + nn) * attr->dims[3] + kk]);
            } else if (attr->type == RKNN_TENSOR_INT32) {
              printf("  %2d ",
                     ((int32_t*)virt_addr)[((n * attr->dims[1] + k) * attr->dims[2] + nn) * attr->dims[3] + kk]);
            }
          }
          printf("\n");
        }
        printf("\n");
      }
      printf("\n");
    }
  }
}

static void print_usage(char* argv[])
{
  printf("Usage: %s [loop_count=1] [native_layout=0] [perf_layout=0]\n", argv[0]);
}

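// Example invocations (the binary name depends on how the example is built;
// "rknn_matmul_api_demo" below just matches the name of this example's directory):
//   ./rknn_matmul_api_demo           -> 1 run, normal layouts for A/B/C
//   ./rknn_matmul_api_demo 10 1 1    -> 10 runs, native layout for B, perf layout for A and C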
int main(int argc, char* argv[])
{
  int loop_count = 1;
  if (argc > 1) {
    if (!strcmp(argv[1], "-h")) {
      print_usage(argv);
      return 0;
    }
    loop_count = atoi(argv[1]);
  }

  // request normal or native layout for B
  int native_layout = 0;
  if (argc > 2) {
    native_layout = atoi(argv[2]);
  }

  // request normal or perf layout for A and C
  int perf_layout = 0;
  if (argc > 3) {
    perf_layout = atoi(argv[3]);
  }

  int32_t M = 4;
  int32_t K = 64;
  int32_t N = 32;

  printf("MatMul M=%d, K=%d, N=%d\n", M, K, N);

  rknn_matmul_ctx ctx;

  rknn_matmul_info info;
  memset(&info, 0, sizeof(rknn_matmul_info));
  info.M             = M;
  info.K             = K;
  info.N             = N;
  info.type          = RKNN_TENSOR_INT8;
  info.native_layout = native_layout;
  info.perf_layout   = perf_layout;

  rknn_matmul_io_attr io_attr;
  memset(&io_attr, 0, sizeof(rknn_matmul_io_attr));

  int ret = rknn_matmul_create(&ctx, &info, &io_attr);
  if (ret < 0) {
    printf("rknn_matmul_create fail! ret=%d\n", ret);
    return -1;
  }

  printf("input/output matmul tensor attribute:\n");
  dump_matmul_tensor_attr(&io_attr.A);
  dump_matmul_tensor_attr(&io_attr.B);
  dump_matmul_tensor_attr(&io_attr.C);

  // Create A
  rknn_tensor_mem* A = rknn_create_mem(ctx, io_attr.A.size);
  if (A == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }
  memset(A->virt_addr, 1, A->size);

  // Create B
  rknn_tensor_mem* B = rknn_create_mem(ctx, io_attr.B.size);
  if (B == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }

  // normal layout
  if (io_attr.B.n_dims == 2) {
    for (uint32_t i = 0; i < io_attr.B.dims[1]; ++i) {
      for (uint32_t j = 0; j < io_attr.B.dims[0]; ++j) {
        ((int8_t*)B->virt_addr)[i * io_attr.B.dims[0] + j] = (j % 16) + 1;
      }
    }
  }
  // native layout
  else if (io_attr.B.n_dims == 4) {
    for (uint32_t n = 0; n < io_attr.B.dims[0]; ++n) {
      for (uint32_t k = 0; k < io_attr.B.dims[1]; ++k) {
        for (uint32_t nn = 0; nn < io_attr.B.dims[2]; ++nn) {
          for (uint32_t kk = 0; kk < io_attr.B.dims[3]; ++kk) {
            ((int8_t*)B->virt_addr)[((n * io_attr.B.dims[1] + k) * io_attr.B.dims[2] + nn) * io_attr.B.dims[3] + kk] =
              nn + 1;
          }
        }
      }
    }
  }
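  // Note: with native_layout=1 the API reports B as a 4-D blocked tensor (the
  // N/16, K/32, 16, 32 grouping that dump_matmul_tensor prints), so B is
  // filled per sub-tile above; with the normal layout it is filled as a flat
  // 2-D matrix. In both cases the values stay small (1..16), which keeps the
  // dumped matrices easy to read.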

  // Create C
  rknn_tensor_mem* C = rknn_create_mem(ctx, io_attr.C.size);
  if (C == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }

  // Set A
  ret = rknn_matmul_set_io_mem(ctx, A, &io_attr.A);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }

  // Set B
  ret = rknn_matmul_set_io_mem(ctx, B, &io_attr.B);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }

  // Set C
  ret = rknn_matmul_set_io_mem(ctx, C, &io_attr.C);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }

  // Run
  printf("Begin perf ...\n");
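  // Only the rknn_matmul_run() call itself is timed below; memory allocation,
  // the B data fill and rknn_matmul_set_io_mem() all happen outside this loop.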
  for (int i = 0; i < loop_count; ++i) {
    int64_t start_us  = getCurrentTimeUs();
    ret               = rknn_matmul_run(ctx);
    int64_t elapse_us = getCurrentTimeUs() - start_us;
    if (ret < 0) {
      printf("rknn_matmul_run error %d\n", ret);
      return -1;
    }
    printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
  }

  // Dump A/B/C tensors
  printf("matmul tensors:\n");
  dump_matmul_tensor(A, &io_attr.A);
  dump_matmul_tensor(B, &io_attr.B);
  dump_matmul_tensor(C, &io_attr.C);

  // Destroy
  rknn_destroy_mem(ctx, A);
  rknn_destroy_mem(ctx, B);
  rknn_destroy_mem(ctx, C);

  rknn_matmul_destroy(ctx);

  return 0;
}