/****************************************************************************
*
* Copyright (c) 2017 - 2018 by Rockchip Corp. All rights reserved.
*
* The material in this file is confidential and contains trade secrets
* of Rockchip Corporation. This is proprietary information owned by
* Rockchip Corporation. No part of this work may be disclosed,
* reproduced, copied, transmitted, or used in any way for any purpose,
* without the express written permission of Rockchip Corporation.
*
*****************************************************************************/

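/*
 * This demo exercises the RKNN matmul API: it creates an int8 matmul context
 * for C = A x B (M=4, K=64, N=32 below), allocates tensor memory for A/B/C,
 * fills A and B with simple test patterns, runs the multiply in a timed loop,
 * and dumps all three tensors. The optional command-line arguments select the
 * loop count and whether B uses the native layout and A/C use the perf layout.
 */
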
/*-------------------------------------------
                  Includes
-------------------------------------------*/
#include "rknn_matmul_api.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/*-------------------------------------------
                  Functions
-------------------------------------------*/
static inline int64_t getCurrentTimeUs()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  // Cast before multiplying so the expression cannot overflow on platforms
  // with a 32-bit time_t.
  return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}

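/*
 * Formats a tensor's dims as a string like "(4, 64)". Note that the result
 * points to a static buffer, so it is overwritten by the next call and is not
 * thread-safe; that is fine for this single-threaded demo.
 */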
static const char* get_dims_string(rknn_matmul_tensor_attr* attr)
{
  if (!attr->n_dims) {
    return "()";
  }
  static char dims_str[128];
  memset(&dims_str[0], 0, sizeof(dims_str));
  sprintf(&dims_str[0], "(%d", attr->dims[0]);
  for (uint32_t i = 1; i < attr->n_dims; ++i) {
    int idx = strlen(dims_str);
    sprintf(&dims_str[idx], ", %d", attr->dims[i]);
  }
  strcat(&dims_str[0], ")");
  return dims_str;
}

static void dump_matmul_tensor_attr(rknn_matmul_tensor_attr* attr)
{
  printf("  name=%s, dims=%s, size=%d, type=%s\n", attr->name, get_dims_string(attr), attr->size,
         get_type_string(attr->type));
}

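/*
 * Prints a tensor's contents according to the shape reported in its attribute:
 * 2-D for the normal layout, 3-D for the perf layout, and 4-D for the native
 * (blocked) layout of B. Only INT8 and INT32 element types are handled, which
 * covers the A/B inputs and the C output of this demo.
 */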
static void dump_matmul_tensor(rknn_tensor_mem* tensor, rknn_matmul_tensor_attr* attr)
{
  printf("  %s%s:\n", attr->name, get_dims_string(attr));
  // normal layout
  if (attr->n_dims == 2) {
    for (uint32_t i = 0; i < attr->dims[0]; ++i) {
      for (uint32_t j = 0; j < attr->dims[1]; ++j) {
        void* virt_addr = (void*)((size_t)tensor->virt_addr + tensor->offset);
        if (attr->type == RKNN_TENSOR_INT8) {
          printf(" %2d", ((int8_t*)virt_addr)[i * attr->dims[1] + j]);
        } else if (attr->type == RKNN_TENSOR_INT32) {
          printf(" %3d", ((int32_t*)virt_addr)[i * attr->dims[1] + j]);
        }
      }
      printf("\n");
    }
    printf("\n");
  }
  // perf layout
  else if (attr->n_dims == 3) {
    for (uint32_t i = 0; i < attr->dims[0]; ++i) {
      for (uint32_t j = 0; j < attr->dims[1]; ++j) {
        for (uint32_t k = 0; k < attr->dims[2]; ++k) {
          void* virt_addr = (void*)((size_t)tensor->virt_addr + tensor->offset);
          if (attr->type == RKNN_TENSOR_INT8) {
            printf(" %2d ", ((int8_t*)virt_addr)[(i * attr->dims[1] + j) * attr->dims[2] + k]);
          } else if (attr->type == RKNN_TENSOR_INT32) {
            printf(" %2d ", ((int32_t*)virt_addr)[(i * attr->dims[1] + j) * attr->dims[2] + k]);
          }
        }
        printf("\n");
      }
      printf("\n");
    }
  }
  // native layout
  else if (attr->n_dims == 4) {
    // N / 16
    for (uint32_t n = 0; n < attr->dims[0]; ++n) {
      // K / 32
      for (uint32_t k = 0; k < attr->dims[1]; ++k) {
        // 16
        for (uint32_t nn = 0; nn < attr->dims[2]; ++nn) {
          // 32
          for (uint32_t kk = 0; kk < attr->dims[3]; kk++) {
            void* virt_addr = (void*)((size_t)tensor->virt_addr + tensor->offset);
            if (attr->type == RKNN_TENSOR_INT8) {
              printf(" %2d ",
                     ((int8_t*)virt_addr)[((n * attr->dims[1] + k) * attr->dims[2] + nn) * attr->dims[3] + kk]);
            } else if (attr->type == RKNN_TENSOR_INT32) {
              printf(" %2d ",
                     ((int32_t*)virt_addr)[((n * attr->dims[1] + k) * attr->dims[2] + nn) * attr->dims[3] + kk]);
            }
          }
          printf("\n");
        }
        printf("\n");
      }
      printf("\n");
    }
  }
}

static void print_usage(char* argv[])
{
  printf("Usage: %s [loop_count=1] [native_layout=0] [perf_layout=0]\n", argv[0]);
}
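
/*
 * Example invocation (the binary name depends on how the demo is built):
 *   ./<matmul_demo> 10 1 1
 * runs 10 timed iterations with the native layout requested for B and the
 * perf layout requested for A and C; with no arguments a single iteration is
 * run using the normal layouts.
 */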

int main(int argc, char* argv[])
{
  int loop_count = 1;
  if (argc > 1) {
    if (!strcmp(argv[1], "-h")) {
      print_usage(argv);
      return 0;
    }
    loop_count = atoi(argv[1]);
  }

  // request normal or native layout for B
  int native_layout = 0;
  if (argc > 2) {
    native_layout = atoi(argv[2]);
  }

  // request normal or perf layout for A and C
  int perf_layout = 0;
  if (argc > 3) {
    perf_layout = atoi(argv[3]);
  }

  int32_t M = 4;
  int32_t K = 64;
  int32_t N = 32;

  printf("MatMul M=%d, K=%d, N=%d\n", M, K, N);

  rknn_matmul_ctx ctx;

  rknn_matmul_info info;
  memset(&info, 0, sizeof(rknn_matmul_info));
  info.M = M;
  info.K = K;
  info.N = N;
  info.type = RKNN_TENSOR_INT8;
  info.native_layout = native_layout;
  info.perf_layout = perf_layout;
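
  // With info.type set to RKNN_TENSOR_INT8, A and B are int8 matrices and the
  // result C is produced as int32 (which is why dump_matmul_tensor() above only
  // handles those two element types). The layout flags ask the runtime to
  // report NPU-friendly shapes for B (native_layout) and for A/C (perf_layout)
  // in the io_attr structure filled in by rknn_matmul_create() below.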

  rknn_matmul_io_attr io_attr;
  memset(&io_attr, 0, sizeof(rknn_matmul_io_attr));

  int ret = rknn_matmul_create(&ctx, &info, &io_attr);
  if (ret < 0) {
    printf("rknn_matmul_create fail! ret=%d\n", ret);
    return -1;
  }

  printf("input/output matmul tensor attribute:\n");
  dump_matmul_tensor_attr(&io_attr.A);
  dump_matmul_tensor_attr(&io_attr.B);
  dump_matmul_tensor_attr(&io_attr.C);

  // Create A
  rknn_tensor_mem* A = rknn_create_mem(ctx, io_attr.A.size);
  if (A == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }
  memset(A->virt_addr, 1, A->size);
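  // A now holds all ones (memset writes the byte value 1 into every int8
  // element). Since C = A x B, every element of C should come out as the sum
  // over K of the corresponding column of B, which makes the dumped result
  // easy to sanity-check.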

  // Create B
  rknn_tensor_mem* B = rknn_create_mem(ctx, io_attr.B.size);
  if (B == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }

  // normal layout
  if (io_attr.B.n_dims == 2) {
    for (uint32_t i = 0; i < io_attr.B.dims[1]; ++i) {
      for (uint32_t j = 0; j < io_attr.B.dims[0]; ++j) {
        ((int8_t*)B->virt_addr)[i * io_attr.B.dims[0] + j] = (j % 16) + 1;
      }
    }
  }
  // native layout
  else if (io_attr.B.n_dims == 4) {
    for (uint32_t n = 0; n < io_attr.B.dims[0]; ++n) {
      for (uint32_t k = 0; k < io_attr.B.dims[1]; ++k) {
        for (uint32_t nn = 0; nn < io_attr.B.dims[2]; ++nn) {
          for (uint32_t kk = 0; kk < io_attr.B.dims[3]; ++kk) {
            ((int8_t*)B->virt_addr)[((n * io_attr.B.dims[1] + k) * io_attr.B.dims[2] + nn) * io_attr.B.dims[3] + kk] =
              nn + 1;
          }
        }
      }
    }
  }
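
  // B is filled with a small recognizable test pattern: in the 2-D normal
  // layout the values cycle through 1..16, while in the 4-D native (blocked)
  // layout each element is set to nn + 1, its index within the 16-wide inner
  // block plus one. The pattern only exists to make the dumped tensors easy
  // to read.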

  // Create C
  rknn_tensor_mem* C = rknn_create_mem(ctx, io_attr.C.size);
  if (C == NULL) {
    printf("rknn_create_mem fail!\n");
    return -1;
  }

  // Set A
  ret = rknn_matmul_set_io_mem(ctx, A, &io_attr.A);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }

  // Set B
  ret = rknn_matmul_set_io_mem(ctx, B, &io_attr.B);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }

  // Set C
  ret = rknn_matmul_set_io_mem(ctx, C, &io_attr.C);
  if (ret < 0) {
    printf("rknn_matmul_set_io_mem fail! ret=%d\n", ret);
    return -1;
  }
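
  // All three buffers are now bound to the matmul context; from this point on
  // rknn_matmul_run() reads its inputs from A and B and writes the result into
  // C. Note that this demo fills A and B before binding them, since
  // rknn_matmul_set_io_mem() may need to convert the data into the layout the
  // NPU expects.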

  // Run
  printf("Begin perf ...\n");
  for (int i = 0; i < loop_count; ++i) {
    int64_t start_us = getCurrentTimeUs();
    ret = rknn_matmul_run(ctx);
    int64_t elapse_us = getCurrentTimeUs() - start_us;
    if (ret < 0) {
      printf("rknn_matmul_run error %d\n", ret);
      return -1;
    }
    printf("%4d: Elapse Time = %.2fms, FPS = %.2f\n", i, elapse_us / 1000.f, 1000.f * 1000.f / elapse_us);
  }
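
  // The time reported for each iteration covers only the rknn_matmul_run()
  // call itself; context creation, buffer allocation and the set_io_mem()
  // binding all happen once, outside the loop.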

  // Dump A/B/C tensors
  printf("matmul tensors:\n");
  dump_matmul_tensor(A, &io_attr.A);
  dump_matmul_tensor(B, &io_attr.B);
  dump_matmul_tensor(C, &io_attr.C);

  // destroy
  rknn_destroy_mem(ctx, A);
  rknn_destroy_mem(ctx, B);
  rknn_destroy_mem(ctx, C);

  rknn_matmul_destroy(ctx);

  return 0;
}