xref: /OK3568_Linux_fs/external/rknpu2/examples/3rdparty/cnpy/cnpy.cpp (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // Copyright (C) 2011  Carl Rogers
2 // Released under MIT License
3 // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4 
5 #include "cnpy.h"
6 
7 #include <stdint.h>
8 
9 #include <algorithm>
10 #include <complex>
11 #include <cstdlib>
12 #include <cstring>
13 #include <iomanip>
14 #include <regex>
15 #include <stdexcept>
16 
BigEndianTest(int size)17 char cnpy::BigEndianTest(int size)
18 {
19   if (size == 1)
20     return '|';
21   int x = 1;
22   return (((char*)&x)[0]) ? '<' : '>';
23 }
24 
map_type(const std::type_info & t)25 char cnpy::map_type(const std::type_info& t)
26 {
27   if (t == typeid(float))
28     return 'f';
29   if (t == typeid(double))
30     return 'f';
31   if (t == typeid(long double))
32     return 'f';
33 
34   if (t == typeid(int))
35     return 'i';
36   if (t == typeid(char))
37     return 'i';
38   if (t == typeid(signed char))
39     return 'i';
40   if (t == typeid(short))
41     return 'i';
42   if (t == typeid(long))
43     return 'i';
44   if (t == typeid(long long))
45     return 'i';
46 
47   if (t == typeid(unsigned char))
48     return 'u';
49   if (t == typeid(unsigned short))
50     return 'u';
51   if (t == typeid(unsigned long))
52     return 'u';
53   if (t == typeid(unsigned long long))
54     return 'u';
55   if (t == typeid(unsigned int))
56     return 'u';
57 
58   if (t == typeid(bool))
59     return 'b';
60 
61   if (t == typeid(std::complex<float>))
62     return 'c';
63   if (t == typeid(std::complex<double>))
64     return 'c';
65   if (t == typeid(std::complex<long double>))
66     return 'c';
67 
68   else
69     return '?';
70 }
71 
72 template <>
operator +=(std::vector<char> & lhs,const std::string rhs)73 std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs)
74 {
75   lhs.insert(lhs.end(), rhs.begin(), rhs.end());
76   return lhs;
77 }
78 
79 template <>
operator +=(std::vector<char> & lhs,const char * rhs)80 std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs)
81 {
82   // write in little endian
83   size_t len = strlen(rhs);
84   lhs.reserve(len);
85   for (size_t byte = 0; byte < len; byte++) {
86     lhs.push_back(rhs[byte]);
87   }
88   return lhs;
89 }
90 
parse_npy_header(unsigned char * buffer,size_t & word_size,std::vector<size_t> & shape,bool & fortran_order,std::string & typeName)91 void cnpy::parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
92                             std::string& typeName)
93 {
94   // std::string magic_string(buffer,6);
95   uint8_t     major_version = *reinterpret_cast<uint8_t*>(buffer + 6);
96   uint8_t     minor_version = *reinterpret_cast<uint8_t*>(buffer + 7);
97   uint16_t    header_len    = *reinterpret_cast<uint16_t*>(buffer + 8);
98   std::string header(reinterpret_cast<char*>(buffer + 9), header_len);
99 
100   size_t loc1, loc2;
101 
102   // fortran order
103   loc1          = header.find("fortran_order") + 16;
104   fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
105   if (fortran_order)
106     throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)");
107 
108   // shape
109   loc1 = header.find("(");
110   loc2 = header.find(")");
111 
112   std::regex  num_regex("[0-9][0-9]*");
113   std::smatch sm;
114   shape.clear();
115 
116   std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
117   while (std::regex_search(str_shape, sm, num_regex)) {
118     shape.push_back(std::stoi(sm[0].str()));
119     str_shape = sm.suffix().str();
120   }
121 
122   // endian, word size, data type
123   // byte order code | stands for not applicable.
124   // not sure when this applies except for byte array
125   loc1              = header.find("descr") + 9;
126   bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
127   assert(littleEndian);
128 
129   // char type = header[loc1+1];
130   // assert(type == map_type(T));
131 
132   std::string str_ws = header.substr(loc1 + 2);
133   loc2               = str_ws.find("'");
134   word_size          = atoi(str_ws.substr(0, loc2).c_str());
135   if (header.substr(loc1 + 1, 1) == "i") {
136     typeName = "int";
137   } else if (header.substr(loc1 + 1, 1) == "u") {
138     typeName = "uint";
139   } else if (header.substr(loc1 + 1, 1) == "f") {
140     typeName = "float";
141   }
142   typeName = typeName + std::to_string(word_size * 8);
143 }
144 
parse_npy_header(FILE * fp,size_t & word_size,std::vector<size_t> & shape,bool & fortran_order,std::string & typeName)145 void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
146                             std::string& typeName)
147 {
148   char   buffer[256];
149   size_t res = fread(buffer, sizeof(char), 11, fp);
150   if (res != 11)
151     throw std::runtime_error("parse_npy_header: failed fread");
152   std::string header = fgets(buffer, 256, fp);
153   assert(header[header.size() - 1] == '\n');
154 
155   size_t loc1, loc2;
156 
157   // fortran order
158   loc1 = header.find("fortran_order");
159   if (loc1 == std::string::npos)
160     throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
161   loc1 += 16;
162   fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
163   if (fortran_order)
164     throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)");
165 
166   // shape
167   loc1 = header.find("(");
168   loc2 = header.find(")");
169   if (loc1 == std::string::npos || loc2 == std::string::npos)
170     throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
171 
172   std::regex  num_regex("[0-9][0-9]*");
173   std::smatch sm;
174   shape.clear();
175 
176   std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
177   while (std::regex_search(str_shape, sm, num_regex)) {
178     shape.push_back(std::stoi(sm[0].str()));
179     str_shape = sm.suffix().str();
180   }
181 
182   // endian, word size, data type
183   // byte order code | stands for not applicable.
184   // not sure when this applies except for byte array
185   loc1 = header.find("descr");
186   if (loc1 == std::string::npos)
187     throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
188   loc1 += 9;
189   bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
190   assert(littleEndian);
191 
192   // char type = header[loc1+1];
193   // assert(type == map_type(T));
194 
195   std::string str_ws = header.substr(loc1 + 2);
196   loc2               = str_ws.find("'");
197   word_size          = atoi(str_ws.substr(0, loc2).c_str());
198   if (header.substr(loc1 + 1, 1) == "i") {
199     typeName = "int";
200   } else if (header.substr(loc1 + 1, 1) == "u") {
201     typeName = "uint";
202   } else if (header.substr(loc1 + 1, 1) == "f") {
203     typeName = "float";
204   }
205   typeName = typeName + std::to_string(word_size * 8);
206 }
207 
parse_zip_footer(FILE * fp,uint16_t & nrecs,size_t & global_header_size,size_t & global_header_offset)208 void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset)
209 {
210   std::vector<char> footer(22);
211   fseek(fp, -22, SEEK_END);
212   size_t res = fread(&footer[0], sizeof(char), 22, fp);
213   if (res != 22)
214     throw std::runtime_error("parse_zip_footer: failed fread");
215 
216   uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
217   disk_no              = *(uint16_t*)&footer[4];
218   disk_start           = *(uint16_t*)&footer[6];
219   nrecs_on_disk        = *(uint16_t*)&footer[8];
220   nrecs                = *(uint16_t*)&footer[10];
221   global_header_size   = *(uint32_t*)&footer[12];
222   global_header_offset = *(uint32_t*)&footer[16];
223   comment_len          = *(uint16_t*)&footer[20];
224 
225   assert(disk_no == 0);
226   assert(disk_start == 0);
227   assert(nrecs_on_disk == nrecs);
228   assert(comment_len == 0);
229 }
230 
load_the_npy_file(FILE * fp)231 cnpy::NpyArray load_the_npy_file(FILE* fp)
232 {
233   std::vector<size_t> shape;
234   size_t              word_size;
235   std::string         typeName;
236   bool                fortran_order;
237   cnpy::parse_npy_header(fp, word_size, shape, fortran_order, typeName);
238 
239   cnpy::NpyArray arr(shape, word_size, fortran_order, typeName);
240   size_t         nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
241   if (nread != arr.num_bytes())
242     throw std::runtime_error("load_the_npy_file: failed fread");
243   return arr;
244 }
245 
load_the_npz_array(FILE * fp,uint32_t compr_bytes,uint32_t uncompr_bytes)246 cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
247 {
248   std::vector<unsigned char> buffer_compr(compr_bytes);
249   std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
250   size_t                     nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
251   if (nread != compr_bytes)
252     throw std::runtime_error("load_the_npy_file: failed fread");
253 
254 #if 0
255   int      err;
256   z_stream d_stream;
257 
258   d_stream.zalloc   = Z_NULL;
259   d_stream.zfree    = Z_NULL;
260   d_stream.opaque   = Z_NULL;
261   d_stream.avail_in = 0;
262   d_stream.next_in  = Z_NULL;
263   err               = inflateInit2(&d_stream, -MAX_WBITS);
264 
265   d_stream.avail_in  = compr_bytes;
266   d_stream.next_in   = &buffer_compr[0];
267   d_stream.avail_out = uncompr_bytes;
268   d_stream.next_out  = &buffer_uncompr[0];
269 
270   err = inflate(&d_stream, Z_FINISH);
271   err = inflateEnd(&d_stream);
272 #endif
273 
274   std::vector<size_t> shape;
275   size_t              word_size;
276   bool                fortran_order;
277   std::string         typeName;
278   cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order, typeName);
279 
280   cnpy::NpyArray array(shape, word_size, fortran_order, typeName);
281 
282   size_t offset = uncompr_bytes - array.num_bytes();
283   memcpy(array.data<unsigned char>(), &buffer_uncompr[0] + offset, array.num_bytes());
284 
285   return array;
286 }
287 
npz_load(std::string fname)288 cnpy::npz_t cnpy::npz_load(std::string fname)
289 {
290   FILE* fp = fopen(fname.c_str(), "rb");
291 
292   if (!fp) {
293     throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
294   }
295 
296   cnpy::npz_t arrays;
297 
298   while (1) {
299     std::vector<char> local_header(30);
300     size_t            headerres = fread(&local_header[0], sizeof(char), 30, fp);
301     if (headerres != 30)
302       throw std::runtime_error("npz_load: failed fread");
303 
304     // if we've reached the global header, stop reading
305     if (local_header[2] != 0x03 || local_header[3] != 0x04)
306       break;
307 
308     // read in the variable name
309     uint16_t    name_len = *(uint16_t*)&local_header[26];
310     std::string varname(name_len, ' ');
311     size_t      vname_res = fread(&varname[0], sizeof(char), name_len, fp);
312     if (vname_res != name_len)
313       throw std::runtime_error("npz_load: failed fread");
314 
315     // erase the lagging .npy
316     varname.erase(varname.end() - 4, varname.end());
317 
318     // read in the extra field
319     uint16_t extra_field_len = *(uint16_t*)&local_header[28];
320     if (extra_field_len > 0) {
321       std::vector<char> buff(extra_field_len);
322       size_t            efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
323       if (efield_res != extra_field_len)
324         throw std::runtime_error("npz_load: failed fread");
325     }
326 
327     uint16_t compr_method  = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
328     uint32_t compr_bytes   = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
329     uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
330 
331     if (compr_method == 0) {
332       arrays[varname] = load_the_npy_file(fp);
333     } else {
334       arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes);
335     }
336   }
337 
338   fclose(fp);
339   return arrays;
340 }
341 
npz_load(std::string fname,std::string varname)342 cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname)
343 {
344   FILE* fp = fopen(fname.c_str(), "rb");
345 
346   if (!fp)
347     throw std::runtime_error("npz_load: Unable to open file " + fname);
348 
349   while (1) {
350     std::vector<char> local_header(30);
351     size_t            header_res = fread(&local_header[0], sizeof(char), 30, fp);
352     if (header_res != 30)
353       throw std::runtime_error("npz_load: failed fread");
354 
355     // if we've reached the global header, stop reading
356     if (local_header[2] != 0x03 || local_header[3] != 0x04)
357       break;
358 
359     // read in the variable name
360     uint16_t    name_len = *(uint16_t*)&local_header[26];
361     std::string vname(name_len, ' ');
362     size_t      vname_res = fread(&vname[0], sizeof(char), name_len, fp);
363     if (vname_res != name_len)
364       throw std::runtime_error("npz_load: failed fread");
365     vname.erase(vname.end() - 4, vname.end()); // erase the lagging .npy
366 
367     // read in the extra field
368     uint16_t extra_field_len = *(uint16_t*)&local_header[28];
369     fseek(fp, extra_field_len, SEEK_CUR); // skip past the extra field
370 
371     uint16_t compr_method  = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
372     uint32_t compr_bytes   = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
373     uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
374 
375     if (vname == varname) {
376       NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes);
377       fclose(fp);
378       return array;
379     } else {
380       // skip past the data
381       uint32_t size = *(uint32_t*)&local_header[22];
382       fseek(fp, size, SEEK_CUR);
383     }
384   }
385 
386   fclose(fp);
387 
388   // if we get here, we haven't found the variable in the file
389   throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname);
390 }
391 
npy_load(std::string fname)392 cnpy::NpyArray cnpy::npy_load(std::string fname)
393 {
394   FILE* fp = fopen(fname.c_str(), "rb");
395 
396   if (!fp)
397     throw std::runtime_error("npy_load: Unable to open file " + fname);
398 
399   NpyArray arr = load_the_npy_file(fp);
400 
401   fclose(fp);
402   return arr;
403 }
404