xref: /OK3568_Linux_fs/external/rknpu2/examples/3rdparty/cnpy/cnpy.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // Copyright (C) 2011  Carl Rogers
2 // Released under MIT License
3 // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4 
5 #ifndef LIBCNPY_H_
6 #define LIBCNPY_H_
7 
8 #if 0
9 #include <zlib.h>
10 #endif
11 
12 #include <stdint.h>
13 
14 #include <cassert>
15 #include <cstdio>
16 #include <fstream>
17 #include <iostream>
18 #include <map>
19 #include <memory>
20 #include <numeric>
21 #include <sstream>
22 #include <stdexcept>
23 #include <string>
24 #include <typeinfo>
25 #include <vector>
26 
27 namespace cnpy {
28 
// In-memory description of one array: its shape, element size, memory order,
// type name and the raw element bytes.
//
// The bytes are held through a std::shared_ptr, so copies of an NpyArray share
// the same buffer. A default-constructed NpyArray has no buffer at all; the
// accessors below treat that state as "zero bytes" instead of dereferencing a
// null pointer.
struct NpyArray
{
  // Builds an array with the given shape and allocates a zero-initialised
  // buffer of (product of shape) * _word_size bytes.
  NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order, std::string _typeName)
    : shape(_shape)
    , word_size(_word_size)
    , fortran_order(_fortran_order)
    , num_vals(1)
    , typeName(_typeName)
  {
    for (size_t i = 0; i < shape.size(); i++)
      num_vals *= shape[i];
    data_holder = std::make_shared<std::vector<char>>(num_vals * word_size);
  }

  // Builds an empty array: no shape, no elements and no buffer.
  NpyArray()
    : shape(0)
    , word_size(0)
    , fortran_order(0)
    , num_vals(0)
  {}

  // Returns the buffer viewed as T*, or nullptr when there is no buffer.
  // The caller is responsible for choosing a T that matches the stored data.
  template <typename T>
  T* data()
  {
    return data_holder ? reinterpret_cast<T*>(data_holder->data()) : nullptr;
  }

  // Read-only counterpart of data().
  template <typename T>
  const T* data() const
  {
    return data_holder ? reinterpret_cast<const T*>(data_holder->data()) : nullptr;
  }

  // Copies the first num_vals elements, viewed as T, into a new vector.
  // Returns an empty vector when there is nothing to copy.
  template <typename T>
  std::vector<T> as_vec() const
  {
    const T* first = data<T>();
    if (first == nullptr || num_vals == 0)
      return std::vector<T>();
    return std::vector<T>(first, first + num_vals);
  }

  // Size of the buffer in bytes; 0 when there is no buffer.
  size_t num_bytes() const { return data_holder ? data_holder->size() : 0; }

  std::shared_ptr<std::vector<char>> data_holder;   // raw element bytes, shared between copies
  std::vector<size_t>                shape;         // extent of each dimension
  size_t                             word_size;     // bytes per element
  bool                               fortran_order; // memory-order flag as passed to the constructor
  size_t                             num_vals;      // product of all entries in shape
  std::string                        typeName;      // type name as passed to the constructor
};
78 
// Map from a string key to an NpyArray; this is the return type of the
// single-argument npz_load overload declared below.
// (presumably the key is the name of the array inside the archive - confirm against the implementation)
79 using npz_t = std::map<std::string, NpyArray>;
80 
// NOTE: apart from create_npy_header, none of the functions declared below is
// defined in this header, so the descriptions are limited to how this header uses them.
//
// BigEndianTest: create_npy_header passes sizeof(T) and appends the returned
// character right after the opening quote of the 'descr' value.
81 char BigEndianTest(int size);
// map_type: create_npy_header appends the returned character as the type code
// that follows the byte-order character in 'descr'.
82 char map_type(const std::type_info& t);
// create_npy_header: builds the complete .npy header bytes for element type T
// and the given shape. Defined further down in this header.
83 template <typename T>
84 std::vector<char> create_npy_header(const std::vector<size_t>& shape);
// parse_npy_header (FILE* overload): used by npy_save in append mode to obtain
// the existing file's word size, shape, fortran_order flag and type name
// through the reference parameters.
85 void              parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
86                                    std::string& typeName);
// parse_npy_header (buffer overload): same output parameters; not called from this header.
// (presumably parses a header held in memory - TODO confirm)
87 void     parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
88                           std::string& typeName);
// parse_zip_footer: used by npz_save in append mode to obtain the record count
// and the size and offset of the existing global header.
89 void     parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
// Loaders. NOTE(review): behaviour is not visible here; judging by the
// signatures they read a whole .npz, one named entry of a .npz, and a single
// .npy file respectively - confirm against the implementation.
90 npz_t    npz_load(std::string fname);
91 NpyArray npz_load(std::string fname, std::string varname);
92 NpyArray npy_load(std::string fname);
93 
// Appends the bytes of `rhs` to `lhs` in little-endian order (least-significant
// byte first) and returns `lhs`.
//
// The ZIP and NPY header fields built with this operator are little-endian on
// disk, so the byte order must not depend on the host. On a little-endian host
// this is a plain copy of the object's bytes; on a big-endian host the bytes
// are emitted in reverse. Intended for scalar values (char, uint16_t, ...).
template <typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs)
{
  const char* const bytes = reinterpret_cast<const char*>(&rhs);

  // Detect the host byte order by looking at the first byte of the value 1.
  const uint16_t probe                 = 1;
  const bool     host_is_little_endian = *reinterpret_cast<const unsigned char*>(&probe) == 1;

  for (size_t i = 0; i < sizeof(T); i++) {
    const size_t source = host_is_little_endian ? i : sizeof(T) - 1 - i;
    lhs.push_back(bytes[source]);
  }
  return lhs;
}

// Specializations for text operands. They are only declared here; their
// definitions are not part of this header.
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
109 
110 template <typename T>
111 int npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w")
112 {
113   std::ofstream ofs(fname, std::ios::out);
114   if (!ofs.is_open()) {
115     return -1;
116   }
117   ofs.close();
118   FILE*               fp = NULL;
119   std::vector<size_t> true_data_shape; // if appending, the shape of existing + new data
120 
121   if (mode == "a")
122     fp = fopen(fname.c_str(), "r+b");
123 
124   if (fp) {
125     // file exists. we need to append to it. read the header, modify the array size
126     size_t      word_size;
127     bool        fortran_order;
128     std::string typeName;
129     parse_npy_header(fp, word_size, true_data_shape, fortran_order, typeName);
130     assert(!fortran_order);
131 
132     if (word_size != sizeof(T)) {
133       std::cout << "libnpy error: " << fname << " has word size " << word_size << " but npy_save appending data sized "
134                 << sizeof(T) << "\n";
135       assert(word_size == sizeof(T));
136     }
137     if (true_data_shape.size() != shape.size()) {
138       std::cout << "libnpy error: npy_save attempting to append misdimensioned data to " << fname << "\n";
139       assert(true_data_shape.size() != shape.size());
140     }
141 
142     for (size_t i = 1; i < shape.size(); i++) {
143       if (shape[i] != true_data_shape[i]) {
144         std::cout << "libnpy error: npy_save attempting to append misshaped data to " << fname << "\n";
145         assert(shape[i] == true_data_shape[i]);
146       }
147     }
148     true_data_shape[0] += shape[0];
149   } else {
150     fp              = fopen(fname.c_str(), "wb");
151     true_data_shape = shape;
152   }
153 
154   std::vector<char> header = create_npy_header<T>(true_data_shape);
155   size_t            nels   = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
156 
157   fseek(fp, 0, SEEK_SET);
158   fwrite(&header[0], sizeof(char), header.size(), fp);
159   fseek(fp, 0, SEEK_END);
160   fwrite(data, sizeof(T), nels, fp);
161   fclose(fp);
162   return 0;
163 }
164 
165 template <typename T>
166 void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape,
167               std::string mode = "w")
168 {
169   // first, append a .npy to the fname
170   fname += ".npy";
171 
172   // now, on with the show
173   FILE*             fp                   = NULL;
174   uint16_t          nrecs                = 0;
175   size_t            global_header_offset = 0;
176   std::vector<char> global_header;
177 
178   if (mode == "a")
179     fp = fopen(zipname.c_str(), "r+b");
180 
181   if (fp) {
182     // zip file exists. we need to add a new npy file to it.
183     // first read the footer. this gives us the offset and size of the global header
184     // then read and store the global header.
185     // below, we will write the the new data at the start of the global header then append the global header and footer
186     // below it
187     size_t global_header_size;
188     parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
189     fseek(fp, global_header_offset, SEEK_SET);
190     global_header.resize(global_header_size);
191     size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
192     if (res != global_header_size) {
193       throw std::runtime_error("npz_save: header read error while adding to existing zip");
194     }
195     fseek(fp, global_header_offset, SEEK_SET);
196   } else {
197     fp = fopen(zipname.c_str(), "wb");
198   }
199 
200   std::vector<char> npy_header = create_npy_header<T>(shape);
201 
202   size_t nels   = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
203   size_t nbytes = nels * sizeof(T) + npy_header.size();
204 
205 #if 0
206   // get the CRC of the data to be added
207   uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
208   crc          = crc32(crc, (uint8_t*)data, nels * sizeof(T));
209 #else
210   uint32_t crc = 0;
211 #endif
212 
213   // build the local header
214   std::vector<char> local_header;
215   local_header += "PK";                   // first part of sig
216   local_header += (uint16_t)0x0403;       // second part of sig
217   local_header += (uint16_t)20;           // min version to extract
218   local_header += (uint16_t)0;            // general purpose bit flag
219   local_header += (uint16_t)0;            // compression method
220   local_header += (uint16_t)0;            // file last mod time
221   local_header += (uint16_t)0;            // file last mod date
222   local_header += (uint32_t)crc;          // crc
223   local_header += (uint32_t)nbytes;       // compressed size
224   local_header += (uint32_t)nbytes;       // uncompressed size
225   local_header += (uint16_t)fname.size(); // fname length
226   local_header += (uint16_t)0;            // extra field length
227   local_header += fname;
228 
229   // build global header
230   global_header += "PK";             // first part of sig
231   global_header += (uint16_t)0x0201; // second part of sig
232   global_header += (uint16_t)20;     // version made by
233   global_header.insert(global_header.end(), local_header.begin() + 4, local_header.begin() + 30);
234   global_header += (uint16_t)0; // file comment length
235   global_header += (uint16_t)0; // disk number where file starts
236   global_header += (uint16_t)0; // internal file attributes
237   global_header += (uint32_t)0; // external file attributes
238   global_header += (uint32_t)
239     global_header_offset; // relative offset of local file header, since it begins where the global header used to begin
240   global_header += fname;
241 
242   // build footer
243   std::vector<char> footer;
244   footer += "PK";                           // first part of sig
245   footer += (uint16_t)0x0605;               // second part of sig
246   footer += (uint16_t)0;                    // number of this disk
247   footer += (uint16_t)0;                    // disk where footer starts
248   footer += (uint16_t)(nrecs + 1);          // number of records on this disk
249   footer += (uint16_t)(nrecs + 1);          // total number of records
250   footer += (uint32_t)global_header.size(); // nbytes of global headers
251   footer +=
252     (uint32_t)(global_header_offset + nbytes + local_header.size()); // offset of start of global headers, since global
253                                                                      // header now starts after newly written array
254   footer += (uint16_t)0;                                             // zip file comment length
255 
256   // write everything
257   fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
258   fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
259   fwrite(data, sizeof(T), nels, fp);
260   fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
261   fwrite(&footer[0], sizeof(char), footer.size(), fp);
262   fclose(fp);
263 }
264 
// Convenience overload: saves `data` as a one-dimensional array of
// data.size() elements by forwarding to the pointer-based npy_save.
// The status returned by that overload is not propagated.
template <typename T>
void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data.data() is valid for an empty vector, unlike &data[0].
  npy_save(fname, data.data(), shape, mode);
}
272 
// Convenience overload: stores `data` in the archive `zipname` as a
// one-dimensional array of data.size() elements by forwarding to the
// pointer-based npz_save.
template <typename T>
void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data.data() is valid for an empty vector, unlike &data[0].
  npz_save(zipname, fname, data.data(), shape, mode);
}
280 
281 template <typename T>
create_npy_header(const std::vector<size_t> & shape)282 std::vector<char> create_npy_header(const std::vector<size_t>& shape)
283 {
284   const char* tpye_name = typeid(T).name();
285   std::vector<char> dict;
286   dict += "{'descr': '";
287   dict += BigEndianTest(sizeof(T));
288   if (std::string(tpye_name) == "N4rknn7float16E") {
289     dict += "f";
290   } else {
291     dict += map_type(typeid(T));
292   }
293   dict += std::to_string(sizeof(T));
294   dict += "', 'fortran_order': False, 'shape': (";
295   dict += std::to_string(shape[0]);
296   for (size_t i = 1; i < shape.size(); i++) {
297     dict += ", ";
298     dict += std::to_string(shape[i]);
299   }
300   if (shape.size() == 1)
301     dict += ",";
302   dict += "), }";
303   // pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
304   int remainder = 16 - (10 + dict.size()) % 16;
305   dict.insert(dict.end(), remainder, ' ');
306   dict.back() = '\n';
307 
308   std::vector<char> header;
309   header += (char)0x93;
310   header += "NUMPY";
311   header += (char)0x01; // major version of numpy format
312   header += (char)0x00; // minor version of numpy format
313   header += (uint16_t)dict.size();
314   header.insert(header.end(), dict.begin(), dict.end());
315 
316   return header;
317 }
318 
319 } // namespace cnpy
320 
321 #endif
322