1 // Copyright (C) 2011 Carl Rogers
2 // Released under MIT License
3 // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4
5 #ifndef LIBCNPY_H_
6 #define LIBCNPY_H_
7
8 #if 0
9 #include <zlib.h>
10 #endif
11
12 #include <stdint.h>
13
14 #include <cassert>
15 #include <cstdio>
16 #include <fstream>
17 #include <iostream>
18 #include <map>
19 #include <memory>
20 #include <numeric>
21 #include <sstream>
22 #include <stdexcept>
23 #include <string>
24 #include <typeinfo>
25 #include <vector>
26
27 namespace cnpy {
28
// In-memory array: a byte buffer plus the metadata needed to interpret it.
//
// The buffer lives behind a shared_ptr, so copies of an NpyArray refer to the
// same underlying bytes. A default-constructed NpyArray has no buffer at all
// (data_holder is null); the accessors below treat that as an empty array
// rather than dereferencing the null pointer.
struct NpyArray
{
  // Allocates a value-initialised (zeroed) buffer of
  // product(_shape) * _word_size bytes.
  //
  // _shape          extent of each dimension
  // _word_size      size in bytes of one element
  // _fortran_order  stored as-is in fortran_order
  // _typeName       stored as-is in typeName
  NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order, std::string _typeName)
    : shape(_shape)
    , word_size(_word_size)
    , fortran_order(_fortran_order)
    , typeName(_typeName)
  {
    num_vals = 1;
    for (size_t i = 0; i < shape.size(); i++)
      num_vals *= shape[i];
    data_holder = std::make_shared<std::vector<char>>(num_vals * word_size);
  }

  // Creates an empty array: no shape, no elements, no buffer.
  NpyArray()
    : shape(0)
    , word_size(0)
    , fortran_order(false)
    , num_vals(0)
  {}

  // Returns the buffer reinterpreted as T*, or nullptr when no buffer has
  // been allocated. The caller is responsible for choosing a T that matches
  // the stored element type.
  template <typename T>
  T* data()
  {
    // vector::data() is well defined for an empty vector, unlike &v[0].
    return data_holder ? reinterpret_cast<T*>(data_holder->data()) : nullptr;
  }

  // Read-only counterpart of data().
  template <typename T>
  const T* data() const
  {
    return data_holder ? reinterpret_cast<const T*>(data_holder->data()) : nullptr;
  }

  // Copies the num_vals elements into a freshly allocated std::vector<T>.
  template <typename T>
  std::vector<T> as_vec() const
  {
    const T* p = data<T>();
    // For an empty array p may be null and num_vals is 0, giving an empty range.
    return std::vector<T>(p, p + num_vals);
  }

  // Size of the buffer in bytes; 0 when no buffer has been allocated.
  size_t num_bytes() const { return data_holder ? data_holder->size() : 0; }

  std::shared_ptr<std::vector<char>> data_holder; // raw bytes, shared between copies
  std::vector<size_t> shape;                      // extent of each dimension
  size_t word_size;                               // bytes per element
  bool fortran_order;                             // order flag supplied at construction
  size_t num_vals;                                // product of all extents in shape
  std::string typeName;                           // type name supplied at construction
};
78
79 using npz_t = std::map<std::string, NpyArray>;
80
81 char BigEndianTest(int size);
82 char map_type(const std::type_info& t);
83 template <typename T>
84 std::vector<char> create_npy_header(const std::vector<size_t>& shape);
85 void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
86 std::string& typeName);
87 void parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
88 std::string& typeName);
89 void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
90 npz_t npz_load(std::string fname);
91 NpyArray npz_load(std::string fname, std::string varname);
92 NpyArray npy_load(std::string fname);
93
// Appends the object representation of rhs to lhs in little-endian byte
// order, independent of the byte order of the host.
//
// The .npy header length and every zip header field written through this
// operator are little-endian on disk, so on a big-endian host the bytes are
// emitted in reverse. On a little-endian host they are copied as-is.
//
// Intended for scalar values (char, uint16_t, uint32_t, ...).
// Returns lhs so that calls can be chained.
template <typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs)
{
  const char* const bytes = reinterpret_cast<const char*>(&rhs);

  // Runtime probe: on a little-endian host the first byte of the value 1 is 1.
  const unsigned int probe = 1u;
  const bool host_is_little_endian = reinterpret_cast<const unsigned char*>(&probe)[0] == 1;

  if (host_is_little_endian) {
    for (size_t byte = 0; byte < sizeof(T); byte++)
      lhs.push_back(bytes[byte]);
  } else {
    for (size_t byte = sizeof(T); byte > 0; byte--)
      lhs.push_back(bytes[byte - 1]);
  }
  return lhs;
}

// Text overloads. Only declared here; the definitions live outside this
// header (presumably they append the characters without a terminating NUL).
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
109
110 template <typename T>
111 int npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w")
112 {
113 std::ofstream ofs(fname, std::ios::out);
114 if (!ofs.is_open()) {
115 return -1;
116 }
117 ofs.close();
118 FILE* fp = NULL;
119 std::vector<size_t> true_data_shape; // if appending, the shape of existing + new data
120
121 if (mode == "a")
122 fp = fopen(fname.c_str(), "r+b");
123
124 if (fp) {
125 // file exists. we need to append to it. read the header, modify the array size
126 size_t word_size;
127 bool fortran_order;
128 std::string typeName;
129 parse_npy_header(fp, word_size, true_data_shape, fortran_order, typeName);
130 assert(!fortran_order);
131
132 if (word_size != sizeof(T)) {
133 std::cout << "libnpy error: " << fname << " has word size " << word_size << " but npy_save appending data sized "
134 << sizeof(T) << "\n";
135 assert(word_size == sizeof(T));
136 }
137 if (true_data_shape.size() != shape.size()) {
138 std::cout << "libnpy error: npy_save attempting to append misdimensioned data to " << fname << "\n";
139 assert(true_data_shape.size() != shape.size());
140 }
141
142 for (size_t i = 1; i < shape.size(); i++) {
143 if (shape[i] != true_data_shape[i]) {
144 std::cout << "libnpy error: npy_save attempting to append misshaped data to " << fname << "\n";
145 assert(shape[i] == true_data_shape[i]);
146 }
147 }
148 true_data_shape[0] += shape[0];
149 } else {
150 fp = fopen(fname.c_str(), "wb");
151 true_data_shape = shape;
152 }
153
154 std::vector<char> header = create_npy_header<T>(true_data_shape);
155 size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
156
157 fseek(fp, 0, SEEK_SET);
158 fwrite(&header[0], sizeof(char), header.size(), fp);
159 fseek(fp, 0, SEEK_END);
160 fwrite(data, sizeof(T), nels, fp);
161 fclose(fp);
162 return 0;
163 }
164
165 template <typename T>
166 void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape,
167 std::string mode = "w")
168 {
169 // first, append a .npy to the fname
170 fname += ".npy";
171
172 // now, on with the show
173 FILE* fp = NULL;
174 uint16_t nrecs = 0;
175 size_t global_header_offset = 0;
176 std::vector<char> global_header;
177
178 if (mode == "a")
179 fp = fopen(zipname.c_str(), "r+b");
180
181 if (fp) {
182 // zip file exists. we need to add a new npy file to it.
183 // first read the footer. this gives us the offset and size of the global header
184 // then read and store the global header.
185 // below, we will write the the new data at the start of the global header then append the global header and footer
186 // below it
187 size_t global_header_size;
188 parse_zip_footer(fp, nrecs, global_header_size, global_header_offset);
189 fseek(fp, global_header_offset, SEEK_SET);
190 global_header.resize(global_header_size);
191 size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp);
192 if (res != global_header_size) {
193 throw std::runtime_error("npz_save: header read error while adding to existing zip");
194 }
195 fseek(fp, global_header_offset, SEEK_SET);
196 } else {
197 fp = fopen(zipname.c_str(), "wb");
198 }
199
200 std::vector<char> npy_header = create_npy_header<T>(shape);
201
202 size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
203 size_t nbytes = nels * sizeof(T) + npy_header.size();
204
205 #if 0
206 // get the CRC of the data to be added
207 uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size());
208 crc = crc32(crc, (uint8_t*)data, nels * sizeof(T));
209 #else
210 uint32_t crc = 0;
211 #endif
212
213 // build the local header
214 std::vector<char> local_header;
215 local_header += "PK"; // first part of sig
216 local_header += (uint16_t)0x0403; // second part of sig
217 local_header += (uint16_t)20; // min version to extract
218 local_header += (uint16_t)0; // general purpose bit flag
219 local_header += (uint16_t)0; // compression method
220 local_header += (uint16_t)0; // file last mod time
221 local_header += (uint16_t)0; // file last mod date
222 local_header += (uint32_t)crc; // crc
223 local_header += (uint32_t)nbytes; // compressed size
224 local_header += (uint32_t)nbytes; // uncompressed size
225 local_header += (uint16_t)fname.size(); // fname length
226 local_header += (uint16_t)0; // extra field length
227 local_header += fname;
228
229 // build global header
230 global_header += "PK"; // first part of sig
231 global_header += (uint16_t)0x0201; // second part of sig
232 global_header += (uint16_t)20; // version made by
233 global_header.insert(global_header.end(), local_header.begin() + 4, local_header.begin() + 30);
234 global_header += (uint16_t)0; // file comment length
235 global_header += (uint16_t)0; // disk number where file starts
236 global_header += (uint16_t)0; // internal file attributes
237 global_header += (uint32_t)0; // external file attributes
238 global_header += (uint32_t)
239 global_header_offset; // relative offset of local file header, since it begins where the global header used to begin
240 global_header += fname;
241
242 // build footer
243 std::vector<char> footer;
244 footer += "PK"; // first part of sig
245 footer += (uint16_t)0x0605; // second part of sig
246 footer += (uint16_t)0; // number of this disk
247 footer += (uint16_t)0; // disk where footer starts
248 footer += (uint16_t)(nrecs + 1); // number of records on this disk
249 footer += (uint16_t)(nrecs + 1); // total number of records
250 footer += (uint32_t)global_header.size(); // nbytes of global headers
251 footer +=
252 (uint32_t)(global_header_offset + nbytes + local_header.size()); // offset of start of global headers, since global
253 // header now starts after newly written array
254 footer += (uint16_t)0; // zip file comment length
255
256 // write everything
257 fwrite(&local_header[0], sizeof(char), local_header.size(), fp);
258 fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp);
259 fwrite(data, sizeof(T), nels, fp);
260 fwrite(&global_header[0], sizeof(char), global_header.size(), fp);
261 fwrite(&footer[0], sizeof(char), footer.size(), fp);
262 fclose(fp);
263 }
264
// Convenience overload: saves data as a one-dimensional array of
// data.size() elements by forwarding to the pointer overload of npy_save.
//
// fname  path of the file to write
// data   elements to store; may be empty
// mode   forwarded unchanged ("w" or "a")
//
// The status code returned by the pointer overload is not reported, because
// this overload returns void.
template <typename T>
void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data.data() is valid for an empty vector, whereas &data[0] is not.
  npy_save(fname, data.data(), shape, mode);
}
272
// Convenience overload: stores data as a one-dimensional array of
// data.size() elements in the archive zipname by forwarding to the pointer
// overload of npz_save.
//
// zipname  path of the archive
// fname    member name, passed on unchanged
// data     elements to store; may be empty
// mode     forwarded unchanged ("w" or "a")
template <typename T>
void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data.data() is valid for an empty vector, whereas &data[0] is not.
  npz_save(zipname, fname, data.data(), shape, mode);
}
280
281 template <typename T>
create_npy_header(const std::vector<size_t> & shape)282 std::vector<char> create_npy_header(const std::vector<size_t>& shape)
283 {
284 const char* tpye_name = typeid(T).name();
285 std::vector<char> dict;
286 dict += "{'descr': '";
287 dict += BigEndianTest(sizeof(T));
288 if (std::string(tpye_name) == "N4rknn7float16E") {
289 dict += "f";
290 } else {
291 dict += map_type(typeid(T));
292 }
293 dict += std::to_string(sizeof(T));
294 dict += "', 'fortran_order': False, 'shape': (";
295 dict += std::to_string(shape[0]);
296 for (size_t i = 1; i < shape.size(); i++) {
297 dict += ", ";
298 dict += std::to_string(shape[i]);
299 }
300 if (shape.size() == 1)
301 dict += ",";
302 dict += "), }";
303 // pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
304 int remainder = 16 - (10 + dict.size()) % 16;
305 dict.insert(dict.end(), remainder, ' ');
306 dict.back() = '\n';
307
308 std::vector<char> header;
309 header += (char)0x93;
310 header += "NUMPY";
311 header += (char)0x01; // major version of numpy format
312 header += (char)0x00; // minor version of numpy format
313 header += (uint16_t)dict.size();
314 header.insert(header.end(), dict.begin(), dict.end());
315
316 return header;
317 }
318
319 } // namespace cnpy
320
321 #endif
322