1 /*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Simple Zip file support.
5 */
6 #include "safe_iop.h"
7 #include "zlib.h"
8
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <limits.h>
12 #include <stdint.h> // for uintptr_t
13 #include <stdlib.h>
14 #include <sys/stat.h> // for S_ISLNK()
15 #include <unistd.h>
16
17 #define LOG_TAG "minzip"
18 #include "Zip.h"
19 #include "Bits.h"
20 #include "Log.h"
21 #include "DirUtil.h"
22
23 #undef NDEBUG // do this after including Log.h
24 #include <assert.h>
25
26 #define SORT_ENTRIES 1
27
/*
 * Offset and length constants (java.util.zip naming convention).
 *
 * xxxSIG values are record signatures, xxxHDR values are the sizes of the
 * fixed-length part of each record, and the remaining names are byte
 * offsets of individual fields within that record.
 */
enum {
    /* Central directory file header. */
    CENSIG = 0x02014b50,    // PK12
    CENHDR = 46,

    CENVEM = 4,             // version made by
    CENVER = 6,             // version needed to extract
    CENFLG = 8,             // general purpose flags
    CENHOW = 10,            // compression method
    CENTIM = 12,            // modification time/date
    CENCRC = 16,            // CRC-32 of uncompressed data
    CENSIZ = 20,            // compressed size
    CENLEN = 24,            // uncompressed size
    CENNAM = 28,            // filename length
    CENEXT = 30,            // extra field length
    CENCOM = 32,            // comment length
    CENDSK = 34,            // starting disk number
    CENATT = 36,            // internal file attributes
    CENATX = 38,            // external file attributes
    CENOFF = 42,            // offset of the local header

    /* End-of-central-directory record. */
    ENDSIG = 0x06054b50,    // PK56
    ENDHDR = 22,

    ENDSUB = 8,             // number of entries on this disk
    ENDTOT = 10,            // total number of entries
    ENDSIZ = 12,            // central directory size
    ENDOFF = 16,            // central directory offset
    ENDCOM = 20,            // archive comment length

    /* Data descriptor. */
    EXTSIG = 0x08074b50,    // PK78
    EXTHDR = 16,

    EXTCRC = 4,             // CRC-32 of uncompressed data
    EXTSIZ = 8,             // compressed size
    EXTLEN = 12,            // uncompressed size

    /* Local file header. */
    LOCSIG = 0x04034b50,    // PK34
    LOCHDR = 30,

    LOCVER = 4,             // version needed to extract
    LOCFLG = 6,             // general purpose flags
    LOCHOW = 8,             // compression method
    LOCTIM = 10,            // modification time/date
    LOCCRC = 14,            // CRC-32 of uncompressed data
    LOCSIZ = 18,            // compressed size
    LOCLEN = 22,            // uncompressed size
    LOCNAM = 26,            // filename length
    LOCEXT = 28,            // extra field length

    /* Compression methods. */
    STORED = 0,
    DEFLATED = 8,

    CENVEM_UNIX = (3 << 8), // the high byte of CENVEM
};
85
86
/*
 * Debugging aid (compiled out): log the name, data offset, sizes and
 * compression method recorded in a ZipEntry.
 */
#if 0
static void dumpEntry(const ZipEntry* pEntry)
{
    const char* name = pEntry->fileName;

    LOGI(" %p '%.*s'\n", name, pEntry->fileNameLen, name);
    LOGI(" off=%ld comp=%ld uncomp=%ld how=%d\n",
        pEntry->offset, pEntry->compLen, pEntry->uncompLen,
        pEntry->compression);
}
#endif
98
99 /*
100 * (This is a mzHashTableLookup callback.)
101 *
102 * Compare two ZipEntry structs, by name.
103 */
hashcmpZipEntry(const void * ventry1,const void * ventry2)104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2)
105 {
106 const ZipEntry* entry1 = (const ZipEntry*) ventry1;
107 const ZipEntry* entry2 = (const ZipEntry*) ventry2;
108
109 if (entry1->fileNameLen != entry2->fileNameLen)
110 return entry1->fileNameLen - entry2->fileNameLen;
111 return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen);
112 }
113
114 /*
115 * (This is a mzHashTableLookup callback.)
116 *
117 * find a ZipEntry struct by name.
118 */
hashcmpZipName(const void * ventry,const void * vname)119 static int hashcmpZipName(const void* ventry, const void* vname)
120 {
121 const ZipEntry* entry = (const ZipEntry*) ventry;
122 const char* name = (const char*) vname;
123 unsigned int nameLen = strlen(name);
124
125 if (entry->fileNameLen != nameLen)
126 return entry->fileNameLen - nameLen;
127 return memcmp(entry->fileName, name, nameLen);
128 }
129
/*
 * Compute the hash code for a ZipEntry filename.
 *
 * "name" does not have to be NUL-terminated; exactly "nameLen" bytes are
 * folded into the hash (hash = hash * 31 + byte).
 *
 * Not expected to be compatible with any other hash function, so the
 * accumulator starts at 2 to ensure it doesn't happen to match.
 */
static unsigned int computeHash(const char* name, int nameLen)
{
    unsigned int accum = 2;
    const char* cursor = name;

    for (; nameLen != 0; nameLen--) {
        accum = accum * 31 + *cursor;
        cursor++;
    }

    return accum;
}
145
/*
 * Register "pEntry" in "pHash" under the hash of its filename.
 *
 * The lookup is performed with its final ("add") argument set to true.
 * If it hands back something other than pEntry itself, an entry with the
 * same name was already present; that is logged and otherwise ignored.
 */
static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry)
{
    const ZipEntry* existing = (const ZipEntry*)mzHashTableLookup(pHash,
        computeHash(pEntry->fileName, pEntry->fileNameLen),
        pEntry, hashcmpZipEntry, true);

    if (existing == pEntry) {
        return;
    }

    /* keep going */
    LOGW("WARNING: duplicate entry '%.*s' in Zip\n",
        existing->fileNameLen, existing->fileName);
}
159
validFilename(const char * fileName,unsigned int fileNameLen)160 static int validFilename(const char *fileName, unsigned int fileNameLen)
161 {
162 // Forbid super long filenames.
163 if (fileNameLen >= PATH_MAX) {
164 LOGW("Filename too long (%d chatacters)\n", fileNameLen);
165 return 0;
166 }
167
168 // Require all characters to be printable ASCII (no NUL, no UTF-8, etc).
169 unsigned int i;
170 for (i = 0; i < fileNameLen; ++i) {
171 if (fileName[i] < 32 || fileName[i] >= 127) {
172 LOGW("Filename contains invalid character '\%03o'\n", fileName[i]);
173 return 0;
174 }
175 }
176
177 return 1;
178 }
179
180 /*
181 * Parse the contents of a Zip archive. After confirming that the file
182 * is in fact a Zip, we scan out the contents of the central directory and
183 * store it in a hash table.
184 *
185 * Returns "true" on success.
186 */
parseZipArchive(ZipArchive * pArchive,const MemMapping * pMap)187 static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
188 {
189 bool result = false;
190 const unsigned char* ptr;
191 unsigned int i, numEntries, cdOffset;
192 unsigned int val;
193
194 /*
195 * The first 4 bytes of the file will either be the local header
196 * signature for the first file (LOCSIG) or, if the archive doesn't
197 * have any files in it, the end-of-central-directory signature (ENDSIG).
198 */
199 val = get4LE(pMap->addr);
200 if (val == ENDSIG) {
201 LOGI("Found Zip archive, but it looks empty\n");
202 goto bail;
203 } else if (val != LOCSIG) {
204 LOGV("Not a Zip archive (found 0x%08x)\n", val);
205 goto bail;
206 }
207
208 /*
209 * Find the EOCD. We'll find it immediately unless they have a file
210 * comment.
211 */
212 ptr = pMap->addr + pMap->length - ENDHDR;
213
214 while (ptr >= (const unsigned char*) pMap->addr) {
215 if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG)
216 break;
217 ptr--;
218 }
219 if (ptr < (const unsigned char*) pMap->addr) {
220 LOGI("Could not find end-of-central-directory in Zip\n");
221 goto bail;
222 }
223
224 /*
225 * There are two interesting items in the EOCD block: the number of
226 * entries in the file, and the file offset of the start of the
227 * central directory.
228 */
229 numEntries = get2LE(ptr + ENDSUB);
230 cdOffset = get4LE(ptr + ENDOFF);
231
232 LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
233 if (numEntries == 0 || cdOffset >= pMap->length) {
234 LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
235 numEntries, cdOffset, pMap->length);
236 goto bail;
237 }
238
239 /*
240 * Create data structures to hold entries.
241 */
242 pArchive->numEntries = numEntries;
243 pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry));
244 pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL);
245 if (pArchive->pEntries == NULL || pArchive->pHash == NULL)
246 goto bail;
247
248 ptr = pMap->addr + cdOffset;
249 for (i = 0; i < numEntries; i++) {
250 ZipEntry* pEntry;
251 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
252 const unsigned char* localHdr;
253 const char *fileName;
254
255 if (ptr + CENHDR > (const unsigned char*)pMap->addr + pMap->length) {
256 LOGW("Ran off the end (at %d)\n", i);
257 goto bail;
258 }
259 if (get4LE(ptr) != CENSIG) {
260 LOGW("Missed a central dir sig (at %d)\n", i);
261 goto bail;
262 }
263
264 localHdrOffset = get4LE(ptr + CENOFF);
265 fileNameLen = get2LE(ptr + CENNAM);
266 extraLen = get2LE(ptr + CENEXT);
267 commentLen = get2LE(ptr + CENCOM);
268 fileName = (const char*)ptr + CENHDR;
269 if (fileName + fileNameLen > (const char*)pMap->addr + pMap->length) {
270 LOGW("Filename ran off the end (at %d)\n", i);
271 goto bail;
272 }
273 if (!validFilename(fileName, fileNameLen)) {
274 LOGW("Invalid filename (at %d)\n", i);
275 goto bail;
276 }
277
278 #if SORT_ENTRIES
279 /* Figure out where this entry should go (binary search).
280 */
281 if (i > 0) {
282 int low, high;
283
284 low = 0;
285 high = i - 1;
286 while (low <= high) {
287 int mid;
288 int diff;
289 int diffLen;
290
291 mid = low + ((high - low) / 2); // avoid overflow
292
293 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) {
294 diffLen = pArchive->pEntries[mid].fileNameLen;
295 } else {
296 diffLen = fileNameLen;
297 }
298 diff = strncmp(pArchive->pEntries[mid].fileName, fileName,
299 diffLen);
300 if (diff == 0) {
301 diff = pArchive->pEntries[mid].fileNameLen - fileNameLen;
302 }
303 if (diff < 0) {
304 low = mid + 1;
305 } else if (diff > 0) {
306 high = mid - 1;
307 } else {
308 high = mid;
309 break;
310 }
311 }
312
313 unsigned int target = high + 1;
314 assert(target <= i);
315 if (target != i) {
316 /* It belongs somewhere other than at the end of
317 * the list. Make some room at [target].
318 */
319 memmove(pArchive->pEntries + target + 1,
320 pArchive->pEntries + target,
321 (i - target) * sizeof(ZipEntry));
322 }
323 pEntry = &pArchive->pEntries[target];
324 } else {
325 pEntry = &pArchive->pEntries[0];
326 }
327 #else
328 pEntry = &pArchive->pEntries[i];
329 #endif
330
331 //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n",
332 // i, localHdrOffset, fileNameLen, extraLen, commentLen);
333
334 pEntry->fileNameLen = fileNameLen;
335 pEntry->fileName = fileName;
336
337 pEntry->compLen = get4LE(ptr + CENSIZ);
338 pEntry->uncompLen = get4LE(ptr + CENLEN);
339 pEntry->compression = get2LE(ptr + CENHOW);
340 pEntry->modTime = get4LE(ptr + CENTIM);
341 pEntry->crc32 = get4LE(ptr + CENCRC);
342
343 /* These two are necessary for finding the mode of the file.
344 */
345 pEntry->versionMadeBy = get2LE(ptr + CENVEM);
346 if ((pEntry->versionMadeBy & 0xff00) != 0 &&
347 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX) {
348 LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n",
349 pEntry->versionMadeBy >> 8, i);
350 goto bail;
351 }
352 pEntry->externalFileAttributes = get4LE(ptr + CENATX);
353
354 // Perform pMap->addr + localHdrOffset, ensuring that it won't
355 // overflow. This is needed because localHdrOffset is untrusted.
356 if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pMap->addr,
357 (uintptr_t)localHdrOffset)) {
358 LOGW("Integer overflow adding in parseZipArchive\n");
359 goto bail;
360 }
361 if ((uintptr_t)localHdr + LOCHDR >
362 (uintptr_t)pMap->addr + pMap->length) {
363 LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i);
364 goto bail;
365 }
366 if (get4LE(localHdr) != LOCSIG) {
367 LOGW("Missed a local header sig (at %d)\n", i);
368 goto bail;
369 }
370 pEntry->offset = localHdrOffset + LOCHDR
371 + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT);
372 if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) {
373 LOGW("Integer overflow adding in parseZipArchive\n");
374 goto bail;
375 }
376 if ((size_t)pEntry->offset + pEntry->compLen > pMap->length) {
377 LOGW("Data ran off the end (at %d)\n", i);
378 goto bail;
379 }
380
381 #if !SORT_ENTRIES
382 /* Add to hash table; no need to lock here.
383 * Can't do this now if we're sorting, because entries
384 * will move around.
385 */
386 addEntryToHashTable(pArchive->pHash, pEntry);
387 #endif
388
389 //dumpEntry(pEntry);
390 ptr += CENHDR + fileNameLen + extraLen + commentLen;
391 }
392
393 #if SORT_ENTRIES
394 /* If we're sorting, we have to wait until all entries
395 * are in their final places, otherwise the pointers will
396 * probably point to the wrong things.
397 */
398 for (i = 0; i < numEntries; i++) {
399 /* Add to hash table; no need to lock here.
400 */
401 addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]);
402 }
403 #endif
404
405 result = true;
406
407 bail:
408 if (!result) {
409 mzHashTableFree(pArchive->pHash);
410 pArchive->pHash = NULL;
411 }
412 return result;
413 }
414
415 /*
416 * Open a Zip archive and scan out the contents.
417 *
418 * The easiest way to do this is to mmap() the whole thing and do the
419 * traditional backward scan for central directory. Since the EOCD is
420 * a relatively small bit at the end, we should end up only touching a
421 * small set of pages.
422 *
423 * This will be called on non-Zip files, especially during startup, so
424 * we don't want to be too noisy about failures. (Do we want a "quiet"
425 * flag?)
426 *
427 * On success, we fill out the contents of "pArchive".
428 */
mzOpenZipArchive(const char * fileName,ZipArchive * pArchive)429 int mzOpenZipArchive(const char* fileName, ZipArchive* pArchive)
430 {
431 MemMapping map;
432 int err;
433
434 LOGV("Opening archive '%s' %p\n", fileName, pArchive);
435
436 map.addr = NULL;
437 memset(pArchive, 0, sizeof(*pArchive));
438
439 pArchive->fd = open(fileName, O_RDONLY, 0);
440 if (pArchive->fd < 0) {
441 err = errno ? errno : -1;
442 LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
443 goto bail;
444 }
445
446 if (sysMapFileInShmem(pArchive->fd, &map) != 0) {
447 err = -1;
448 LOGW("Map of '%s' failed\n", fileName);
449 goto bail;
450 }
451
452 if (map.length < ENDHDR) {
453 err = -1;
454 LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length);
455 goto bail;
456 }
457
458 if (!parseZipArchive(pArchive, &map)) {
459 err = -1;
460 LOGV("Parsing '%s' failed\n", fileName);
461 goto bail;
462 }
463
464 err = 0;
465 sysCopyMap(&pArchive->map, &map);
466 map.addr = NULL;
467
468 bail:
469 if (err != 0)
470 mzCloseZipArchive(pArchive);
471 if (map.addr != NULL)
472 sysReleaseShmem(&map);
473 return err;
474 }
475
476 /*
477 * Close a ZipArchive, closing the file and freeing the contents.
478 *
479 * NOTE: the ZipArchive may not have been fully created.
480 */
mzCloseZipArchive(ZipArchive * pArchive)481 void mzCloseZipArchive(ZipArchive* pArchive)
482 {
483 LOGV("Closing archive %p\n", pArchive);
484
485 if (pArchive->fd >= 0)
486 close(pArchive->fd);
487 if (pArchive->map.addr != NULL)
488 sysReleaseShmem(&pArchive->map);
489
490 free(pArchive->pEntries);
491
492 mzHashTableFree(pArchive->pHash);
493
494 pArchive->fd = -1;
495 pArchive->pHash = NULL;
496 pArchive->pEntries = NULL;
497 }
498
499 /*
500 * Find a matching entry.
501 *
502 * Returns NULL if no matching entry found.
503 */
mzFindZipEntry(const ZipArchive * pArchive,const char * entryName)504 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive,
505 const char* entryName)
506 {
507 unsigned int itemHash = computeHash(entryName, strlen(entryName));
508
509 return (const ZipEntry*)mzHashTableLookup(pArchive->pHash,
510 itemHash, (char*) entryName, hashcmpZipName, false);
511 }
512
513 /*
514 * Return true if the entry is a symbolic link.
515 */
mzIsZipEntrySymlink(const ZipEntry * pEntry)516 bool mzIsZipEntrySymlink(const ZipEntry* pEntry)
517 {
518 if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) {
519 return S_ISLNK(pEntry->externalFileAttributes >> 16);
520 }
521 return false;
522 }
523
524 /* Call processFunction on the uncompressed data of a STORED entry.
525 */
processStoredEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)526 static bool processStoredEntry(const ZipArchive *pArchive,
527 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
528 void *cookie)
529 {
530 size_t bytesLeft = pEntry->compLen;
531 while (bytesLeft > 0) {
532 unsigned char buf[32 * 1024];
533 ssize_t n;
534 size_t count;
535 bool ret;
536
537 count = bytesLeft;
538 if (count > sizeof(buf)) {
539 count = sizeof(buf);
540 }
541 n = read(pArchive->fd, buf, count);
542 if (n < 0 || (size_t)n != count) {
543 LOGE("Can't read %zu bytes from zip file: %ld\n", count, n);
544 return false;
545 }
546 ret = processFunction(buf, n, cookie);
547 if (!ret) {
548 return false;
549 }
550 bytesLeft -= count;
551 }
552 return true;
553 }
554
/*
 * Call processFunction on the uncompressed data of a DEFLATED entry.
 *
 * Reads pEntry->compLen bytes of raw deflate data from the archive's file
 * descriptor, starting at its current offset, inflates them and passes the
 * output to processFunction in chunks of at most 32KB.
 *
 * Success is tracked in its own flag rather than by comparing a byte count
 * against a -1 sentinel, so a failure can never be mistaken for success
 * whatever size the entry claims to have.
 *
 * Returns true only if the stream inflated completely, processFunction
 * accepted every chunk, and the number of bytes produced equals
 * pEntry->uncompLen.
 */
static bool processDeflatedEntry(const ZipArchive *pArchive,
    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
    void *cookie)
{
    bool inflated = false;
    long totalOut = 0;
    unsigned char readBuf[32 * 1024];
    unsigned char procBuf[32 * 1024];
    z_stream zstream;
    int zerr;
    long compRemaining;

    compRemaining = pEntry->compLen;

    /*
     * Initialize the zlib stream.
     */
    memset(&zstream, 0, sizeof(zstream));
    zstream.zalloc = Z_NULL;
    zstream.zfree = Z_NULL;
    zstream.opaque = Z_NULL;
    zstream.next_in = NULL;
    zstream.avail_in = 0;
    zstream.next_out = (Bytef*) procBuf;
    zstream.avail_out = sizeof(procBuf);
    zstream.data_type = Z_UNKNOWN;

    /*
     * Use the undocumented "negative window bits" feature to tell zlib
     * that there's no zlib header waiting for it.
     */
    zerr = inflateInit2(&zstream, -MAX_WBITS);
    if (zerr != Z_OK) {
        if (zerr == Z_VERSION_ERROR) {
            LOGE("Installed zlib is not compatible with linked version (%s)\n",
                ZLIB_VERSION);
        } else {
            LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
        }
        /* Nothing was set up, so there is nothing to tear down. */
        return false;
    }

    /*
     * Loop while we have data.
     */
    do {
        /* read as much as we can */
        if (zstream.avail_in == 0) {
            long getSize = (compRemaining > (long)sizeof(readBuf)) ?
                        (long)sizeof(readBuf) : compRemaining;
            ssize_t cc;

            LOGVV("+++ reading %ld bytes (%ld left)\n",
                getSize, compRemaining);

            /* A read interrupted by a signal is not a real failure. */
            do {
                cc = read(pArchive->fd, readBuf, getSize);
            } while (cc < 0 && errno == EINTR);
            if (cc != (ssize_t) getSize) {
                LOGW("inflate read failed (%d vs %ld)\n", (int) cc, getSize);
                goto z_bail;
            }

            compRemaining -= getSize;

            zstream.next_in = readBuf;
            zstream.avail_in = getSize;
        }

        /* uncompress the data */
        zerr = inflate(&zstream, Z_NO_FLUSH);
        if (zerr != Z_OK && zerr != Z_STREAM_END) {
            LOGD("zlib inflate call failed (zerr=%d)\n", zerr);
            goto z_bail;
        }

        /* write when we're full or when we're done */
        if (zstream.avail_out == 0 ||
            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf))) {
            long procSize = zstream.next_out - procBuf;
            LOGVV("+++ processing %d bytes\n", (int) procSize);
            bool ret = processFunction(procBuf, procSize, cookie);
            if (!ret) {
                LOGW("Process function elected to fail (in inflate)\n");
                goto z_bail;
            }

            zstream.next_out = procBuf;
            zstream.avail_out = sizeof(procBuf);
        }
    } while (zerr == Z_OK);

    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */

    // success!
    totalOut = zstream.total_out;
    inflated = true;

z_bail:
    inflateEnd(&zstream);        /* free up any allocated structures */

    if (!inflated) {
        /* The specific error has already been logged. */
        return false;
    }
    if (totalOut != pEntry->uncompLen) {
        LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
            totalOut, pEntry->uncompLen);
        return false;
    }
    return true;
}
659
660 /*
661 * Stream the uncompressed data through the supplied function,
662 * passing cookie to it each time it gets called. processFunction
663 * may be called more than once.
664 *
665 * If processFunction returns false, the operation is abandoned and
666 * mzProcessZipEntryContents() immediately returns false.
667 *
668 * This is useful for calculating the hash of an entry's uncompressed contents.
669 */
mzProcessZipEntryContents(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)670 bool mzProcessZipEntryContents(const ZipArchive *pArchive,
671 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
672 void *cookie)
673 {
674 bool ret = false;
675 off_t oldOff;
676
677 /* save current offset */
678 oldOff = lseek(pArchive->fd, 0, SEEK_CUR);
679
680 /* Seek to the beginning of the entry's compressed data. */
681 lseek(pArchive->fd, pEntry->offset, SEEK_SET);
682
683 switch (pEntry->compression) {
684 case STORED:
685 ret = processStoredEntry(pArchive, pEntry, processFunction, cookie);
686 break;
687 case DEFLATED:
688 ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie);
689 break;
690 default:
691 LOGE("Unsupported compression type %d for entry '%s'\n",
692 pEntry->compression, pEntry->fileName);
693 break;
694 }
695
696 /* restore file offset */
697 lseek(pArchive->fd, oldOff, SEEK_SET);
698 return ret;
699 }
700
/*
 * ProcessZipEntryContentsFunction that folds each chunk of data into a
 * running CRC-32.  "crc" points at the unsigned long holding the running
 * value, which is updated in place.  Always returns true.
 */
static bool crcProcessFunction(const unsigned char *data, int dataLen,
    void *crc)
{
    unsigned long *running = (unsigned long *)crc;

    *running = crc32(*running, data, dataLen);
    return true;
}
707
708 /*
709 * Check the CRC on this entry; return true if it is correct.
710 * May do other internal checks as well.
711 */
mzIsZipEntryIntact(const ZipArchive * pArchive,const ZipEntry * pEntry)712 bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry)
713 {
714 unsigned long crc;
715 bool ret;
716
717 crc = crc32(0L, Z_NULL, 0);
718 ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction,
719 (void *)&crc);
720 if (!ret) {
721 LOGE("Can't calculate CRC for entry\n");
722 return false;
723 }
724 if (crc != (unsigned long)pEntry->crc32) {
725 LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n",
726 pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32);
727 return false;
728 }
729 return true;
730 }
731
/* Destination state for copyProcessFunction. */
typedef struct {
    char *buf;      // where the next chunk will be written
    int bufLen;     // bytes of space remaining at buf
} CopyProcessArgs;

/*
 * ProcessZipEntryContentsFunction that appends each chunk to a fixed-size
 * caller buffer described by a CopyProcessArgs cookie.
 *
 * Returns false, without copying anything, when the chunk does not fit in
 * the space that remains; otherwise copies it, advances the write position
 * and returns true.
 */
static bool copyProcessFunction(const unsigned char *data, int dataLen,
    void *cookie)
{
    CopyProcessArgs *dest = (CopyProcessArgs *)cookie;

    if (dataLen > dest->bufLen) {
        return false;
    }

    memcpy(dest->buf, data, dataLen);
    dest->buf += dataLen;
    dest->bufLen -= dataLen;
    return true;
}
749
750 /*
751 * Read an entry into a buffer allocated by the caller.
752 */
mzReadZipEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,char * buf,int bufLen)753 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry,
754 char *buf, int bufLen)
755 {
756 CopyProcessArgs args;
757 bool ret;
758
759 args.buf = buf;
760 args.bufLen = bufLen;
761 ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction,
762 (void *)&args);
763 if (!ret) {
764 LOGE("Can't extract entry to buffer.\n");
765 return false;
766 }
767 return true;
768 }
769
/*
 * ProcessZipEntryContentsFunction that writes each chunk to a file
 * descriptor, which is passed (cast to a pointer) as the cookie.
 *
 * Keeps calling write() until all dataLen bytes are out.  A write
 * interrupted by a signal (EINTR) is retried quietly.  Any other error,
 * or a write that makes no progress, fails the call; errno is captured
 * before logging so the logging itself can't disturb the decision.
 *
 * Returns true once everything has been written (trivially so for an
 * empty chunk), false on error or if dataLen is negative.
 */
static bool writeProcessFunction(const unsigned char *data, int dataLen,
    void *cookie)
{
    int fd = (intptr_t)cookie;
    ssize_t soFar = 0;

    if (dataLen < 0) {
        return false;
    }

    while (soFar < dataLen) {
        ssize_t n = write(fd, data + soFar, dataLen - soFar);
        if (n < 0) {
            int err = errno;

            if (err == EINTR) {
                continue;
            }
            LOGE("Error writing %zd bytes from zip file from %p: %s\n",
                dataLen - soFar, (const void *)(data + soFar), strerror(err));
            return false;
        }
        if (n == 0) {
            /* errno says nothing here; retrying could spin forever. */
            LOGE("write() made no progress with %zd bytes left from zip file\n",
                dataLen - soFar);
            return false;
        }

        soFar += n;
        if (soFar > dataLen) {
            LOGE("write overrun?  (%zd bytes instead of %d)\n",
                soFar, dataLen);
            return false;
        }
    }
    return true;
}
795
796 /*
797 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset.
798 */
mzExtractZipEntryToFile(const ZipArchive * pArchive,const ZipEntry * pEntry,int fd)799 bool mzExtractZipEntryToFile(const ZipArchive *pArchive,
800 const ZipEntry *pEntry, int fd)
801 {
802 bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction,
803 (void *)(intptr_t)fd);
804 if (!ret) {
805 LOGE("Can't extract entry to file.\n");
806 return false;
807 }
808 return true;
809 }
810
/* Destination state for bufferProcessFunction. */
typedef struct {
    unsigned char* buffer;  // where the next chunk will be written
    long len;               // bytes of space remaining at buffer
} BufferExtractCookie;

/*
 * ProcessZipEntryContentsFunction that appends each chunk to the memory
 * buffer described by a BufferExtractCookie.
 *
 * Chunks are handed over before the total size of an entry has been
 * checked against its recorded uncompressed length, so an entry that
 * yields more data than it claims would otherwise run past the end of
 * the caller's buffer.  A chunk that doesn't fit in the remaining space
 * (or has a negative length) is therefore rejected without copying.
 *
 * Returns true if the chunk was copied, false if it was rejected.
 */
static bool bufferProcessFunction(const unsigned char *data, int dataLen,
    void *cookie)
{
    BufferExtractCookie *bec = (BufferExtractCookie*)cookie;

    if (dataLen < 0 || dataLen > bec->len) {
        return false;
    }

    memmove(bec->buffer, data, dataLen);
    bec->buffer += dataLen;
    bec->len -= dataLen;

    return true;
}
827
828 /*
829 * Uncompress "pEntry" in "pArchive" to buffer, which must be large
830 * enough to hold mzGetZipEntryUncomplen(pEntry) bytes.
831 */
mzExtractZipEntryToBuffer(const ZipArchive * pArchive,const ZipEntry * pEntry,unsigned char * buffer)832 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive,
833 const ZipEntry *pEntry, unsigned char *buffer)
834 {
835 BufferExtractCookie bec;
836 bec.buffer = buffer;
837 bec.len = mzGetZipEntryUncompLen(pEntry);
838
839 bool ret = mzProcessZipEntryContents(pArchive, pEntry,
840 bufferProcessFunction, (void*)&bec);
841 if (!ret || bec.len != 0) {
842 LOGE("Can't extract entry to memory buffer.\n");
843 return false;
844 }
845 return true;
846 }
847
848
849 /* Helper state to make path translation easier and less malloc-happy.
850 */
851 typedef struct {
852 const char *targetDir;
853 const char *zipDir;
854 char *buf;
855 int targetDirLen;
856 int zipDirLen;
857 int bufLen;
858 } MzPathHelper;
859
860 /* Given the values of targetDir and zipDir in the helper,
861 * return the target filename of the provided entry.
862 * The helper must be initialized first.
863 */
targetEntryPath(MzPathHelper * helper,ZipEntry * pEntry)864 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry)
865 {
866 int needLen;
867 bool firstTime = (helper->buf == NULL);
868
869 /* target file <-- targetDir + / + entry[zipDirLen:]
870 */
871 needLen = helper->targetDirLen + 1 +
872 pEntry->fileNameLen - helper->zipDirLen + 1;
873 if (needLen > helper->bufLen) {
874 char *newBuf;
875
876 needLen *= 2;
877 newBuf = (char *)realloc(helper->buf, needLen);
878 if (newBuf == NULL) {
879 return NULL;
880 }
881 helper->buf = newBuf;
882 helper->bufLen = needLen;
883 }
884
885 /* Every path will start with the target path and a slash.
886 */
887 if (firstTime) {
888 char *p = helper->buf;
889 memcpy(p, helper->targetDir, helper->targetDirLen);
890 p += helper->targetDirLen;
891 if (p == helper->buf || p[-1] != '/') {
892 helper->targetDirLen += 1;
893 *p++ = '/';
894 }
895 }
896
897 /* Replace the custom part of the path with the appropriate
898 * part of the entry's path.
899 */
900 char *epath = helper->buf + helper->targetDirLen;
901 memcpy(epath, pEntry->fileName + helper->zipDirLen,
902 pEntry->fileNameLen - helper->zipDirLen);
903 epath += pEntry->fileNameLen - helper->zipDirLen;
904 *epath = '\0';
905
906 return helper->buf;
907 }
908
909 /*
910 * Inflate all entries under zipDir to the directory specified by
911 * targetDir, which must exist and be a writable directory.
912 *
913 * The immediate children of zipDir will become the immediate
914 * children of targetDir; e.g., if the archive contains the entries
915 *
916 * a/b/c/one
917 * a/b/c/two
918 * a/b/c/d/three
919 *
920 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting
921 * files will be
922 *
923 * /tmp/one
924 * /tmp/two
925 * /tmp/d/three
926 *
927 * Returns true on success, false on failure.
928 */
mzExtractRecursive(const ZipArchive * pArchive,const char * zipDir,const char * targetDir,int flags,const struct utimbuf * timestamp,void (* callback)(const char * fn,void *),void * cookie)929 bool mzExtractRecursive(const ZipArchive *pArchive,
930 const char *zipDir, const char *targetDir,
931 int flags, const struct utimbuf *timestamp,
932 void (*callback)(const char *fn, void *), void *cookie)
933 {
934 if (zipDir[0] == '/') {
935 LOGE("mzExtractRecursive(): zipDir must be a relative path.\n");
936 return false;
937 }
938 if (targetDir[0] != '/') {
939 LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n");
940 return false;
941 }
942
943 unsigned int zipDirLen;
944 char *zpath;
945
946 zipDirLen = strlen(zipDir);
947 zpath = (char *)malloc(zipDirLen + 2);
948 if (zpath == NULL) {
949 LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2);
950 return false;
951 }
952 /* If zipDir is empty, we'll extract the entire zip file.
953 * Otherwise, canonicalize the path.
954 */
955 if (zipDirLen > 0) {
956 /* Make sure there's (hopefully, exactly one) slash at the
957 * end of the path. This way we don't need to worry about
958 * accidentally extracting "one/twothree" when a path like
959 * "one/two" is specified.
960 */
961 memcpy(zpath, zipDir, zipDirLen);
962 if (zpath[zipDirLen - 1] != '/') {
963 zpath[zipDirLen++] = '/';
964 }
965 }
966 zpath[zipDirLen] = '\0';
967
968 /* Set up the helper structure that we'll use to assemble paths.
969 */
970 MzPathHelper helper;
971 helper.targetDir = targetDir;
972 helper.targetDirLen = strlen(helper.targetDir);
973 helper.zipDir = zpath;
974 helper.zipDirLen = strlen(helper.zipDir);
975 helper.buf = NULL;
976 helper.bufLen = 0;
977
978 /* Walk through the entries and extract anything whose path begins
979 * with zpath.
980 //TODO: since the entries are sorted, binary search for the first match
981 // and stop after the first non-match.
982 */
983 unsigned int i;
984 bool seenMatch = false;
985 int ok = true;
986 for (i = 0; i < pArchive->numEntries; i++) {
987 ZipEntry *pEntry = pArchive->pEntries + i;
988 if (pEntry->fileNameLen < zipDirLen) {
989 //TODO: look out for a single empty directory entry that matches zpath, but
990 // missing the trailing slash. Most zip files seem to include
991 // the trailing slash, but I think it's legal to leave it off.
992 // e.g., zpath "a/b/", entry "a/b", with no children of the entry.
993 /* No chance of matching.
994 */
995 #if SORT_ENTRIES
996 if (seenMatch) {
997 /* Since the entries are sorted, we can give up
998 * on the first mismatch after the first match.
999 */
1000 break;
1001 }
1002 #endif
1003 continue;
1004 }
1005 /* If zpath is empty, this strncmp() will match everything,
1006 * which is what we want.
1007 */
1008 if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) {
1009 #if SORT_ENTRIES
1010 if (seenMatch) {
1011 /* Since the entries are sorted, we can give up
1012 * on the first mismatch after the first match.
1013 */
1014 break;
1015 }
1016 #endif
1017 continue;
1018 }
1019 /* This entry begins with zipDir, so we'll extract it.
1020 */
1021 seenMatch = true;
1022
1023 /* Find the target location of the entry.
1024 */
1025 const char *targetFile = targetEntryPath(&helper, pEntry);
1026 if (targetFile == NULL) {
1027 LOGE("Can't assemble target path for \"%.*s\"\n",
1028 pEntry->fileNameLen, pEntry->fileName);
1029 ok = false;
1030 break;
1031 }
1032
1033 /* With DRY_RUN set, invoke the callback but don't do anything else.
1034 */
1035 if (flags & MZ_EXTRACT_DRY_RUN) {
1036 if (callback != NULL) callback(targetFile, cookie);
1037 continue;
1038 }
1039
1040 /* Create the file or directory.
1041 */
1042 #define UNZIP_DIRMODE 0755
1043 #define UNZIP_FILEMODE 0644
1044 if (pEntry->fileName[pEntry->fileNameLen - 1] == '/') {
1045 if (!(flags & MZ_EXTRACT_FILES_ONLY)) {
1046 int ret = dirCreateHierarchy(
1047 targetFile, UNZIP_DIRMODE, timestamp, false);
1048 if (ret != 0) {
1049 LOGE("Can't create containing directory for \"%s\": %s\n",
1050 targetFile, strerror(errno));
1051 ok = false;
1052 break;
1053 }
1054 LOGD("Extracted dir \"%s\"\n", targetFile);
1055 }
1056 } else {
1057 /* This is not a directory. First, make sure that
1058 * the containing directory exists.
1059 */
1060 int ret = dirCreateHierarchy(
1061 targetFile, UNZIP_DIRMODE, timestamp, true);
1062 if (ret != 0) {
1063 LOGE("Can't create containing directory for \"%s\": %s\n",
1064 targetFile, strerror(errno));
1065 ok = false;
1066 break;
1067 }
1068
1069 /* With FILES_ONLY set, we need to ignore metadata entirely,
1070 * so treat symlinks as regular files.
1071 */
1072 if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) {
1073 /* The entry is a symbolic link.
1074 * The relative target of the symlink is in the
1075 * data section of this entry.
1076 */
1077 if (pEntry->uncompLen == 0) {
1078 LOGE("Symlink entry \"%s\" has no target\n",
1079 targetFile);
1080 ok = false;
1081 break;
1082 }
1083 char *linkTarget = malloc(pEntry->uncompLen + 1);
1084 if (linkTarget == NULL) {
1085 ok = false;
1086 break;
1087 }
1088 ok = mzReadZipEntry(pArchive, pEntry, linkTarget,
1089 pEntry->uncompLen);
1090 if (!ok) {
1091 LOGE("Can't read symlink target for \"%s\"\n",
1092 targetFile);
1093 free(linkTarget);
1094 break;
1095 }
1096 linkTarget[pEntry->uncompLen] = '\0';
1097
1098 /* Make the link.
1099 */
1100 ret = symlink(linkTarget, targetFile);
1101 if (ret != 0) {
1102 LOGE("Can't symlink \"%s\" to \"%s\": %s\n",
1103 targetFile, linkTarget, strerror(errno));
1104 free(linkTarget);
1105 ok = false;
1106 break;
1107 }
1108 LOGD("Extracted symlink \"%s\" -> \"%s\"\n",
1109 targetFile, linkTarget);
1110 free(linkTarget);
1111 } else {
1112 /* The entry is a regular file.
1113 * Open the target for writing.
1114 */
1115 int fd = creat(targetFile, UNZIP_FILEMODE);
1116 if (fd < 0) {
1117 LOGE("Can't create target file \"%s\": %s\n",
1118 targetFile, strerror(errno));
1119 ok = false;
1120 break;
1121 }
1122
1123 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd);
1124 close(fd);
1125 if (!ok) {
1126 LOGE("Error extracting \"%s\"\n", targetFile);
1127 ok = false;
1128 break;
1129 }
1130
1131 if (timestamp != NULL && utime(targetFile, timestamp)) {
1132 LOGE("Error touching \"%s\"\n", targetFile);
1133 ok = false;
1134 break;
1135 }
1136
1137 LOGD("Extracted file \"%s\"\n", targetFile);
1138 }
1139 }
1140
1141 if (callback != NULL) callback(targetFile, cookie);
1142 }
1143
1144 free(helper.buf);
1145 free(zpath);
1146
1147 return ok;
1148 }
1149