diff --git a/testdata/binary_comment.zip b/testdata/binary_comment.zip new file mode 100644 index 0000000..cd5de5a Binary files /dev/null and b/testdata/binary_comment.zip differ diff --git a/testdata/blue.png b/testdata/blue.png new file mode 100644 index 0000000..e492f4e Binary files /dev/null and b/testdata/blue.png differ diff --git a/testdata/readme.binarycontentzip b/testdata/readme.binarycontentzip new file mode 100644 index 0000000..4a8a1ac --- /dev/null +++ b/testdata/readme.binarycontentzip @@ -0,0 +1,95 @@ +# File comment contents + +The ZIP specification does not specify what the contents of a file +comment can be. Intuitively it makes sense to assume that it should be text +but it hasn't been defined. In fact, the Python `zipfile` module documentation +says: + +``` +ZipInfo.comment + Comment for the individual archive member as a bytes object. +``` + +Because it is a bytes object it basically means that there are no restrictions +on the *contents* of the file comment itself and any kind of data is accepted +when assembling a ZIP file using Python. For example, embedding a small PNG +as a file comment is absolutely no problem at all: + +``` +>>> import zipfile +>>> z = zipfile.ZipInfo(40*'a') +>>> test_image = open('blue.png', 'rb').read() +>>> len(test_image) +162 +>>> z.comment = test_image +>>> contents = 10*b'c' +>>> bla = zipfile.ZipFile('binary_comment.zip', mode='w') +>>> bla.writestr(z, contents) +>>> bla.close() +``` + +When inspecting the file with `hexdump` it is very easy to see that there +is a PNG file embedded in the file comment: + +``` +$ hexdump -C binary_comment.zip | grep PNG +000000a0 61 61 61 61 61 61 89 50 4e 47 0d 0a 1a 0a 00 00 |aaaaaa.PNG......| +``` + +`zipinfo` tries to display the content when run in verbose mode, but cannot: + +``` +$ zipinfo -v binary_comment.zip +Archive: binary_comment.zip +There is no zipfile comment. 
+ +End-of-central-directory record: +------------------------------- + + Zip archive file size: 350 (000000000000015Eh) + Actual end-cent-dir record offset: 328 (0000000000000148h) + Expected end-cent-dir record offset: 328 (0000000000000148h) + (based on the length of the central directory and its expected offset) + + This zipfile constitutes the sole disk of a single-part archive; its + central directory contains 1 entry. + The central directory is 248 (00000000000000F8h) bytes long, + and its (expected) offset in bytes from the beginning of the zipfile + is 80 (0000000000000050h). + + +Central directory entry #1: +--------------------------- + + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + offset of local header from start of archive: 0 + (0000000000000000h) bytes + file system or operating system of origin: Unix + version of encoding software: 2.0 + minimum file system compatibility required: MS-DOS, OS/2 or NT FAT + minimum software version required to extract: 2.0 + compression method: none (stored) + file security status: not encrypted + extended local header: no + file last modified on (DOS date/time): 1980 Jan 1 00:00:00 + 32-bit CRC value (hex): f115ce3f + compressed size: 10 bytes + uncompressed size: 10 bytes + length of filename: 40 characters + length of extra field: 0 bytes + length of file comment: 162 characters + disk number on which file begins: disk 1 + apparent file type: binary + Unix file attributes (000600 octal): ?rw------- + MS-DOS file attributes (00 hex): none + +------------------------- file comment begins ---------------------------- +�PNG +� +-------------------------- file comment ends ----------------------------- +``` + +This would allow someone to hide information in the ZIP file that is not +easy to extract unless the ZIP file is parsed in a particular way (and not +with regular unpacking tools). 
diff --git a/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap b/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap new file mode 100644 index 0000000..b8dd2e7 --- /dev/null +++ b/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap @@ -0,0 +1,97 @@ +--- +source: ziplinter/src/lib.rs +expression: result +--- +{ + "comment": "", + "contents": [ + { + "central": { + "comment": "ëPNG\r\n\u001a\n\u0000\u0000\u0000\rIHDR\u0000\u0000\u0000(\u0000\u0000\u0000(\b\u0002\u0000\u0000\u0000\u0003£/:\u0000\u0000\u0000iIDATX├φ╓▒\r└ \fDQêÿäé)2>Sñ`\u0015╙E)\u0002æ░eèⁿ\u001bα₧l╣p\u0014æ░#G╪\u0014`````αaÆ'vµ╦\u0003~2╞\u0013╧½╡½╓┤ÅR[I\u000e^mσcΓe∞╡┌∞¬\u0017┌U░¡w'≥▐\u0002\u0003\u0003\u0003\u0003\u0003 \u0017εΩí\u0016æïîó─\u0000\u0000\u0000\u0000IEND«B`é", + "compressed_size": 10, + "crc32": 4044738111, + "creator_version": { + "host_system": "Unix", + "version": 20 + }, + "disk_nbr_start": 0, + "external_attrs": 25165824, + "extra": [], + "flags": 0, + "header_offset": 0, + "internal_attrs": 0, + "method": "Store", + "mode": 384, + "modified": "1980-01-01T00:00:00Z", + "name": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "reader_version": { + "host_system": "MsDos", + "version": 20 + }, + "uncompressed_size": 10 + }, + "local": { + "accessed": null, + "compressed_size": 10, + "crc32": 4044738111, + "created": null, + "extra": [], + "flags": 0, + "gid": null, + "header_offset": 0, + "method": "Store", + "method_specific": "None", + "mode": 0, + "modified": "1980-01-01T00:00:00Z", + "name": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "reader_version": { + "host_system": "MsDos", + "version": 20 + }, + "uid": null, + "uncompressed_size": 10 + } + } + ], + "encoding": "Cp437", + "eocd": { + "dir": { + "inner": { + "dir_disk_nbr": 0, + "dir_records_this_disk": 1, + "directory_offset": 80, + "directory_records": 1, + "directory_size": 248, + "disk_nbr": 0 + }, + "offset": 328 + }, + "dir64": null, + "global_offset": 0 + }, + "parsed_ranges": [ + { 
+ "contains": "end of central directory record", + "end": 350, + "start": 328 + }, + { + "contains": "central directory header", + "end": 328, + "start": 80 + }, + { + "contains": "local file header", + "end": 70, + "filename": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "start": 0 + }, + { + "contains": "file data", + "end": 80, + "filename": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "start": 70 + } + ], + "size": 350 +}