From 963fe132b9d507f1519b42c8128493dd17e8268b Mon Sep 17 00:00:00 2001 From: Armijn Hemel Date: Wed, 7 Jan 2026 21:18:41 +0100 Subject: [PATCH] add test data for a file comment containing binary data --- testdata/binary_comment.zip | Bin 0 -> 350 bytes testdata/blue.png | Bin 0 -> 162 bytes testdata/readme.binarycontentzip | 95 +++++++++++++++++ .../ziplinter__test__binary_comment.zip.snap | 97 ++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 testdata/binary_comment.zip create mode 100644 testdata/blue.png create mode 100644 testdata/readme.binarycontentzip create mode 100644 ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap diff --git a/testdata/binary_comment.zip b/testdata/binary_comment.zip new file mode 100644 index 0000000000000000000000000000000000000000..cd5de5ab7c93c96db14bdd2edc3b4bcabab77c18 GIT binary patch literal 350 zcmWIWW@Zs#fB;1X`*WfnxquuH)&Sx}B0(}71b8zti7?|faS_BSh6Y9ugGl2OI|KaO zdAX!O&f@j-a0vp^Am?dtFab&CIr>&WD$~=&F(l&f+iM$n4=C`s1a{15Y0@;Z3to~S zdf8QzY2t>|u0PTb=H=`x5Sh3^+5Lt{0th@voTOg%>@;(o(J|rktGBMcwxvHP+LJFX z_i6H@)Hhpiy;&uGD|Ex!a`jL5n3zG}zxcaX3&kdO_bfUBw3NZq)z4*}Q$kaKH!B;6 O&j^G+fOG(e!vFww^=PpG literal 0 HcmV?d00001 diff --git a/testdata/blue.png b/testdata/blue.png new file mode 100644 index 0000000000000000000000000000000000000000..e492f4e86c1d203f4c32ce98fa415d82de2ca540 GIT binary patch literal 162 zcmeAS@N?(olHy`uVBq!ia0vp^8X(NU1SFZ~=vx7)Oivfbkch)?uWjT#pupo2*fFD} zNz=$Kcu9ijWmiq6i5pV8{zyNVm$S1#Wa0*8_ZuP!An+h@l6u**)68{7$Ar(X-n#nQ zmj0k1Vg`Z#;_qH96r0%Hv*-xWQU*^~KbLh* G2~7Z_S3MB` literal 0 HcmV?d00001 diff --git a/testdata/readme.binarycontentzip b/testdata/readme.binarycontentzip new file mode 100644 index 0000000..4a8a1ac --- /dev/null +++ b/testdata/readme.binarycontentzip @@ -0,0 +1,95 @@ +# File comment contents + +The ZIP specification does not specify what the contents of a file +comment can be. Intuitively it makes sense to assume that it should be text +but it hasn't been defined. 
In fact, the Python `zipfile` module documentation +says: + +``` +ZipInfo.comment + Comment for the individual archive member as a bytes object. +``` + +Because it is a bytes object it basically means that there are no restrictions +on the *contents* of the file comment itself and any kind of data is accepted +when assembling a ZIP file using Python. For example, embedding a small PNG +as a file comment is absolutely no problem at all: + +``` +>>> import zipfile +>>> z = zipfile.ZipInfo(40*'a') +>>> test_image = open('blue.png', 'rb').read() +>>> len(test_image) +162 +>>> z.comment = test_image +>>> contents = 10*b'c' +>>> bla = zipfile.ZipFile('binary_comment.zip', mode='w') +>>> bla.writestr(z, contents) +>>> bla.close() +``` + +When inspecting the file with `hexdump` it is very easy to see that there +is a PNG file embedded in the file comment: + +``` +$ hexdump -C binary_comment.zip | grep PNG +000000a0 61 61 61 61 61 61 89 50 4e 47 0d 0a 1a 0a 00 00 |aaaaaa.PNG......| +``` + +`zipinfo` tries to display the content when run in verbose mode, but cannot: + +``` +$ zipinfo -v binary_comment.zip +Archive: binary_comment.zip +There is no zipfile comment. + +End-of-central-directory record: +------------------------------- + + Zip archive file size: 350 (000000000000015Eh) + Actual end-cent-dir record offset: 328 (0000000000000148h) + Expected end-cent-dir record offset: 328 (0000000000000148h) + (based on the length of the central directory and its expected offset) + + This zipfile constitutes the sole disk of a single-part archive; its + central directory contains 1 entry. + The central directory is 248 (00000000000000F8h) bytes long, + and its (expected) offset in bytes from the beginning of the zipfile + is 80 (0000000000000050h). 
+ + +Central directory entry #1: +--------------------------- + + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + offset of local header from start of archive: 0 + (0000000000000000h) bytes + file system or operating system of origin: Unix + version of encoding software: 2.0 + minimum file system compatibility required: MS-DOS, OS/2 or NT FAT + minimum software version required to extract: 2.0 + compression method: none (stored) + file security status: not encrypted + extended local header: no + file last modified on (DOS date/time): 1980 Jan 1 00:00:00 + 32-bit CRC value (hex): f115ce3f + compressed size: 10 bytes + uncompressed size: 10 bytes + length of filename: 40 characters + length of extra field: 0 bytes + length of file comment: 162 characters + disk number on which file begins: disk 1 + apparent file type: binary + Unix file attributes (000600 octal): ?rw------- + MS-DOS file attributes (00 hex): none + +------------------------- file comment begins ---------------------------- +�PNG +� +-------------------------- file comment ends ----------------------------- +``` + +This would allow someone to hide information in the ZIP file that is not +easy to extract unless the ZIP file is parsed in a particular way (and not +with regular unpacking tools). 
diff --git a/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap b/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap new file mode 100644 index 0000000..b8dd2e7 --- /dev/null +++ b/ziplinter/src/snapshots/ziplinter__test__binary_comment.zip.snap @@ -0,0 +1,97 @@ +--- +source: ziplinter/src/lib.rs +expression: result +--- +{ + "comment": "", + "contents": [ + { + "central": { + "comment": "ëPNG\r\n\u001a\n\u0000\u0000\u0000\rIHDR\u0000\u0000\u0000(\u0000\u0000\u0000(\b\u0002\u0000\u0000\u0000\u0003£/:\u0000\u0000\u0000iIDATX├φ╓▒\r└ \fDQêÿäé)2>Sñ`\u0015╙E)\u0002æ░eèⁿ\u001bα₧l╣p\u0014æ░#G╪\u0014`````αaÆ'vµ╦\u0003~2╞\u0013╧½╡½╓┤ÅR[I\u000e^mσcΓe∞╡┌∞¬\u0017┌U░¡w'≥▐\u0002\u0003\u0003\u0003\u0003\u0003 \u0017εΩí\u0016æïîó─\u0000\u0000\u0000\u0000IEND«B`é", + "compressed_size": 10, + "crc32": 4044738111, + "creator_version": { + "host_system": "Unix", + "version": 20 + }, + "disk_nbr_start": 0, + "external_attrs": 25165824, + "extra": [], + "flags": 0, + "header_offset": 0, + "internal_attrs": 0, + "method": "Store", + "mode": 384, + "modified": "1980-01-01T00:00:00Z", + "name": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "reader_version": { + "host_system": "MsDos", + "version": 20 + }, + "uncompressed_size": 10 + }, + "local": { + "accessed": null, + "compressed_size": 10, + "crc32": 4044738111, + "created": null, + "extra": [], + "flags": 0, + "gid": null, + "header_offset": 0, + "method": "Store", + "method_specific": "None", + "mode": 0, + "modified": "1980-01-01T00:00:00Z", + "name": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "reader_version": { + "host_system": "MsDos", + "version": 20 + }, + "uid": null, + "uncompressed_size": 10 + } + } + ], + "encoding": "Cp437", + "eocd": { + "dir": { + "inner": { + "dir_disk_nbr": 0, + "dir_records_this_disk": 1, + "directory_offset": 80, + "directory_records": 1, + "directory_size": 248, + "disk_nbr": 0 + }, + "offset": 328 + }, + "dir64": null, + "global_offset": 0 + }, + "parsed_ranges": [ + { 
+ "contains": "end of central directory record", + "end": 350, + "start": 328 + }, + { + "contains": "central directory header", + "end": 328, + "start": 80 + }, + { + "contains": "local file header", + "end": 70, + "filename": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "start": 0 + }, + { + "contains": "file data", + "end": 80, + "filename": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "start": 70 + } + ], + "size": 350 +}