diff --git a/.typos.toml b/.typos.toml index 7faede56f..84959bd2f 100644 --- a/.typos.toml +++ b/.typos.toml @@ -1,5 +1,6 @@ [default.extend-identifiers] ZIP64_BYTES_THR = "ZIP64_BYTES_THR" +ZIP64_BYTES_THR_U32 = "ZIP64_BYTES_THR_U32" ZIP64_ENTRY_THR = "ZIP64_ENTRY_THR" flate2 = "flate2" "00ba" = "00ba" diff --git a/src/read/zipfile.rs b/src/read/zipfile.rs index 045686e23..f46031a2a 100644 --- a/src/read/zipfile.rs +++ b/src/read/zipfile.rs @@ -297,7 +297,7 @@ impl<'a, R: Read + ?Sized> ZipFile<'a, R> { /// a new zip archive. pub fn options(&self) -> SimpleFileOptions { let mut options = SimpleFileOptions::default() - .large_file(self.compressed_size().max(self.size()) > ZIP64_BYTES_THR) + .large_file(self.compressed_size().max(self.size()) >= ZIP64_BYTES_THR) .compression_method(self.compression()) .unix_permissions(self.unix_mode().unwrap_or(0o644) | ffi::S_IFREG) .last_modified_time( diff --git a/src/spec.rs b/src/spec.rs index cb1d8ae59..f3c0a9cc9 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -107,6 +107,7 @@ impl Magic { /// # } ///``` pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; +pub const ZIP64_BYTES_THR_U32: u32 = u32::MAX; /// The number of entries within a single zip necessary to allocate a zip64 central /// directory record. /// @@ -609,6 +610,8 @@ impl Zip64CentralDirectoryEnd { /// Minimum size of the block /// Block - record_size - extensible_data const MIN_SIZE: usize = 2 * size_of::() + 2 * size_of::() + 4 * size_of::(); + pub(crate) const MIN_FULL_SIZE: usize = + 2 * size_of::() + 2 * size_of::() + 5 * size_of::(); /// Size of ZIP64 EOCD signature + record_size field. 
const RECORD_OVERHEAD: u64 = (size_of::() + size_of::()) as u64; @@ -990,5 +993,6 @@ mod tests { fn test_size_zip64_central_directory_end() { use super::Zip64CentralDirectoryEnd; assert_eq!(Zip64CentralDirectoryEnd::MIN_SIZE, 44); + assert_eq!(Zip64CentralDirectoryEnd::MIN_FULL_SIZE, 52); } } diff --git a/src/types.rs b/src/types.rs index 2dedda4e6..31841c35d 100644 --- a/src/types.rs +++ b/src/types.rs @@ -510,13 +510,14 @@ impl ZipFileData { } fn clamp_size_field(&self, field: u64) -> Result { if self.large_file { - Ok(spec::ZIP64_BYTES_THR as u32) + Ok(spec::ZIP64_BYTES_THR_U32) } else { - field.min(spec::ZIP64_BYTES_THR).try_into().map_err(|_| { + let size: u32 = field.try_into().map_err(|_| { std::io::Error::other(format!( "File size {field} exceeds maximum size for non-ZIP64 files" )) - }) + })?; + Ok(size.min(spec::ZIP64_BYTES_THR_U32 - 1)) } } @@ -560,22 +561,8 @@ impl ZipFileData { } pub(crate) fn block(&self, file_name_raw: &[u8]) -> ZipResult { - let compressed_size = if self.large_file { - spec::ZIP64_BYTES_THR as u32 - } else { - self.compressed_size - .min(spec::ZIP64_BYTES_THR) - .try_into() - .map_err(std::io::Error::other)? - }; - let uncompressed_size = if self.large_file { - spec::ZIP64_BYTES_THR as u32 - } else { - self.uncompressed_size - .min(spec::ZIP64_BYTES_THR) - .try_into() - .map_err(std::io::Error::other)? 
- }; + let compressed_size = self.clamp_size_field(self.compressed_size)?; + let uncompressed_size = self.clamp_size_field(self.uncompressed_size)?; let offset = self .header_start .min(spec::ZIP64_BYTES_THR) @@ -636,8 +623,8 @@ impl ZipFileData { if self.large_file { return self.zip64_data_descriptor_block().write(writer); } - if self.compressed_size > spec::ZIP64_BYTES_THR - || self.uncompressed_size > spec::ZIP64_BYTES_THR + if self.compressed_size >= spec::ZIP64_BYTES_THR + || self.uncompressed_size >= spec::ZIP64_BYTES_THR { if auto_large_file { return self.zip64_data_descriptor_block().write(writer); diff --git a/src/write.rs b/src/write.rs index f6a71ef17..1c823f74d 100644 --- a/src/write.rs +++ b/src/write.rs @@ -8,7 +8,10 @@ use crate::extra_fields::Zip64ExtendedInformation; use crate::format::flags::ZipFlags; use crate::read::{Config, ZipArchive, ZipFile, parse_single_extra_field}; use crate::result::{ZipError, ZipResult, invalid}; -use crate::spec::{self, FixedSizeBlock, Magic, Zip32CDEBlock, ZipLocalEntryBlock}; +use crate::spec::{ + self, FixedSizeBlock, Magic, Zip32CDEBlock, Zip64CentralDirectoryEnd, + Zip64CentralDirectoryEndLocator, ZipLocalEntryBlock, +}; use crate::types::EncryptWith; use crate::types::{AesVendorVersion, MIN_VERSION, System, ZipFileData, ZipRawValues, ffi}; use core::default::Default; @@ -789,7 +792,7 @@ impl Write for ZipWriter { if let Ok(count) = write_result { self.stats.update(&buf[..count]); // Only perform the expensive large-file check when we first cross the threshold. 
- if self.stats.bytes_written > spec::ZIP64_BYTES_THR { + if self.stats.bytes_written >= spec::ZIP64_BYTES_THR { let is_large_file = self .files .last() @@ -1845,7 +1848,7 @@ impl ZipWriter { fn finalize(&mut self) -> ZipResult { self.finish_file()?; - let mut central_start = self.write_central_and_footer()?; + let (central_start, is_zip64) = self.write_central_and_footer()?; let writer = self.inner.try_inner_mut()?; let footer_end = writer.stream_position()?; let archive_end = writer.seek(SeekFrom::End(0))?; @@ -1855,24 +1858,48 @@ impl ZipWriter { // Overwrite the magic so the footer is no longer valid. writer.seek(SeekFrom::Start(central_start))?; writer.write_u32_le(0)?; - writer.seek(SeekFrom::Start( - footer_end - - (size_of::() + size_of::()) as u64 - - self.comment.len() as u64, - ))?; + let start_zip32_cde = footer_end + - (size_of::() + size_of::()) as u64 + - self.comment.len() as u64; + writer.seek(SeekFrom::Start(start_zip32_cde))?; writer.write_u32_le(0)?; + let zip64_extensible_len = self + .zip64_extensible_data_sector + .as_ref() + .map(|e| e.len() as u64) + .unwrap_or(0); + if is_zip64 { + let start_zip64_locator = start_zip32_cde + - (size_of::() + size_of::()) as u64; + writer.seek(SeekFrom::Start(start_zip64_locator))?; + writer.write_u32_le(0)?; + let start_zip64_cde = start_zip64_locator + - (size_of::() + Zip64CentralDirectoryEnd::MIN_FULL_SIZE) as u64 + - zip64_extensible_len; + writer.seek(SeekFrom::Start(start_zip64_cde))?; + writer.write_u32_le(0)?; + } // Rewrite the footer at the actual end. let central_and_footer_size = footer_end - central_start; writer.seek(SeekFrom::End(-(central_and_footer_size as i64)))?; - central_start = self.write_central_and_footer()?; - debug_assert!(self.inner.try_inner_mut()?.stream_position()? 
== archive_end); + let (_, new_is_zip64) = self.write_central_and_footer()?; + if new_is_zip64 && new_is_zip64 != is_zip64 { + let new_size = (size_of::() + Zip64CentralDirectoryEnd::MIN_FULL_SIZE) + as u64 + + zip64_extensible_len + + (size_of::() + size_of::()) as u64; + let new_archive_end = archive_end + new_size; + debug_assert!(self.inner.try_inner_mut()?.stream_position()? == new_archive_end); + } else { + debug_assert!(self.inner.try_inner_mut()?.stream_position()? == archive_end); + } } Ok(central_start) } - fn write_central_and_footer(&mut self) -> Result { + fn write_central_and_footer(&mut self) -> Result<(u64, bool), ZipError> { let writer = self.inner.try_inner_mut()?; let mut version_needed = u16::from(MIN_VERSION); @@ -1918,7 +1945,7 @@ impl ZipWriter { } let central_directory_size = if is64 { - spec::ZIP64_BYTES_THR as u32 + spec::ZIP64_BYTES_THR_U32 } else { central_size.min(spec::ZIP64_BYTES_THR) as u32 }; @@ -1935,7 +1962,7 @@ impl ZipWriter { }; footer.write(writer)?; - Ok(central_start) + Ok((central_start, is64)) } fn index_by_name(&self, name: &[u8]) -> ZipResult { @@ -2425,16 +2452,13 @@ impl ZipFileData { ))?; writer.write_u32_le(self.crc32)?; if self.large_file { - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; + writer.write_u32_le(spec::ZIP64_BYTES_THR_U32)?; + writer.write_u32_le(spec::ZIP64_BYTES_THR_U32)?; self.update_local_zip64_extra_field(writer, file_name_raw)?; - - // self.compressed_size = spec::ZIP64_BYTES_THR; - // self.uncompressed_size = spec::ZIP64_BYTES_THR; } else { // check compressed size as well as it can also be slightly larger than uncompressed size - if self.compressed_size > spec::ZIP64_BYTES_THR { + if self.compressed_size >= spec::ZIP64_BYTES_THR { return Err(ZipError::Io(std::io::Error::other( "large_file(true) option has not been set", ))); @@ -2466,6 +2490,11 @@ impl ZipFileData { writer.seek(SeekFrom::Start(zip64_extra_field_start))?; 
zip64_block.write(writer)?; + if let Some(extra_field) = &mut self.extra_field { + let slice = Arc::make_mut(extra_field); + let mut cursor = Cursor::new(&mut slice[0..20]); + zip64_block.write(&mut cursor)?; + } Ok(()) } diff --git a/tests/append_near_4gb.rs b/tests/append_near_4gb.rs index ed90fef5e..26c58e1e6 100644 --- a/tests/append_near_4gb.rs +++ b/tests/append_near_4gb.rs @@ -1,18 +1,116 @@ -/// Only on little endian because we cannot use fs with miri CI -#[cfg(all(target_endian = "little", not(miri)))] -fn write_data(w: &mut dyn std::io::Write, size: usize) { +//! Tests related to large zip files + +// Only on little endian because we cannot use fs with miri CI +#![cfg(all(target_endian = "little", not(miri)))] + +fn write_data(w: &mut dyn std::io::Write, size: usize) -> Result<(), std::io::Error> { let chunks = 1 << 20; // 1MB chunks let mut written = 0; let buf = vec![0x21; chunks]; while written < size { let to_write = (size - written).min(chunks); - w.write_all(&buf[..to_write]).unwrap(); + w.write_all(&buf[..to_write])?; written += to_write; } + Ok(()) +} + +#[test] +fn test_append_4gb_without_large_file() { + use std::fs::File; + use tempfile::tempdir; + use zip::ZipWriter; + use zip::write::SimpleFileOptions; + + let dir = tempdir().unwrap(); + let path = dir + .path() + .join("debug_large_without_large_file_options.zip"); + //let path = std::path::PathBuf::from("debug_large_without_large_file_options.zip"); + + let file = File::create(&path).unwrap(); + let mut writer = ZipWriter::new(file); + + let opts = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + + writer.start_file_from_path("4gb_file", opts).unwrap(); + + // Write a file that's 4GB + let size = u32::MAX; + let write_result = write_data(&mut writer, size as usize); // expected to fail + + assert!(write_result.is_err()); +} + +/// We cannot run this test on wasm32 because we cannot fit u32::MAX in the usize +#[cfg(not(target_arch = "wasm32"))] +#[test] +fn 
test_append_4gb_with_large_file() { + use std::fs::File; + use std::io::Read; + use tempfile::tempdir; + use zip::ZipWriter; + use zip::write::SimpleFileOptions; + + let dir = tempdir().unwrap(); + let path = dir.path().join("debug_large_with_large_file_options.zip"); + //let path = std::path::PathBuf::from("debug_large_with_large_file_options.zip"); + + let zipfile = File::create(&path).unwrap(); + let mut writer = ZipWriter::new(zipfile); + + let opts = SimpleFileOptions::default() + .compression_method(zip::CompressionMethod::Stored) + .large_file(true); + + writer.start_file_from_path("4gb_file", opts).unwrap(); + + // Write a file that's 4GB + let size = u32::MAX; + let write_result = write_data(&mut writer, size as usize); // expected to succeed + + assert!(write_result.is_ok()); + let mut zip = writer.finish_into_readable().unwrap(); + let file_res = zip.by_name("4gb_file"); + assert!(file_res.is_ok()); + let file = file_res.unwrap(); + eprintln!("{file:?}"); + + let mut file = File::open(&path).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); + + // local header + assert_eq!(buffer[18..22], [0xFF, 0xFF, 0xFF, 0xFF]); + assert_eq!(buffer[22..26], [0xFF, 0xFF, 0xFF, 0xFF]); + + // extra field of local header + let extra_field_start = 38; + assert_eq!(buffer[extra_field_start..40], [0x01, 0x00]); + assert_eq!(buffer[40..42], [16, 0x00]); + assert_eq!( + buffer[42..50], + [0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00] + ); + assert_eq!( + buffer[50..58], + [0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00] + ); + + // extra fields of central header + let start = extra_field_start + 20 + u32::MAX as usize + 54; + assert_eq!(buffer[start..(start + 2)], [0x01, 0x00]); + assert_eq!(buffer[(start + 2)..(start + 4)], [16, 0x00]); + assert_eq!( + buffer[(start + 4)..(start + 12)], + [0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00] + ); + assert_eq!( + buffer[(start + 12)..(start + 20)], + [0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00] + 
); } -/// Only on little endian because we cannot use fs with miri CI -#[cfg(all(target_endian = "little", not(miri)))] #[test] fn test_append_near_4gb() { use std::fs::File; @@ -33,12 +131,12 @@ fn test_append_near_4gb() { writer.start_file_from_path("close_to_4gb", opts).unwrap(); // Write a file that's just under 4GB (4GB - 1 byte) - let size = u32::MAX; - write_data(&mut writer, size as usize); + let size = u32::MAX - 1; + write_data(&mut writer, size as usize).unwrap(); // Add a small file writer.start_file_from_path("small_file", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); } @@ -52,7 +150,7 @@ fn test_append_near_4gb() { // Add another small file writer.start_file_from_path("appended_file", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); } @@ -82,8 +180,6 @@ fn test_append_near_4gb() { } } -/// Only on little endian because we cannot use fs with miri CI -#[cfg(all(target_endian = "little", not(miri)))] #[test] fn test_append_near_4gb_with_1gb_files() { use std::fs::File; @@ -108,12 +204,12 @@ fn test_append_near_4gb_with_1gb_files() { // Write a file that's 1 GB let size = 1u64 << 30; - write_data(&mut writer, size as usize); + write_data(&mut writer, size as usize).unwrap(); } // Add a small file writer.start_file_from_path("small_file", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); } @@ -127,7 +223,7 @@ fn test_append_near_4gb_with_1gb_files() { // Add another small file writer.start_file_from_path("appended_file", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); } @@ -173,8 +269,6 @@ fn test_append_near_4gb_with_1gb_files() { } // A smaller test that doesn't create a 4GB file but still tests the logic -/// Only on little endian because we cannot use fs with miri CI 
-#[cfg(all(target_endian = "little", not(miri)))] #[test] fn test_append_with_large_file_flag() { use std::fs::File; @@ -195,7 +289,7 @@ fn test_append_with_large_file_flag() { .large_file(true); // Force ZIP64 format writer.start_file_from_path("file1", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); } @@ -209,7 +303,7 @@ fn test_append_with_large_file_flag() { // Add another file writer.start_file_from_path("file2", opts).unwrap(); - write_data(&mut writer, 1024); + write_data(&mut writer, 1024).unwrap(); writer.finish().unwrap(); }