From 9f3c178e2807dd3c32ffad3d500ed91e22bf6638 Mon Sep 17 00:00:00 2001 From: Brian Carlsen Date: Sat, 13 Dec 2025 20:25:06 +0100 Subject: [PATCH 1/2] add new_with_metadata for reader --- Cargo.toml | 1 + src/read.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 20b50c74b..be1cb2b62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1.0.95" clap = { version = "=4.4.18", features = ["derive"] } tempfile = "3.15" +rayon = "*" [features] aes-crypto = ["dep:aes", "dep:constant_time_eq", "dep:generic-array", "dep:getrandom", "dep:hmac", "dep:pbkdf2", "dep:sha1", "dep:zeroize"] diff --git a/src/read.rs b/src/read.rs index f45fb1908..20a485d18 100644 --- a/src/read.rs +++ b/src/read.rs @@ -44,7 +44,7 @@ pub(crate) mod zip_archive { /// Extract immutable data from `ZipArchive` to make it cheap to clone #[derive(Debug)] - pub(crate) struct Shared { + pub struct Shared { pub(crate) files: IndexMap<Box<str>, super::ZipFileData>, pub(super) offset: u64, pub(super) dir_start: u64, @@ -763,6 +763,60 @@ impl<R: Read + Seek> ZipArchive<R> { Self::with_config(Default::default(), reader) } + /// Get the metadata associated with the ZIP archive. + /// + /// This can be used with [`Self::new_with_metadata`] to create a new reader over the + /// same file without needing to reparse the metadata. + pub fn metadata(&self) -> Arc<Shared> { + self.shared.clone() + } + + // UNSAFETY: Requires `unsafe` because this relies on the user to ensure + // `reader` and `metadata` are compatible. + // This is similar to [how `sguaba` uses `unsafe`](https://github.com/helsing-ai/sguaba/blob/6c82af9626d0fe761a75d023be571cebb5d7e5a0/src/lib.rs#L64). + /// Read a ZIP archive using the given `metadata`. + /// + /// This is useful for creating multiple readers over the same file without + /// needing to reparse the metadata. 
+ /// /// # Safety /// `unsafe` is used here to indicate that `reader` and `metadata` could + /// potentially be incompatible, and it is left to the user to ensure they are. + /// + /// # Example + /// + /// ```no_run + /// # use std::fs; + /// use rayon::prelude::*; + /// + /// const FILE_NAME: &str = "my_data.zip"; + /// + /// let file = fs::File::open(FILE_NAME).unwrap(); + /// let mut archive = zip::ZipArchive::new(file).unwrap(); + /// + /// let file_names = (0..archive.len()) + /// .into_par_iter() + /// .map_init({ + /// let metadata = archive.metadata().clone(); + /// move || { + /// let file = fs::File::open(FILE_NAME).unwrap(); + /// unsafe { zip::ZipArchive::new_with_metadata(file, metadata.clone()) } + /// }}, + /// |archive, i| { + /// let mut file = archive.by_index(i).unwrap(); + /// file.enclosed_name() + /// } + /// ) + /// .filter_map(|name| name) + /// .collect::<Vec<_>>(); + /// ``` + pub unsafe fn new_with_metadata(reader: R, metadata: Arc<Shared>) -> Self { + Self { + reader, + shared: metadata, + } + } + /// Read a ZIP archive providing a read configuration, collecting the files it contains. /// /// This uses the central directory record of the ZIP file, and ignores local file headers. From 5dbcdabf97d93314a9de075caf4ad82823bf15eb Mon Sep 17 00:00:00 2001 From: Brian Carlsen Date: Sun, 14 Dec 2025 06:23:13 +0100 Subject: [PATCH 2/2] change visibility of crate::read::zip_archive::Shared to be pub. minor cleanup. 
--- Cargo.toml | 2 +- src/read.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index be1cb2b62..c6a2d1f6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,7 @@ time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1.0.95" clap = { version = "=4.4.18", features = ["derive"] } tempfile = "3.15" -rayon = "*" +rayon = "1.11.0" [features] aes-crypto = ["dep:aes", "dep:constant_time_eq", "dep:generic-array", "dep:getrandom", "dep:hmac", "dep:pbkdf2", "dep:sha1", "dep:zeroize"] diff --git a/src/read.rs b/src/read.rs index 20a485d18..9abd11ca1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -6,7 +6,7 @@ use crate::compression::{CompressionMethod, Decompressor}; use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs}; -use crate::read::zip_archive::{Shared, SharedBuilder}; +use crate::read::zip_archive::SharedBuilder; use crate::result::invalid; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod}; @@ -31,6 +31,7 @@ use std::sync::{Arc, OnceLock}; mod config; pub use config::*; +pub use zip_archive::Shared; /// Provides high level API for reading from a stream. pub(crate) mod stream; @@ -810,7 +811,7 @@ impl<R: Read + Seek> ZipArchive<R> { /// .filter_map(|name| name) /// .collect::<Vec<_>>(); /// ``` - pub unsafe fn new_with_metadata(reader: R, metadata: Arc<Shared>) -> Self { + pub unsafe fn unsafe_new_with_metadata(reader: R, metadata: Arc<Shared>) -> Self { Self { reader, shared: metadata,