-
Notifications
You must be signed in to change notification settings - Fork 100
feat: Enable creation of ZipArchive without reparsing
#485
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,7 @@ use crate::compression::{CompressionMethod, Decompressor}; | |
| use crate::cp437::FromCp437; | ||
| use crate::crc32::Crc32Reader; | ||
| use crate::extra_fields::{ExtendedTimestamp, ExtraField, Ntfs}; | ||
| use crate::read::zip_archive::{Shared, SharedBuilder}; | ||
| use crate::read::zip_archive::SharedBuilder; | ||
| use crate::result::invalid; | ||
| use crate::result::{ZipError, ZipResult}; | ||
| use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod}; | ||
|
|
@@ -31,6 +31,7 @@ use std::sync::{Arc, OnceLock}; | |
| mod config; | ||
|
|
||
| pub use config::*; | ||
| pub use zip_archive::Shared; | ||
|
|
||
| /// Provides high level API for reading from a stream. | ||
| pub(crate) mod stream; | ||
|
|
@@ -44,7 +45,7 @@ pub(crate) mod zip_archive { | |
|
|
||
| /// Extract immutable data from `ZipArchive` to make it cheap to clone | ||
| #[derive(Debug)] | ||
| pub(crate) struct Shared { | ||
| pub struct Shared { | ||
| pub(crate) files: IndexMap<Box<str>, super::ZipFileData>, | ||
| pub(super) offset: u64, | ||
| pub(super) dir_start: u64, | ||
|
|
@@ -763,6 +764,60 @@ impl<R: Read + Seek> ZipArchive<R> { | |
| Self::with_config(Default::default(), reader) | ||
| } | ||
|
|
||
| /// Returns a shared handle to the parsed metadata of this ZIP archive. | ||
| /// | ||
| /// The handle is an [`Arc`], so calling this only bumps a reference count; the | ||
| /// metadata itself is not copied. Pass it to [`Self::unsafe_new_with_metadata`] to | ||
| /// create another reader over the same file without needing to reparse the metadata. | ||
| pub fn metadata(&self) -> Arc<Shared> { | ||
| Arc::clone(&self.shared) | ||
| } | ||
|
|
||
| // UNSAFETY: Requires `unsafe` because this relies on the user to ensure | ||
| // `reader` and `metadata` are compatible. | ||
| // This is similar to [how `sguaba` uses `unsafe`](https://github.com/helsing-ai/sguaba/blob/6c82af9626d0fe761a75d023be571cebb5d7e5a0/src/lib.rs#L64). | ||
|
Comment on lines
+775
to
+777
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This comment block is redundant with the |
||
| /// Creates a ZIP archive reader from `reader` and previously parsed `metadata`. | ||
| /// | ||
| /// Nothing is parsed or validated here: `reader` and `metadata` are stored as given. | ||
| /// This is useful for creating multiple readers over the same file without | ||
| /// needing to reparse the metadata for each of them. | ||
| /// | ||
| /// # Safety | ||
| /// | ||
| /// `unsafe` is used here to indicate that `reader` and `metadata` could | ||
| /// potentially be incompatible. The caller must ensure that `metadata` (for | ||
| /// example obtained from [`Self::metadata`]) describes the archive that `reader` reads. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ```no_run | ||
| /// # use std::fs; | ||
| /// use rayon::prelude::*; | ||
| /// | ||
| /// const FILE_NAME: &str = "my_data.zip"; | ||
| /// | ||
| /// let file = fs::File::open(FILE_NAME).unwrap(); | ||
| /// let mut archive = zip::ZipArchive::new(file).unwrap(); | ||
| /// | ||
| /// let file_names = (0..archive.len()) | ||
| ///     .into_par_iter() | ||
| ///     .map_init( | ||
| ///         { | ||
| ///             // `metadata()` already returns an owned `Arc`, so no extra clone is needed here. | ||
| ///             let metadata = archive.metadata(); | ||
| ///             move || { | ||
| ///                 let file = fs::File::open(FILE_NAME).unwrap(); | ||
| ///                 // SAFETY: `file` is the same ZIP file that `metadata` was parsed from. | ||
| ///                 unsafe { zip::ZipArchive::unsafe_new_with_metadata(file, metadata.clone()) } | ||
| ///             } | ||
| ///         }, | ||
| ///         |archive, i| { | ||
| ///             let mut file = archive.by_index(i).unwrap(); | ||
| ///             file.enclosed_name() | ||
| ///         }, | ||
| ///     ) | ||
| ///     .filter_map(|name| name) | ||
| ///     .collect::<Vec<_>>(); | ||
| /// ``` | ||
| pub unsafe fn unsafe_new_with_metadata(reader: R, metadata: Arc<Shared>) -> Self { | ||
| // The caller vouches for compatibility (see `# Safety`), so just pair the two up. | ||
| Self { reader, shared: metadata } | ||
| } | ||
|
|
||
| /// Read a ZIP archive providing a read configuration, collecting the files it contains. | ||
| /// | ||
| /// This uses the central directory record of the ZIP file, and ignores local file headers. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `Shared` struct is defined inside the `pub(crate) mod zip_archive`, which makes it private to the crate. However, it's used in the public functions `metadata()` and `new_with_metadata()`. This will cause a compilation error because a private type is exposed in a public interface.

To fix this, you can move the `Shared` struct definition out of the `zip_archive` module to make it public within the `read` module. You'll also need to add `use super::Shared;` inside `zip_archive` and re-export `Shared` from `lib.rs`.

Example of changes in `src/read.rs`:

And in `src/lib.rs`, you'll need to make `Shared` public:

Note that I've also changed `pub(super)` fields to `pub(crate)` and removed `super::` prefixes as they are no longer needed after moving the struct.

This is a critical issue as it prevents the code from compiling.