diff --git a/Cargo.lock b/Cargo.lock index 2d464709fd..601766bc5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,6 +177,7 @@ checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", + "crc32fast", "digest", "log", "miniz_oxide", @@ -187,6 +188,7 @@ dependencies = [ "serde", "serde_bytes", "serde_json", + "snap", "strum", "strum_macros", "thiserror 2.0.17", @@ -3372,7 +3374,9 @@ dependencies = [ "futures", "iceberg_test_utils", "itertools 0.13.0", + "log", "minijinja", + "miniz_oxide", "mockall", "moka", "murmur3", diff --git a/Cargo.toml b/Cargo.toml index ded3aedecb..26c774db98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ rust-version = "1.88" [workspace.dependencies] anyhow = "1.0.72" -apache-avro = { version = "0.21", features = ["zstandard"] } +apache-avro = { version = "0.21", features = ["zstandard", "snappy"] } array-init = "2" arrow-arith = "57.0" arrow-array = "57.0" @@ -93,6 +93,7 @@ log = "0.4.28" metainfo = "0.7.14" mimalloc = "0.1.46" minijinja = "2.12.0" +miniz_oxide = "0.8" mockall = "0.13.1" mockito = "1" motore-macros = "0.4.3" diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index 457471b34a..f6e2a851be 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -307,7 +307,6 @@ mod tests { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); let metadata_location = MetadataLocation::new_with_table_location(location).to_string(); - let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) .with_fields(vec![ @@ -336,8 +335,13 @@ mod tests { .location(metadata.location()) .build(); - let result = - convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?; + let result = convert_to_glue_table( + &table_name, + metadata_location, + &metadata, + metadata.properties(), + None, + )?; assert_eq!(result.name(), &table_name); assert_eq!(result.description(), None); diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 6f1332a444..362de7030a 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -65,6 +65,8 @@ flate2 = { workspace = true } fnv = { workspace = true } futures = { workspace = true } itertools = { workspace = true } +log = { workspace = true } +miniz_oxide = { workspace = true } moka = { version = "0.12.10", features = ["future"] } murmur3 = { workspace = true } num-bigint = { workspace = true } diff --git a/crates/iceberg/src/io/object_cache.rs b/crates/iceberg/src/io/object_cache.rs index af297bebb5..1d4c497a69 100644 --- a/crates/iceberg/src/io/object_cache.rs +++ b/crates/iceberg/src/io/object_cache.rs @@ -185,6 +185,7 @@ impl ObjectCache { mod tests { use std::fs; + use apache_avro::Codec; use minijinja::value::Value; use minijinja::{AutoEscape, Environment, context}; use tempfile::TempDir; @@ -275,6 +276,7 @@ mod tests { None, current_schema.clone(), current_partition_spec.as_ref().clone(), + Codec::Null, ) .build_v2_data(); writer @@ -307,6 +309,7 @@ mod tests { current_snapshot.snapshot_id(), current_snapshot.parent_snapshot_id(), current_snapshot.sequence_number(), + Codec::Null, ); manifest_list_write .add_manifests(vec![data_file_manifest].into_iter()) diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 1f7fa50df8..2445c4c0f9 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -565,11 +565,12 @@ pub mod tests { //! shared tests for the table scan API #![allow(missing_docs)] - use std::collections::HashMap; + use std::collections::{HashMap, HashSet}; use std::fs; use std::fs::File; use std::sync::Arc; + use apache_avro::Codec; use arrow_array::cast::AsArray; use arrow_array::{ Array, ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, @@ -763,6 +764,7 @@ pub mod tests { None, current_schema.clone(), current_partition_spec.as_ref().clone(), + Codec::Null, ) .build_v2_data(); writer @@ -840,6 +842,7 @@ pub mod tests { current_snapshot.snapshot_id(), current_snapshot.parent_snapshot_id(), current_snapshot.sequence_number(), + Codec::Null, ); manifest_list_write .add_manifests(vec![data_file_manifest].into_iter()) @@ -975,6 +978,7 @@ pub mod tests { None, current_schema.clone(), current_partition_spec.as_ref().clone(), + Codec::Null, ) .build_v2_data(); @@ -1059,6 +1063,7 @@ pub mod tests { current_snapshot.snapshot_id(), current_snapshot.parent_snapshot_id(), current_snapshot.sequence_number(), + Codec::Null, ); manifest_list_write .add_manifests(vec![data_file_manifest].into_iter()) @@ -1186,6 +1191,7 @@ pub mod tests { None, current_schema.clone(), current_partition_spec.as_ref().clone(), + Codec::Null, ) .build_v2_data(); @@ -1221,6 +1227,7 @@ pub mod tests { None, current_schema.clone(), current_partition_spec.as_ref().clone(), + Codec::Null, ) .build_v2_deletes(); @@ -1255,6 +1262,7 @@ pub mod tests { current_snapshot.snapshot_id(), current_snapshot.parent_snapshot_id(), current_snapshot.sequence_number(), + Codec::Null, ); manifest_list_write .add_manifests(vec![data_manifest, delete_manifest].into_iter()) @@ -1908,8 +1916,6 @@ pub mod tests { #[tokio::test] async fn test_select_with_file_column() { - use arrow_array::cast::AsArray; - let mut fixture = TableTestFixture::new(); fixture.setup_manifest_files().await; @@ -2031,8 +2037,6 @@ pub mod tests { #[tokio::test] async fn test_file_column_with_multiple_files() { - use std::collections::HashSet; - let mut fixture = TableTestFixture::new(); fixture.setup_manifest_files().await; diff --git a/crates/iceberg/src/spec/avro_util.rs b/crates/iceberg/src/spec/avro_util.rs new file mode 100644 index 0000000000..86166129f2 --- /dev/null +++ b/crates/iceberg/src/spec/avro_util.rs @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utilities for working with Apache Avro in Iceberg. + +use apache_avro::{Codec, DeflateSettings, ZstandardSettings}; +use log::warn; +use miniz_oxide::deflate::CompressionLevel; + +/// Codec name for gzip compression +pub const CODEC_GZIP: &str = "gzip"; +/// Codec name for zstd compression +pub const CODEC_ZSTD: &str = "zstd"; +/// Codec name for snappy compression +pub const CODEC_SNAPPY: &str = "snappy"; +/// Codec name for uncompressed +pub const CODEC_UNCOMPRESSED: &str = "uncompressed"; + +/// Default compression level for gzip (matches Java implementation) +const DEFAULT_GZIP_LEVEL: u8 = 9; +/// Default compression level for zstd (matches Java implementation) +const DEFAULT_ZSTD_LEVEL: u8 = 1; +/// Max supported level for ZSTD +const MAX_ZSTD_LEVEL: u8 = 22; + +/// Convert codec name and level to apache_avro::Codec. +/// Returns Codec::Null for unknown or unsupported codecs. +/// +/// # Arguments +/// +/// * `codec` - The name of the compression codec (e.g., "gzip", "zstd", "snappy", "uncompressed") +/// * `level` - The compression level. For gzip/deflate: +/// - 0: NoCompression +/// - 1: BestSpeed +/// - 9: BestCompression +/// - 10: UberCompression +/// - 6: DefaultLevel (balanced speed/compression) +/// - Other values: DefaultLevel +/// +/// For zstd, level is clamped to valid range (0-22). +/// When `None`, uses codec-specific defaults. +/// +/// # Supported Codecs +/// +/// - `gzip`: Uses Deflate compression with specified level +/// - `zstd`: Uses Zstandard compression (level clamped to valid zstd range 0-22) +/// - `snappy`: Uses Snappy compression (level parameter ignored) +/// - `uncompressed` or `None`: No compression +/// - Any other value: Defaults to no compression (Codec::Null) +pub(crate) fn codec_from_str(codec: Option<&str>, level: Option) -> Codec { + // Use case-insensitive comparison to match Java implementation + match codec.map(|s| s.to_lowercase()).as_deref() { + Some(c) if c == CODEC_GZIP => { + // Map compression level to miniz_oxide::deflate::CompressionLevel + // Reference: https://docs.rs/miniz_oxide/latest/miniz_oxide/deflate/enum.CompressionLevel.html + let compression_level = match level.unwrap_or(DEFAULT_GZIP_LEVEL) { + 0 => CompressionLevel::NoCompression, + 1 => CompressionLevel::BestSpeed, + 9 => CompressionLevel::BestCompression, + 10 => CompressionLevel::UberCompression, + _ => CompressionLevel::DefaultLevel, + }; + + Codec::Deflate(DeflateSettings::new(compression_level)) + } + Some(c) if c == CODEC_ZSTD => { + // Zstandard supports levels 0-22, clamp to valid range + let zstd_level = level.unwrap_or(DEFAULT_ZSTD_LEVEL).min(MAX_ZSTD_LEVEL); + Codec::Zstandard(ZstandardSettings::new(zstd_level)) + } + Some(c) if c == CODEC_SNAPPY => Codec::Snappy, + Some(c) if c == CODEC_UNCOMPRESSED => Codec::Null, + None => Codec::Null, + Some(unknown) => { + warn!("Unrecognized compression codec '{unknown}', using no compression (Codec::Null)"); + Codec::Null + } + } +} + +#[cfg(test)] +mod tests { + use apache_avro::{DeflateSettings, ZstandardSettings}; + use miniz_oxide::deflate::CompressionLevel; + + use super::*; + + #[test] + fn test_codec_from_str_gzip() { + // Test with mixed case to verify case-insensitive matching + let codec = codec_from_str(Some("GZip"), Some(5)); + assert_eq!( + codec, + Codec::Deflate(DeflateSettings::new(CompressionLevel::DefaultLevel)) + ); + } + + #[test] + fn test_codec_from_str_snappy() { + let codec = codec_from_str(Some("snappy"), None); + assert_eq!(codec, Codec::Snappy); + } + + #[test] + fn test_codec_from_str_zstd() { + let codec = codec_from_str(Some("zstd"), Some(3)); + assert_eq!(codec, Codec::Zstandard(ZstandardSettings::new(3))); + } + + #[test] + fn test_codec_from_str_zstd_clamping() { + let codec = codec_from_str(Some("zstd"), Some(MAX_ZSTD_LEVEL + 1)); + assert_eq!( + codec, + Codec::Zstandard(ZstandardSettings::new(MAX_ZSTD_LEVEL)) + ); + } + + #[test] + fn test_codec_from_str_uncompressed() { + let codec = codec_from_str(Some("uncompressed"), None); + assert!(matches!(codec, Codec::Null)); + } + + #[test] + fn test_codec_from_str_null() { + let codec = codec_from_str(None, None); + assert!(matches!(codec, Codec::Null)); + } + + #[test] + fn test_codec_from_str_unknown() { + let codec = codec_from_str(Some("unknown"), Some(1)); + assert!(matches!(codec, Codec::Null)); + } + + #[test] + fn test_codec_from_str_gzip_default_level() { + // Test that None level defaults to 9 for gzip + let codec = codec_from_str(Some("gzip"), None); + assert_eq!( + codec, + Codec::Deflate(DeflateSettings::new(CompressionLevel::BestCompression)) + ); + } + + #[test] + fn test_codec_from_str_zstd_default_level() { + // Test that None level defaults to 1 for zstd + let codec = codec_from_str(Some("zstd"), None); + assert_eq!(codec, Codec::Zstandard(ZstandardSettings::new(1))); + } +} diff --git a/crates/iceberg/src/spec/manifest/mod.rs b/crates/iceberg/src/spec/manifest/mod.rs index b126396e3c..0ec0a82b15 100644 --- a/crates/iceberg/src/spec/manifest/mod.rs +++ b/crates/iceberg/src/spec/manifest/mod.rs @@ -161,6 +161,7 @@ mod tests { use std::fs; use std::sync::Arc; + use apache_avro::Codec; use serde_json::Value; use tempfile::TempDir; @@ -272,6 +273,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v2_data(); for entry in &entries { @@ -457,6 +459,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v2_data(); for entry in &entries { @@ -554,6 +557,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v1(); for entry in &entries { @@ -663,6 +667,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v1(); for entry in &entries { @@ -771,6 +776,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v2_data(); for entry in &entries { @@ -1050,6 +1056,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v2_data(); for entry in &entries { diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index 2fb6a42062..46828054b8 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -17,7 +17,7 @@ use std::cmp::min; -use apache_avro::{Writer as AvroWriter, to_value}; +use apache_avro::{Codec, Writer as AvroWriter, to_value}; use bytes::Bytes; use itertools::Itertools; use serde_json::to_vec; @@ -43,6 +43,7 @@ pub struct ManifestWriterBuilder { key_metadata: Option>, schema: SchemaRef, partition_spec: PartitionSpec, + compression: Codec, } impl ManifestWriterBuilder { @@ -53,6 +54,7 @@ impl ManifestWriterBuilder { key_metadata: Option>, schema: SchemaRef, partition_spec: PartitionSpec, + compression: Codec, ) -> Self { Self { output, @@ -60,6 +62,7 @@ impl ManifestWriterBuilder { key_metadata, schema, partition_spec, + compression, } } @@ -78,6 +81,7 @@ impl ManifestWriterBuilder { self.key_metadata, metadata, None, + self.compression, ) } @@ -96,6 +100,7 @@ impl ManifestWriterBuilder { self.key_metadata, metadata, None, + self.compression, ) } @@ -114,6 +119,7 @@ impl ManifestWriterBuilder { self.key_metadata, metadata, None, + self.compression, ) } @@ -134,6 +140,7 @@ impl ManifestWriterBuilder { // First row id is assigned by the [`ManifestListWriter`] when the manifest // is added to the list. None, + self.compression, ) } @@ -152,6 +159,7 @@ impl ManifestWriterBuilder { self.key_metadata, metadata, None, + self.compression, ) } } @@ -177,6 +185,8 @@ pub struct ManifestWriter { manifest_entries: Vec, metadata: ManifestMetadata, + + compression: Codec, } impl ManifestWriter { @@ -187,6 +197,7 @@ impl ManifestWriter { key_metadata: Option>, metadata: ManifestMetadata, first_row_id: Option, + compression: Codec, ) -> Self { Self { output, @@ -202,6 +213,7 @@ impl ManifestWriter { key_metadata, manifest_entries: Vec::new(), metadata, + compression, } } @@ -410,7 +422,8 @@ impl ManifestWriter { // Manifest schema did not change between V2 and V3 FormatVersion::V2 | FormatVersion::V3 => manifest_schema_v2(&partition_type)?, }; - let mut avro_writer = AvroWriter::new(&avro_schema, Vec::new()); + + let mut avro_writer = AvroWriter::with_codec(&avro_schema, Vec::new(), self.compression); avro_writer.add_user_metadata( "schema".to_string(), to_vec(table_schema).map_err(|err| { @@ -553,11 +566,17 @@ mod tests { use std::fs; use std::sync::Arc; + use apache_avro::DeflateSettings; + use miniz_oxide::deflate::CompressionLevel; use tempfile::TempDir; use super::*; use crate::io::FileIOBuilder; - use crate::spec::{DataFileFormat, Manifest, NestedField, PrimitiveType, Schema, Struct, Type}; + use crate::spec::{ + DataContentType, DataFileBuilder, DataFileFormat, Manifest, ManifestContentType, + ManifestEntry, ManifestMetadata, ManifestStatus, NestedField, PartitionSpec, PrimitiveType, + Schema, Struct, Type, + }; #[tokio::test] async fn test_add_delete_existing() { @@ -689,6 +708,7 @@ mod tests { None, metadata.schema.clone(), metadata.partition_spec.clone(), + Codec::Null, ) .build_v2_data(); writer.add_entry(entries[0].clone()).unwrap(); @@ -708,4 +728,117 @@ mod tests { entries[0].file_sequence_number = None; assert_eq!(actual_manifest, Manifest::new(metadata, entries)); } + + #[tokio::test] + async fn test_manifest_writer_with_compression() { + let metadata = { + let schema = Schema::builder() + .with_fields(vec![Arc::new(NestedField::required( + 1, + "id", + Type::Primitive(PrimitiveType::Int), + ))]) + .build() + .unwrap(); + + ManifestMetadata { + schema_id: 0, + schema: Arc::new(schema), + partition_spec: PartitionSpec::unpartition_spec(), + format_version: FormatVersion::V2, + content: ManifestContentType::Data, + } + }; + + // Write uncompressed manifest with multiple entries to make compression effective + let tmp_dir = TempDir::new().unwrap(); + let uncompressed_path = tmp_dir.path().join("uncompressed_manifest.avro"); + let io = FileIOBuilder::new_fs_io().build().unwrap(); + let output_file = io.new_output(uncompressed_path.to_str().unwrap()).unwrap(); + let mut writer = ManifestWriterBuilder::new( + output_file, + Some(1), + None, + metadata.schema.clone(), + metadata.partition_spec.clone(), + Codec::Null, + ) + .build_v2_data(); + // Add multiple entries with long paths to create compressible data + for i in 0..1000 { + let data_file = DataFileBuilder::default() + .content(DataContentType::Data) + .file_path(format!( + "/very/long/path/to/data/directory/with/many/subdirectories/file_{i}.parquet" + )) + .file_format(DataFileFormat::Parquet) + .partition(Struct::empty()) + .file_size_in_bytes(100000 + i) + .record_count(1000 + i) + .build() + .unwrap(); + + let entry = ManifestEntry::builder() + .status(ManifestStatus::Added) + .snapshot_id(1) + .sequence_number(1) + .file_sequence_number(1) + .data_file(data_file) + .build(); + writer.add_entry(entry).unwrap(); + } + writer.write_manifest_file().await.unwrap(); + let uncompressed_size = fs::metadata(&uncompressed_path).unwrap().len(); + + // Write compressed manifest with gzip + let compressed_path = tmp_dir.path().join("compressed_manifest.avro"); + let output_file = io.new_output(compressed_path.to_str().unwrap()).unwrap(); + let compression = Codec::Deflate(DeflateSettings::new(CompressionLevel::BestCompression)); + let mut writer = ManifestWriterBuilder::new( + output_file, + Some(1), + None, + metadata.schema.clone(), + metadata.partition_spec.clone(), + compression, + ) + .build_v2_data(); + // Add the same entries with long paths as the uncompressed version + for i in 0..1000 { + let data_file = DataFileBuilder::default() + .content(DataContentType::Data) + .file_path(format!( + "/very/long/path/to/data/directory/with/many/subdirectories/file_{i}.parquet" + )) + .file_format(DataFileFormat::Parquet) + .partition(Struct::empty()) + .file_size_in_bytes(100000 + i) + .record_count(1000 + i) + .build() + .unwrap(); + + let entry = ManifestEntry::builder() + .status(ManifestStatus::Added) + .snapshot_id(1) + .sequence_number(1) + .file_sequence_number(1) + .data_file(data_file) + .build(); + writer.add_entry(entry).unwrap(); + } + writer.write_manifest_file().await.unwrap(); + let compressed_size = fs::metadata(&compressed_path).unwrap().len(); + + // Verify compression is actually working + assert!( + compressed_size < uncompressed_size, + "Compressed size ({compressed_size}) should be less than uncompressed size ({uncompressed_size})" + ); + + // Verify the compressed file can be read back correctly + let compressed_bytes = fs::read(&compressed_path).unwrap(); + let manifest = Manifest::parse_avro(&compressed_bytes).unwrap(); + assert_eq!(manifest.metadata.format_version, FormatVersion::V2); + assert_eq!(manifest.entries.len(), 1000); + } } diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index 5e97e5466e..999f6bc3e7 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; use std::str::FromStr; use apache_avro::types::Value; -use apache_avro::{Reader, Writer, from_value}; +use apache_avro::{Codec, Reader, Writer, from_value}; use bytes::Bytes; pub use serde_bytes::ByteBuf; use serde_derive::{Deserialize, Serialize}; @@ -117,7 +117,12 @@ impl ManifestListWriter { } /// Construct a v1 [`ManifestListWriter`] that writes to a provided [`OutputFile`]. - pub fn v1(output_file: OutputFile, snapshot_id: i64, parent_snapshot_id: Option) -> Self { + pub fn v1( + output_file: OutputFile, + snapshot_id: i64, + parent_snapshot_id: Option, + compression: Codec, + ) -> Self { let mut metadata = HashMap::from_iter([ ("snapshot-id".to_string(), snapshot_id.to_string()), ("format-version".to_string(), "1".to_string()), @@ -135,6 +140,7 @@ impl ManifestListWriter { 0, snapshot_id, None, + compression, ) } @@ -144,6 +150,7 @@ impl ManifestListWriter { snapshot_id: i64, parent_snapshot_id: Option, sequence_number: i64, + compression: Codec, ) -> Self { let mut metadata = HashMap::from_iter([ ("snapshot-id".to_string(), snapshot_id.to_string()), @@ -163,6 +170,7 @@ impl ManifestListWriter { sequence_number, snapshot_id, None, + compression, ) } @@ -173,6 +181,7 @@ impl ManifestListWriter { parent_snapshot_id: Option, sequence_number: i64, first_row_id: Option, // Always None for delete manifests + compression: Codec, ) -> Self { let mut metadata = HashMap::from_iter([ ("snapshot-id".to_string(), snapshot_id.to_string()), @@ -198,6 +207,7 @@ impl ManifestListWriter { sequence_number, snapshot_id, first_row_id, + compression, ) } @@ -208,13 +218,15 @@ impl ManifestListWriter { sequence_number: i64, snapshot_id: i64, first_row_id: Option, + compression: Codec, ) -> Self { let avro_schema = match format_version { FormatVersion::V1 => &MANIFEST_LIST_AVRO_SCHEMA_V1, FormatVersion::V2 => &MANIFEST_LIST_AVRO_SCHEMA_V2, FormatVersion::V3 => &MANIFEST_LIST_AVRO_SCHEMA_V3, }; - let mut avro_writer = Writer::new(avro_schema, Vec::new()); + + let mut avro_writer = Writer::with_codec(avro_schema, Vec::new(), compression); for (key, value) in metadata { avro_writer .add_user_metadata(key, value) @@ -1361,12 +1373,13 @@ pub(super) mod _serde { mod test { use std::fs; - use apache_avro::{Reader, Schema}; + use apache_avro::{Codec, DeflateSettings, Reader, Schema}; + use miniz_oxide::deflate::CompressionLevel; use tempfile::TempDir; use super::_serde::ManifestListV2; use crate::io::FileIOBuilder; - use crate::spec::manifest_list::_serde::{ManifestListV1, ManifestListV3}; + use crate::spec::manifest_list::_serde::{ManifestFileV1, ManifestListV1, ManifestListV3}; use crate::spec::{ Datum, FieldSummary, ManifestContentType, ManifestFile, ManifestList, ManifestListWriter, UNASSIGNED_SEQUENCE_NUMBER, @@ -1407,6 +1420,7 @@ mod test { file_io.new_output(full_path.clone()).unwrap(), 1646658105718557341, Some(1646658105718557341), + Codec::Null, ); writer @@ -1480,6 +1494,7 @@ mod test { 1646658105718557341, Some(1646658105718557341), 1, + Codec::Null, ); writer @@ -1554,6 +1569,7 @@ mod test { Some(377075049360453639), 1, Some(10), + Codec::Null, ); writer @@ -1690,7 +1706,8 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v1(output_file, 1646658105718557341, Some(0)); + let mut writer = + ManifestListWriter::v1(output_file, 1646658105718557341, Some(0), Codec::Null); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1737,7 +1754,8 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v2(output_file, snapshot_id, Some(0), seq_num); + let mut writer = + ManifestListWriter::v2(output_file, snapshot_id, Some(0), seq_num, Codec::Null); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1785,8 +1803,14 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = - ManifestListWriter::v3(output_file, snapshot_id, Some(0), seq_num, Some(10)); + let mut writer = ManifestListWriter::v3( + output_file, + snapshot_id, + Some(0), + seq_num, + Some(10), + Codec::Null, + ); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1833,7 +1857,8 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v1(output_file, 1646658105718557341, Some(0)); + let mut writer = + ManifestListWriter::v1(output_file, 1646658105718557341, Some(0), Codec::Null); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1878,7 +1903,8 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v1(output_file, 1646658105718557341, Some(0)); + let mut writer = + ManifestListWriter::v1(output_file, 1646658105718557341, Some(0), Codec::Null); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1925,7 +1951,8 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v2(output_file, snapshot_id, Some(0), seq_num); + let mut writer = + ManifestListWriter::v2(output_file, snapshot_id, Some(0), seq_num, Codec::Null); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); @@ -1994,8 +2021,6 @@ mod test { #[test] fn test_manifest_file_v1_to_v2_projection() { - use crate::spec::manifest_list::_serde::ManifestFileV1; - // Create a V1 manifest file object (without V2 fields) let v1_manifest = ManifestFileV1 { manifest_path: "/test/manifest.avro".to_string(), @@ -2044,4 +2069,89 @@ mod test { assert_eq!(v2_manifest.partitions, None); assert_eq!(v2_manifest.key_metadata, None); } + + #[tokio::test] + async fn test_manifest_list_writer_with_compression() { + // Create multiple manifest entries to make compression effective + let mut entries = Vec::new(); + for i in 0..100 { + entries.push(ManifestFile { + manifest_path: format!("/test/manifest{i}.avro"), + manifest_length: 1000 + i, + partition_spec_id: 0, + content: ManifestContentType::Data, + sequence_number: 1, + min_sequence_number: 1, + added_snapshot_id: 1646658105718557341, + added_files_count: Some(10), + existing_files_count: Some(5), + deleted_files_count: Some(2), + added_rows_count: Some(100), + existing_rows_count: Some(50), + deleted_rows_count: Some(20), + partitions: None, + key_metadata: None, + first_row_id: None, + }); + } + let manifest_list = ManifestList { entries }; + + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let tmp_dir = TempDir::new().unwrap(); + + // Write uncompressed manifest list + let uncompressed_path = tmp_dir + .path() + .join("uncompressed_manifest_list.avro") + .to_str() + .unwrap() + .to_string(); + let mut writer = ManifestListWriter::v2( + file_io.new_output(&uncompressed_path).unwrap(), + 1646658105718557341, + Some(0), + 1, + Codec::Null, + ); + writer + .add_manifests(manifest_list.entries.clone().into_iter()) + .unwrap(); + writer.close().await.unwrap(); + let uncompressed_size = fs::metadata(&uncompressed_path).unwrap().len(); + + // Write compressed manifest list with gzip + let compressed_path = tmp_dir + .path() + .join("compressed_manifest_list.avro") + .to_str() + .unwrap() + .to_string(); + + let compression = Codec::Deflate(DeflateSettings::new(CompressionLevel::BestCompression)); + let mut writer = ManifestListWriter::v2( + file_io.new_output(&compressed_path).unwrap(), + 1646658105718557341, + Some(0), + 1, + compression, + ); + writer + .add_manifests(manifest_list.entries.clone().into_iter()) + .unwrap(); + writer.close().await.unwrap(); + let compressed_size = fs::metadata(&compressed_path).unwrap().len(); + + // Verify compression is actually working + assert!( + compressed_size < uncompressed_size, + "Compressed size ({compressed_size}) should be less than uncompressed size ({uncompressed_size})" + ); + + // Verify the compressed file can be read back correctly + let compressed_bytes = fs::read(&compressed_path).unwrap(); + let parsed_manifest_list = + ManifestList::parse_with_version(&compressed_bytes, crate::spec::FormatVersion::V2) + .unwrap(); + assert_eq!(manifest_list, parsed_manifest_list); + } } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 44b35e5a6b..cfe9132193 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -17,6 +17,7 @@ //! Spec for Iceberg. +mod avro_util; mod datatypes; mod encrypted_key; mod manifest; diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 4975456010..514827e7fe 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -16,21 +16,31 @@ // under the License. use std::collections::HashMap; +use std::fmt::Display; +use std::str::FromStr; + +use apache_avro::Codec; + +use crate::error::{Error, ErrorKind}; +use crate::spec::avro_util; // Helper function to parse a property from a HashMap // If the property is not found, use the default value -fn parse_property( +fn parse_property( properties: &HashMap, key: &str, default: T, -) -> Result +) -> crate::error::Result where - ::Err: std::fmt::Display, + ::Err: Display, { properties.get(key).map_or(Ok(default), |value| { - value - .parse::() - .map_err(|e| anyhow::anyhow!("Invalid value for {key}: {e}")) + value.parse::().map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid value for {key}: {e}"), + ) + }) }) } @@ -49,6 +59,8 @@ pub struct TableProperties { pub write_format_default: String, /// The target file size for files. pub write_target_file_size_bytes: usize, + /// Compression codec for Avro files (manifests, manifest lists) + pub avro_compression_codec: Codec, } impl TableProperties { @@ -137,11 +149,19 @@ impl TableProperties { pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; /// Default target file size pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB + + /// Compression codec for Avro files (manifests, manifest lists) + pub const PROPERTY_AVRO_COMPRESSION_CODEC: &str = "write.avro.compression-codec"; + /// Default Avro compression codec - gzip + pub const PROPERTY_AVRO_COMPRESSION_CODEC_DEFAULT: &str = "gzip"; + + /// Compression level for Avro files + pub const PROPERTY_AVRO_COMPRESSION_LEVEL: &str = "write.avro.compression-level"; } impl TryFrom<&HashMap> for TableProperties { // parse by entry key or use default value - type Error = anyhow::Error; + type Error = crate::Error; fn try_from(props: &HashMap) -> Result { Ok(TableProperties { @@ -175,12 +195,38 @@ impl TryFrom<&HashMap> for TableProperties { TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, )?, + avro_compression_codec: { + // Parse codec name and level from properties + let codec_name = parse_property( + props, + TableProperties::PROPERTY_AVRO_COMPRESSION_CODEC, + TableProperties::PROPERTY_AVRO_COMPRESSION_CODEC_DEFAULT.to_string(), + )?; + + // Parse optional compression level (sentinel value 255 means not specified) + let level_raw = parse_property( + props, + TableProperties::PROPERTY_AVRO_COMPRESSION_LEVEL, + 255u8, + )?; + let level = if level_raw == 255 { + None + } else { + Some(level_raw) + }; + + // Convert to Codec + avro_util::codec_from_str(Some(&codec_name), level) + }, }) } } #[cfg(test)] mod tests { + use apache_avro::{DeflateSettings, ZstandardSettings}; + use miniz_oxide::deflate::CompressionLevel; + use super::*; #[test] @@ -207,6 +253,31 @@ mod tests { table_properties.write_target_file_size_bytes, TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT ); + // Test compression defaults - should be gzip with default level (9) + assert_eq!( + table_properties.avro_compression_codec, + Codec::Deflate(DeflateSettings::new(CompressionLevel::BestCompression)) + ); + } + + #[test] + fn test_table_properties_compression() { + let props = HashMap::from([ + ( + TableProperties::PROPERTY_AVRO_COMPRESSION_CODEC.to_string(), + "zstd".to_string(), + ), + ( + TableProperties::PROPERTY_AVRO_COMPRESSION_LEVEL.to_string(), + "3".to_string(), + ), + ]); + let table_properties = TableProperties::try_from(&props).unwrap(); + // Check that it parsed to a Zstandard codec with level 3 + assert_eq!( + table_properties.avro_compression_codec, + Codec::Zstandard(ZstandardSettings::new(3)) + ); } #[test] diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index c8bf26a174..743c925247 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -233,6 +233,17 @@ impl<'a> SnapshotProducer<'a> { DataFileFormat::Avro ); let output_file = self.table.file_io().new_output(new_manifest_path)?; + + // Get compression settings from table properties + let table_props = + TableProperties::try_from(self.table.metadata().properties()).map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + "Failed to parse table properties for compression settings", + ) + .with_source(e) + })?; + let builder = ManifestWriterBuilder::new( output_file, Some(self.snapshot_id), @@ -243,7 +254,9 @@ impl<'a> SnapshotProducer<'a> { .default_partition_spec() .as_ref() .clone(), + table_props.avro_compression_codec, ); + match self.table.metadata().format_version() { FormatVersion::V1 => Ok(builder.build_v1()), FormatVersion::V2 => match content { @@ -424,6 +437,19 @@ impl<'a> SnapshotProducer<'a> { let manifest_list_path = self.generate_manifest_list_file_path(0); let next_seq_num = self.table.metadata().next_sequence_number(); let first_row_id = self.table.metadata().next_row_id(); + + // Get compression settings from table properties + let table_props = + TableProperties::try_from(self.table.metadata().properties()).map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + "Failed to parse table properties for compression settings", + ) + .with_source(e) + })?; + + let compression = table_props.avro_compression_codec; + let mut manifest_list_writer = match self.table.metadata().format_version() { FormatVersion::V1 => ManifestListWriter::v1( self.table @@ -431,6 +457,7 @@ impl<'a> SnapshotProducer<'a> { .new_output(manifest_list_path.clone())?, self.snapshot_id, self.table.metadata().current_snapshot_id(), + compression, ), FormatVersion::V2 => ManifestListWriter::v2( self.table @@ -439,6 +466,7 @@ impl<'a> SnapshotProducer<'a> { self.snapshot_id, self.table.metadata().current_snapshot_id(), next_seq_num, + compression, ), FormatVersion::V3 => ManifestListWriter::v3( self.table @@ -448,6 +476,7 @@ impl<'a> SnapshotProducer<'a> { self.table.metadata().current_snapshot_id(), next_seq_num, Some(first_row_id), + compression, ), };