Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parquet/src/file/metadata/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,7 @@ impl MetadataObjectWriter {
match &self.file_encryptor {
Some(file_encryptor) => {
let unencrypted_row_groups = row_groups.clone();
// TODO: unencrypted_row_groups should not contain statistics for encrypted columns
let encrypted_row_groups = Self::encrypt_row_groups(row_groups, file_encryptor)?;
Ok((encrypted_row_groups, Some(unencrypted_row_groups)))
}
Expand Down
86 changes: 86 additions & 0 deletions parquet/tests/encryption/encryption.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,92 @@ fn test_write_uniform_encryption_plaintext_footer() {
.starts_with("Parquet error: Footer signature verification failed. Computed: ["));
}

/// Writes a single nullable `Int32` column `x` using file encryption properties built from
/// the footer key with `with_plaintext_footer(true)`, then inspects the row-group column
/// statistics in two ways:
///
/// 1. with decryption properties supplied, min/max must be readable (3 and 19);
/// 2. without decryption properties, reading only the footer metadata.
///
/// Finally it checks that reading the actual data without decryption properties fails.
///
/// Note that no per-column key is configured on the write side; the column key is only
/// passed to the decryption properties.
#[test]
pub fn test_row_group_statistics_plaintext_encrypted_write() {
    let footer_key = b"0123456789012345".to_vec(); // 128bit/16
    let column_key = b"1234567890123450".to_vec();

    let decryption_properties = FileDecryptionProperties::builder(footer_key.clone())
        .with_column_key("x", column_key.clone())
        .build()
        .unwrap();

    let file_encryption_properties = FileEncryptionProperties::builder(footer_key)
        .with_plaintext_footer(true)
        .build()
        .unwrap();

    let props = WriterProperties::builder()
        .with_file_encryption_properties(file_encryption_properties)
        .build();

    // Write encrypted data with plaintext footer
    let values = Int32Array::from(vec![8, 3, 4, 19, 5]);
    let schema = Arc::new(Schema::new(vec![Field::new(
        "x",
        values.data_type().clone(),
        true,
    )]));
    let values = Arc::new(values);
    let record_batches = vec![RecordBatch::try_new(schema.clone(), vec![values]).unwrap()];

    let temp_file = tempfile::tempfile().unwrap();
    let mut writer = ArrowWriter::try_new(&temp_file, schema, Some(props)).unwrap();
    // Borrow the batches instead of cloning the whole vector just to iterate it.
    for batch in &record_batches {
        writer.write(batch).unwrap();
    }
    writer.close().unwrap();

    // Expected min/max of the written values, in the little-endian byte form that the
    // assertions below compare against.
    let expected_min = 3i32.to_le_bytes();
    let expected_max = 19i32.to_le_bytes();

    // Shared check: exactly one row group with one column whose statistics carry the
    // expected min and max. `context` names the read mode so a failure is attributable.
    let assert_column_statistics = |reader_metadata: &ArrowReaderMetadata, context: &str| {
        let metadata = reader_metadata.metadata();
        assert_eq!(metadata.num_row_groups(), 1, "{context}: row group count");

        let row_group = &metadata.row_groups()[0];
        assert_eq!(row_group.columns().len(), 1, "{context}: column count");

        let column_stats = row_group.columns()[0].statistics().unwrap();
        assert_eq!(
            column_stats.min_bytes_opt(),
            Some(expected_min.as_slice()),
            "{context}: min statistic"
        );
        assert_eq!(
            column_stats.max_bytes_opt(),
            Some(expected_max.as_slice()),
            "{context}: max statistic"
        );
    };

    // Check column statistics can be read by decrypting
    let options =
        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
    // `options` is not needed again in this mode, so it is moved rather than cloned.
    let reader_metadata = ArrowReaderMetadata::load(&temp_file, options).unwrap();
    assert_column_statistics(&reader_metadata, "with decryption properties");

    // TODO: statistics shouldn't be available without decryption when footer is plaintext
    //
    // Check column statistics are not available in plaintext footer.
    // Until the TODO above is resolved this asserts the current behaviour: the same
    // statistics are visible without any decryption properties.
    let options = ArrowReaderOptions::default();
    let reader_metadata = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
    assert_column_statistics(&reader_metadata, "without decryption properties");

    // Reading the data pages themselves without decryption properties must fail.
    let builder =
        ParquetRecordBatchReaderBuilder::try_new_with_options(temp_file, options).unwrap();
    let mut record_reader = builder.build().unwrap();
    assert_eq!(
        record_reader.next().unwrap().unwrap_err().to_string(),
        "Parquet argument error: External: protocol error"
    );
}

#[test]
fn test_write_uniform_encryption() {
let testdata = arrow::util::test_util::parquet_test_data();
Expand Down
Loading